From e8c0b5685fe0e44363b0ebbf9ca6856c72b78d08 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 22 May 2017 13:59:16 +1000 Subject: [PATCH 001/190] fdr_confirm: remove complex confirm --- src/fdr/fdr_confirm.h | 4 +--- src/fdr/fdr_confirm_compile.cpp | 6 +----- src/fdr/fdr_confirm_runtime.h | 11 ----------- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h index 6ce85afd..7f73ea13 100644 --- a/src/fdr/fdr_confirm.h +++ b/src/fdr/fdr_confirm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,7 +46,6 @@ typedef enum LitInfoFlags { NoFlags = 0, Caseless = 1, NoRepeat = 2, - ComplexConfirm = 4 } LitInfoFlags; /** @@ -63,7 +62,6 @@ struct LitInfo { u8 size; u8 flags; /* LitInfoFlags */ u8 next; - u8 extended_size; }; #define FDRC_FLAG_NO_CONFIRM 1 diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index b14ffb42..3594ed4a 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -99,12 +99,8 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, if (lit.noruns) { flags |= NoRepeat; } - if (lit.msk.size() > lit.s.size()) { - flags |= ComplexConfirm; - info.extended_size = verify_u8(lit.msk.size()); - } info.flags = flags; - info.size = verify_u8(lit.s.size()); + info.size = verify_u8(max(lit.msk.size(), lit.s.size())); info.groups = lit.groups; // these are built up assuming a LE machine diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index a0603c92..ace54543 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -86,17 +86,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a goto out; } - if (unlikely(li->flags & ComplexConfirm)) { - const u8 
*loc2 = buf + i - li->extended_size + 1; - if (loc2 < buf) { - u32 full_overhang = buf - loc2; - size_t len_history = a->len_history; - if (full_overhang > len_history) { - goto out; - } - } - } - *last_match = li->id; *control = a->cb(loc - buf, i, li->id, a->ctxt); out: From c36c07156438bafe57d53e87a75f35fda935a357 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 23 May 2017 13:39:24 +1000 Subject: [PATCH 002/190] fdr_confirm: remove dead flags - Caseless was unused - NoFlags is a bit redundant --- src/fdr/fdr_confirm.h | 2 -- src/fdr/fdr_confirm_compile.cpp | 5 +---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h index 7f73ea13..816e9fe6 100644 --- a/src/fdr/fdr_confirm.h +++ b/src/fdr/fdr_confirm.h @@ -43,8 +43,6 @@ u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) { #define CONF_HASH_CALL mul_hash_64 typedef enum LitInfoFlags { - NoFlags = 0, - Caseless = 1, NoRepeat = 2, } LitInfoFlags; diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 3594ed4a..7eacb495 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -92,10 +92,7 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, LitInfo &info = tmpLitInfo[i]; memset(&info, 0, sizeof(info)); info.id = lit.id; - u8 flags = NoFlags; - if (lit.nocase) { - flags |= Caseless; - } + u8 flags = 0; if (lit.noruns) { flags |= NoRepeat; } From 9bdd3701635fe55f3c042b88d83e00222e355196 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 23 May 2017 14:28:12 +1000 Subject: [PATCH 003/190] fdr: align major structures to cachelines --- src/fdr/fdr.c | 9 +++-- src/fdr/fdr_compile.cpp | 61 +++++++++++++++++++-------------- src/fdr/fdr_confirm_compile.cpp | 2 +- 3 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 92e75aaa..aa5f1804 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -725,13 +725,18 @@ static never_inline hwlm_error_t 
fdr_engine_exec(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { + assert(ISALIGNED_CL(fdr)); + u32 floodBackoff = FLOOD_BACKOFF_START; u32 last_match_id = INVALID_MATCH_ID; u32 domain_mask_flipped = ~fdr->domainMask; u8 stride = fdr->stride; const u64a *ft = - (const u64a *)((const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR))); - const u32 *confBase = (const u32 *)((const u8 *)ft + fdr->tabSize); + (const u64a *)((const u8 *)fdr + ROUNDUP_CL(sizeof(struct FDR))); + assert(ISALIGNED_CL(ft)); + const u32 *confBase = + (const u32 *)((const u8 *)ft + ROUNDUP_CL(fdr->tabSize)); + assert(ISALIGNED_CL(confBase)); struct zone zones[ZONE_MAX]; assert(fdr->domain > 8 && fdr->domain < 16); diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index c4ea50f2..987379dd 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -144,50 +144,59 @@ void FDRCompiler::createInitialState(FDR *fdr) { } } +/** + * \brief Lay out FDR structures in bytecode. + * + * Note that each major structure (header, table, confirm, flood control) is + * cacheline-aligned. 
+ */ bytecode_ptr FDRCompiler::setupFDR() { - size_t tabSize = eng.getTabSizeBytes(); + size_t tabSize = ROUNDUP_CL(eng.getTabSizeBytes()); - auto floodControlTmp = setupFDRFloodControl(lits, eng, grey); - auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); + auto floodTable = setupFDRFloodControl(lits, eng, grey); + auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); - assert(ISALIGNED_16(tabSize)); - assert(ISALIGNED_16(confirmTmp.size())); - assert(ISALIGNED_16(floodControlTmp.size())); - size_t headerSize = ROUNDUP_16(sizeof(FDR)); - size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.size() + - floodControlTmp.size()); + size_t headerSize = ROUNDUP_CL(sizeof(FDR)); + size_t size = headerSize + tabSize + ROUNDUP_CL(confirmTable.size()) + + floodTable.size(); DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu " "total=%zu\n", - headerSize, tabSize, confirmTmp.size(), floodControlTmp.size(), + headerSize, tabSize, confirmTable.size(), floodTable.size(), size); auto fdr = make_zeroed_bytecode_ptr(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc + u8 *fdr_base = (u8 *)fdr.get(); + + // Write header. 
fdr->size = size; fdr->engineID = eng.getID(); fdr->maxStringLen = verify_u32(maxLen(lits)); - createInitialState(fdr.get()); - - u8 *fdr_base = (u8 *)fdr.get(); - u8 *ptr = fdr_base + ROUNDUP_16(sizeof(FDR)); - copy(tab.begin(), tab.end(), ptr); - ptr += tabSize; - - memcpy(ptr, confirmTmp.get(), confirmTmp.size()); - ptr += confirmTmp.size(); - - fdr->floodOffset = verify_u32(ptr - fdr_base); - memcpy(ptr, floodControlTmp.get(), floodControlTmp.size()); - ptr += floodControlTmp.size(); - - /* we are allowing domains 9 to 15 only */ - assert(eng.bits > 8 && eng.bits < 16); + assert(eng.bits > 8 && eng.bits < 16); // we allow domains 9 to 15 only fdr->domain = eng.bits; fdr->domainMask = (1 << eng.bits) - 1; fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8); fdr->stride = eng.stride; + createInitialState(fdr.get()); + + // Write table. + u8 *ptr = fdr_base + ROUNDUP_CL(sizeof(FDR)); + assert(ISALIGNED_CL(ptr)); + copy(tab.begin(), tab.end(), ptr); + ptr += tabSize; + + // Write confirm structures. + assert(ISALIGNED_CL(ptr)); + memcpy(ptr, confirmTable.get(), confirmTable.size()); + ptr += ROUNDUP_CL(confirmTable.size()); + + // Write flood control structures. 
+ assert(ISALIGNED_CL(ptr)); + fdr->floodOffset = verify_u32(ptr - fdr_base); + memcpy(ptr, floodTable.get(), floodTable.size()); + ptr += floodTable.size(); // last write, no need to round up return fdr; } diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 7eacb495..fa064bf3 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -367,7 +367,7 @@ setupFullConfs(const vector &lits, u32 totalConfSwitchSize = nBuckets * sizeof(u32); u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); - auto buf = make_zeroed_bytecode_ptr(totalSize, 16); + auto buf = make_zeroed_bytecode_ptr(totalSize, 64); assert(buf); // otherwise would have thrown std::bad_alloc u32 *confBase = (u32 *)buf.get(); From 4f32a167d53f7758c2b2a1befb06d7f504f161e0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 23 May 2017 14:40:04 +1000 Subject: [PATCH 004/190] teddy: align major structures to cachelines --- src/fdr/teddy_avx2.c | 10 +++++----- src/fdr/teddy_compile.cpp | 32 ++++++++++++++++++-------------- src/fdr/teddy_runtime_common.h | 10 +++++----- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 299825cc..38ac3f72 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -196,14 +196,14 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, } static really_inline -const m256 * getMaskBase_avx2(const struct Teddy *teddy) { - return (const m256 *)((const u8 *)teddy + sizeof(struct Teddy)); +const m256 *getMaskBase_avx2(const struct Teddy *teddy) { + return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); } static really_inline -const u32 * getConfBase_avx2(const struct Teddy *teddy, u8 numMask) { - return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + - (numMask*32*2)); +const u32 *getConfBase_avx2(const struct Teddy *teddy, u8 numMask) { + return (const u32 *)((const u8 *)teddy + 
ROUNDUP_CL(sizeof(struct Teddy)) + + ROUNDUP_CL((numMask * 32 * 2))); } hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 6f956e8c..19e595fb 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -313,35 +313,39 @@ bytecode_ptr TeddyCompiler::build() { } u32 maskWidth = eng.getNumBuckets() / 8; - size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; + size_t headerSize = ROUNDUP_CL(sizeof(Teddy)); + size_t maskLen = ROUNDUP_CL(eng.numMasks * 16 * 2 * maskWidth); - auto floodControlTmp = setupFDRFloodControl(lits, eng, grey); - auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); + auto floodTable = setupFDRFloodControl(lits, eng, grey); + auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); - size_t size = ROUNDUP_N(sizeof(Teddy) + - maskLen + - confirmTmp.size() + - floodControlTmp.size(), - 16 * maskWidth); + size_t size = headerSize + maskLen + ROUNDUP_CL(confirmTable.size()) + + floodTable.size(); auto fdr = make_zeroed_bytecode_ptr(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc Teddy *teddy = (Teddy *)fdr.get(); // ugly u8 *teddy_base = (u8 *)teddy; + // Write header. teddy->size = size; teddy->engineID = eng.getID(); teddy->maxStringLen = verify_u32(maxLen(lits)); - u8 *ptr = teddy_base + sizeof(Teddy) + maskLen; - memcpy(ptr, confirmTmp.get(), confirmTmp.size()); - ptr += confirmTmp.size(); + // Write confirm structures. + u8 *ptr = teddy_base + headerSize + maskLen; + assert(ISALIGNED_CL(ptr)); + memcpy(ptr, confirmTable.get(), confirmTable.size()); + ptr += ROUNDUP_CL(confirmTable.size()); + // Write flood control structures. 
+ assert(ISALIGNED_CL(ptr)); teddy->floodOffset = verify_u32(ptr - teddy_base); - memcpy(ptr, floodControlTmp.get(), floodControlTmp.size()); - ptr += floodControlTmp.size(); + memcpy(ptr, floodTable.get(), floodTable.size()); + ptr += floodTable.size(); - u8 *baseMsk = teddy_base + sizeof(Teddy); + // Write teddy masks. + u8 *baseMsk = teddy_base + headerSize; for (const auto &b2l : bucketToLits) { const u32 &bucket_id = b2l.first; diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index c5f0885f..883a68fc 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -239,14 +239,14 @@ void do_confWithBitMany_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, } static really_inline -const m128 * getMaskBase(const struct Teddy *teddy) { - return (const m128 *)((const u8 *)teddy + sizeof(struct Teddy)); +const m128 *getMaskBase(const struct Teddy *teddy) { + return (const m128 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); } static really_inline -const u32 * getConfBase(const struct Teddy *teddy, u8 numMask) { - return (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + - (numMask*32)); +const u32 *getConfBase(const struct Teddy *teddy, u8 numMask) { + return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) + + ROUNDUP_CL(numMask * 32)); } #endif /* TEDDY_RUNTIME_COMMON_H_ */ From 549062ec2bffeba165844d175b328e8669b67098 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 23 May 2017 14:44:20 +1000 Subject: [PATCH 005/190] fdr_confirm: start FDRConfirm structs at cacheline --- src/fdr/fdr_confirm_compile.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index fa064bf3..2c835938 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -364,14 +364,15 @@ setupFullConfs(const vector &lits, } u32 nBuckets = eng.getNumBuckets(); - u32 totalConfSwitchSize = nBuckets * 
sizeof(u32); - u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); + u32 totalConfSwitchSize = ROUNDUP_CL(nBuckets * sizeof(u32)); + u32 totalSize = totalConfSwitchSize + totalConfirmSize; auto buf = make_zeroed_bytecode_ptr(totalSize, 64); assert(buf); // otherwise would have thrown std::bad_alloc u32 *confBase = (u32 *)buf.get(); u8 *ptr = buf.get() + totalConfSwitchSize; + assert(ISALIGNED_CL(ptr)); for (const auto &m : bc2Conf) { const BucketIndex &idx = m.first; From c878d5ec66501e42ee8dcbcd483e4addc3187a9d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 24 May 2017 10:13:06 +1000 Subject: [PATCH 006/190] fdr: further tidy up layout --- src/fdr/fdr_compile.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 987379dd..e4f8c194 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -151,14 +151,15 @@ void FDRCompiler::createInitialState(FDR *fdr) { * cacheline-aligned. */ bytecode_ptr FDRCompiler::setupFDR() { - size_t tabSize = ROUNDUP_CL(eng.getTabSizeBytes()); - auto floodTable = setupFDRFloodControl(lits, eng, grey); auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); - size_t headerSize = ROUNDUP_CL(sizeof(FDR)); - size_t size = headerSize + tabSize + ROUNDUP_CL(confirmTable.size()) + - floodTable.size(); + size_t headerSize = sizeof(FDR); + size_t tabSize = eng.getTabSizeBytes(); + + // Note: we place each major structure here on a cacheline boundary. 
+ size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(tabSize) + + ROUNDUP_CL(confirmTable.size()) + floodTable.size(); DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu " "total=%zu\n", @@ -177,7 +178,7 @@ bytecode_ptr FDRCompiler::setupFDR() { assert(eng.bits > 8 && eng.bits < 16); // we allow domains 9 to 15 only fdr->domain = eng.bits; fdr->domainMask = (1 << eng.bits) - 1; - fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8); + fdr->tabSize = tabSize; fdr->stride = eng.stride; createInitialState(fdr.get()); @@ -185,7 +186,7 @@ bytecode_ptr FDRCompiler::setupFDR() { u8 *ptr = fdr_base + ROUNDUP_CL(sizeof(FDR)); assert(ISALIGNED_CL(ptr)); copy(tab.begin(), tab.end(), ptr); - ptr += tabSize; + ptr += ROUNDUP_CL(tabSize); // Write confirm structures. assert(ISALIGNED_CL(ptr)); From 06bafae81da966871b16bb87574f8138781172e0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 24 May 2017 10:29:28 +1000 Subject: [PATCH 007/190] fdr_confirm: clean up use of flags --- src/fdr/fdr_confirm.h | 11 +++++++---- src/fdr/fdr_confirm_compile.cpp | 4 ++-- src/fdr/fdr_confirm_runtime.h | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h index 816e9fe6..ebf0669c 100644 --- a/src/fdr/fdr_confirm.h +++ b/src/fdr/fdr_confirm.h @@ -42,9 +42,11 @@ u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) { #define CONF_TYPE u64a #define CONF_HASH_CALL mul_hash_64 -typedef enum LitInfoFlags { - NoRepeat = 2, -} LitInfoFlags; +/** + * \brief Flag indicating this literal doesn't need to be delivered more than + * once, used in LitInfo::flags. + */ +#define FDR_LIT_FLAG_NOREPEAT 2 /** * \brief Structure describing a literal, linked to by FDRConfirm. @@ -58,11 +60,12 @@ struct LitInfo { hwlm_group_t groups; u32 id; // literal ID as passed in u8 size; - u8 flags; /* LitInfoFlags */ + u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above. 
u8 next; }; #define FDRC_FLAG_NO_CONFIRM 1 +#define FDRC_FLAG_NOREPEAT 2 /** * \brief FDR confirm header. diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 2c835938..e9ec9dcf 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -94,7 +94,7 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, info.id = lit.id; u8 flags = 0; if (lit.noruns) { - flags |= NoRepeat; + flags |= FDR_LIT_FLAG_NOREPEAT; } info.flags = flags; info.size = verify_u8(max(lit.msk.size(), lit.s.size())); @@ -170,7 +170,7 @@ bytecode_ptr getFDRConfirm(const vector &lits, if (!make_confirm) { flags = FDRC_FLAG_NO_CONFIRM; if (lits[0].noruns) { - flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted + flags |= FDRC_FLAG_NOREPEAT; // messy - need to clean this up later as flags is sorta kinda obsoleted } mult = 0; soleLitSize = lits[0].s.size() - 1; diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index ace54543..ea644bfb 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -64,7 +64,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a goto out; } - if ((*last_match == li->id) && (li->flags & NoRepeat)) { + if ((*last_match == li->id) && (li->flags & FDR_LIT_FLAG_NOREPEAT)) { goto out; } @@ -110,7 +110,7 @@ void confWithBit1(const struct FDRConfirm *fdrc, } else { u32 id = fdrc->nBitsOrSoleID; - if ((*last_match == id) && (fdrc->flags & NoRepeat)) { + if ((*last_match == id) && (fdrc->flags & FDRC_FLAG_NOREPEAT)) { return; } *last_match = id; @@ -139,7 +139,7 @@ void confWithBitMany(const struct FDRConfirm *fdrc, const u32 id = fdrc->nBitsOrSoleID; const u32 len = fdrc->soleLitSize; - if ((*last_match == id) && (fdrc->flags & NoRepeat)) { + if ((*last_match == id) && (fdrc->flags & FDRC_FLAG_NOREPEAT)) { return; } From b126cbf556e9dcde82f4ee769101cead8e0b11f7 Mon Sep 17 00:00:00 2001 From: Justin Viiret 
Date: Wed, 24 May 2017 11:10:39 +1000 Subject: [PATCH 008/190] fdr/teddy: simplify computing of confirm base --- src/fdr/fdr.c | 3 +-- src/fdr/fdr_compile.cpp | 1 + src/fdr/fdr_internal.h | 1 + src/fdr/teddy.c | 16 ++++++++-------- src/fdr/teddy_avx2.c | 22 ++++++++-------------- src/fdr/teddy_compile.cpp | 1 + src/fdr/teddy_internal.h | 3 ++- src/fdr/teddy_runtime_common.h | 5 ++--- 8 files changed, 24 insertions(+), 28 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index aa5f1804..c77e31ff 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -734,8 +734,7 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr, const u64a *ft = (const u64a *)((const u8 *)fdr + ROUNDUP_CL(sizeof(struct FDR))); assert(ISALIGNED_CL(ft)); - const u32 *confBase = - (const u32 *)((const u8 *)ft + ROUNDUP_CL(fdr->tabSize)); + const u32 *confBase = (const u32 *)((const u8 *)fdr + fdr->confOffset); assert(ISALIGNED_CL(confBase)); struct zone zones[ZONE_MAX]; assert(fdr->domain > 8 && fdr->domain < 16); diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index e4f8c194..cd0013e4 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -190,6 +190,7 @@ bytecode_ptr FDRCompiler::setupFDR() { // Write confirm structures. 
assert(ISALIGNED_CL(ptr)); + fdr->confOffset = verify_u32(ptr - fdr_base); memcpy(ptr, confirmTable.get(), confirmTable.size()); ptr += ROUNDUP_CL(confirmTable.size()); diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index a425d78c..dd81c2dc 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -69,6 +69,7 @@ struct FDR { u32 engineID; u32 size; u32 maxStringLen; + u32 confOffset; u32 floodOffset; u8 stride; /* stride - how frequeuntly the data is consulted by the first diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index a3f7cfaf..636c741b 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -191,7 +191,7 @@ hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 1); + const u32 *confBase = getConfBase(teddy); const u8 *mainStart = ROUNDUP_PTR(ptr, 16); DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); @@ -247,7 +247,7 @@ hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 1); + const u32 *confBase = getConfBase(teddy); const u8 *mainStart = ROUNDUP_PTR(ptr, 16); DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); @@ -303,7 +303,7 @@ hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 2); + const u32 *confBase = getConfBase(teddy); m128 res_old_1 = ones128(); const u8 *mainStart = ROUNDUP_PTR(ptr, 16); @@ -360,7 +360,7 @@ hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 2); + const u32 *confBase = getConfBase(teddy); m128 res_old_1 = ones128(); const u8 *mainStart = ROUNDUP_PTR(ptr, 16); @@ -417,7 +417,7 @@ 
hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 3); + const u32 *confBase = getConfBase(teddy); m128 res_old_1 = ones128(); m128 res_old_2 = ones128(); @@ -479,7 +479,7 @@ hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 3); + const u32 *confBase = getConfBase(teddy); m128 res_old_1 = ones128(); m128 res_old_2 = ones128(); @@ -541,7 +541,7 @@ hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 4); + const u32 *confBase = getConfBase(teddy); m128 res_old_1 = ones128(); m128 res_old_2 = ones128(); @@ -605,7 +605,7 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 4); + const u32 *confBase = getConfBase(teddy); m128 res_old_1 = ones128(); m128 res_old_2 = ones128(); diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 38ac3f72..89117b0b 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -200,12 +200,6 @@ const m256 *getMaskBase_avx2(const struct Teddy *teddy) { return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); } -static really_inline -const u32 *getConfBase_avx2(const struct Teddy *teddy, u8 numMask) { - return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) + - ROUNDUP_CL((numMask * 32 * 2))); -} - hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { @@ -220,7 +214,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m256 *maskBase = getMaskBase_avx2(teddy); - 
const u32 *confBase = getConfBase_avx2(teddy, 1); + const u32 *confBase = getConfBase(teddy); const u8 *mainStart = ROUNDUP_PTR(ptr, 16); DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); @@ -276,7 +270,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase_avx2(teddy, 1); + const u32 *confBase = getConfBase(teddy); const u8 *mainStart = ROUNDUP_PTR(ptr, 16); DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); @@ -332,7 +326,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase_avx2(teddy, 2); + const u32 *confBase = getConfBase(teddy); m256 res_old_1 = ones256(); const u8 *mainStart = ROUNDUP_PTR(ptr, 16); @@ -390,7 +384,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase_avx2(teddy, 2); + const u32 *confBase = getConfBase(teddy); m256 res_old_1 = ones256(); const u8 *mainStart = ROUNDUP_PTR(ptr, 16); @@ -448,7 +442,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase_avx2(teddy, 3); + const u32 *confBase = getConfBase(teddy); m256 res_old_1 = ones256(); m256 res_old_2 = ones256(); @@ -511,7 +505,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase_avx2(teddy, 3); + const u32 *confBase = getConfBase(teddy); m256 res_old_1 = ones256(); m256 res_old_2 = ones256(); @@ -574,7 +568,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, a->buf, a->len, 
a->start_offset); const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase_avx2(teddy, 4); + const u32 *confBase = getConfBase(teddy); m256 res_old_1 = ones256(); m256 res_old_2 = ones256(); @@ -638,7 +632,7 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, a->buf, a->len, a->start_offset); const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase_avx2(teddy, 4); + const u32 *confBase = getConfBase(teddy); m256 res_old_1 = ones256(); m256 res_old_2 = ones256(); diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 19e595fb..2ea70f13 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -335,6 +335,7 @@ bytecode_ptr TeddyCompiler::build() { // Write confirm structures. u8 *ptr = teddy_base + headerSize + maskLen; assert(ISALIGNED_CL(ptr)); + teddy->confOffset = verify_u32(ptr - teddy_base); memcpy(ptr, confirmTable.get(), confirmTable.size()); ptr += ROUNDUP_CL(confirmTable.size()); diff --git a/src/fdr/teddy_internal.h b/src/fdr/teddy_internal.h index bbd8e788..f3319bd2 100644 --- a/src/fdr/teddy_internal.h +++ b/src/fdr/teddy_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +36,7 @@ struct Teddy { u32 engineID; u32 size; u32 maxStringLen; + u32 confOffset; u32 floodOffset; u32 link; u32 pad1; diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index 883a68fc..f63df724 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -244,9 +244,8 @@ const m128 *getMaskBase(const struct Teddy *teddy) { } static really_inline -const u32 *getConfBase(const struct Teddy *teddy, u8 numMask) { - return (const u32 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy)) + - ROUNDUP_CL(numMask 
* 32)); +const u32 *getConfBase(const struct Teddy *teddy) { + return (const u32 *)((const u8 *)teddy + teddy->confOffset); } #endif /* TEDDY_RUNTIME_COMMON_H_ */ From e9d85f7b512c22d9404e4d6cdee8a09958f1f685 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 24 May 2017 11:22:36 +1000 Subject: [PATCH 009/190] fdr_confirm: renumber FDR_LIT_FLAG_NOREPEAT --- src/fdr/fdr_confirm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h index ebf0669c..e160b96d 100644 --- a/src/fdr/fdr_confirm.h +++ b/src/fdr/fdr_confirm.h @@ -46,7 +46,7 @@ u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) { * \brief Flag indicating this literal doesn't need to be delivered more than * once, used in LitInfo::flags. */ -#define FDR_LIT_FLAG_NOREPEAT 2 +#define FDR_LIT_FLAG_NOREPEAT 1 /** * \brief Structure describing a literal, linked to by FDRConfirm. From 71bd1c8dfee410c79b6eb3355401f03c62de8199 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 24 May 2017 11:24:45 +1000 Subject: [PATCH 010/190] teddy: clean up compile to match fdr style --- src/fdr/teddy_compile.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 2ea70f13..5a21927a 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -313,14 +313,15 @@ bytecode_ptr TeddyCompiler::build() { } u32 maskWidth = eng.getNumBuckets() / 8; - size_t headerSize = ROUNDUP_CL(sizeof(Teddy)); - size_t maskLen = ROUNDUP_CL(eng.numMasks * 16 * 2 * maskWidth); + size_t headerSize = sizeof(Teddy); + size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; auto floodTable = setupFDRFloodControl(lits, eng, grey); auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); - size_t size = headerSize + maskLen + ROUNDUP_CL(confirmTable.size()) + - floodTable.size(); + // Note: we place each major structure here on a cacheline boundary. 
+ size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) + + ROUNDUP_CL(confirmTable.size()) + floodTable.size(); auto fdr = make_zeroed_bytecode_ptr(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc @@ -333,7 +334,7 @@ bytecode_ptr TeddyCompiler::build() { teddy->maxStringLen = verify_u32(maxLen(lits)); // Write confirm structures. - u8 *ptr = teddy_base + headerSize + maskLen; + u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen); assert(ISALIGNED_CL(ptr)); teddy->confOffset = verify_u32(ptr - teddy_base); memcpy(ptr, confirmTable.get(), confirmTable.size()); @@ -346,7 +347,7 @@ bytecode_ptr TeddyCompiler::build() { ptr += floodTable.size(); // Write teddy masks. - u8 *baseMsk = teddy_base + headerSize; + u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize); for (const auto &b2l : bucketToLits) { const u32 &bucket_id = b2l.first; From 2b9b2ca9115756ef01a4d7eea25d861b7fa9bd34 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 9 Jun 2017 16:28:32 +1000 Subject: [PATCH 011/190] fdr/teddy: remove padding from structures --- src/fdr/fdr_internal.h | 11 ++++------- src/fdr/teddy_internal.h | 4 ---- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index dd81c2dc..8109d1e8 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -71,18 +71,15 @@ struct FDR { u32 maxStringLen; u32 confOffset; u32 floodOffset; - - u8 stride; /* stride - how frequeuntly the data is consulted by the first + u8 stride; /* stride - how frequently the data is consulted by the first * stage matcher */ u8 domain; /* number of bits used to index into main FDR table. This value * is used only of debugging/asserts. */ u16 domainMask; /* pre-computed domain mask */ u32 tabSize; /* pre-computed hashtable size in bytes */ - u32 pad; - - m128 start; /* initial start state to use at offset 0. 
The state has been set - * up based on the min length of buckets to reduce the need for - * pointless confirms. */ + m128 start; /* initial start state to use at offset 0. The state has been + * set up based on the min length of buckets to reduce the need + * for pointless confirms. */ }; /** \brief FDR runtime arguments. diff --git a/src/fdr/teddy_internal.h b/src/fdr/teddy_internal.h index f3319bd2..359d1e13 100644 --- a/src/fdr/teddy_internal.h +++ b/src/fdr/teddy_internal.h @@ -38,10 +38,6 @@ struct Teddy { u32 maxStringLen; u32 confOffset; u32 floodOffset; - u32 link; - u32 pad1; - u32 pad2; - u32 pad3; }; #endif From d94bf2fd62848591dd79307915216cc589fae5c5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 9 Jun 2017 16:29:56 +1000 Subject: [PATCH 012/190] fdr_confirm_compile: wrap comment --- src/fdr/fdr_confirm_compile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index e9ec9dcf..b05d24a8 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -170,7 +170,9 @@ bytecode_ptr getFDRConfirm(const vector &lits, if (!make_confirm) { flags = FDRC_FLAG_NO_CONFIRM; if (lits[0].noruns) { - flags |= FDRC_FLAG_NOREPEAT; // messy - need to clean this up later as flags is sorta kinda obsoleted + // messy - need to clean this up later as flags is sorta kinda + // obsoleted + flags |= FDRC_FLAG_NOREPEAT; } mult = 0; soleLitSize = lits[0].s.size() - 1; From 64db576b9e9d2dea8c83f271cb69669f7aaab18c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 19 Jun 2017 15:47:36 +1000 Subject: [PATCH 013/190] fdr_confirm_compile: literals are now < 8 bytes --- src/fdr/fdr_confirm_compile.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index b05d24a8..616ff86e 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ 
-35,6 +35,7 @@ #include "util/alloc.h" #include "util/bitutils.h" #include "util/compare.h" +#include "util/container.h" #include "util/verify_types.h" #include @@ -47,19 +48,6 @@ namespace ue2 { using BC2CONF = map>; -// return the number of bytes beyond a length threshold in all strings in lits -static -size_t thresholdedSize(const vector &lits, size_t threshold) { - size_t tot = 0; - for (const auto &lit : lits) { - size_t sz = lit.s.size(); - if (sz > threshold) { - tot += ROUNDUP_N(sz - threshold, 8); - } - } - return tot; -} - static u64a make_u64a_mask(const vector &v) { assert(v.size() <= sizeof(u64a)); @@ -143,6 +131,11 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, static bytecode_ptr getFDRConfirm(const vector &lits, bool make_small, bool make_confirm) { + // Every literal must fit within CONF_TYPE. + assert(all_of_in(lits, [](const hwlmLiteral &lit) { + return lit.s.size() <= sizeof(CONF_TYPE); + })); + vector tmpLitInfo(lits.size()); CONF_TYPE andmsk; fillLitInfo(lits, tmpLitInfo, andmsk); @@ -271,12 +264,11 @@ bytecode_ptr getFDRConfirm(const vector &lits, #endif const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32); - const size_t totalLitSize = thresholdedSize(lits, sizeof(CONF_TYPE)); // this size can now be a worst-case as we can always be a bit smaller size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) + ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) + - sizeof(LitInfo) * lits.size() + totalLitSize; + sizeof(LitInfo) * lits.size(); size = ROUNDUP_N(size, alignof(FDRConfirm)); auto fdrc = make_zeroed_bytecode_ptr(size); From 31141dd35b33cfba814a8294393fed4310bd4bf3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 11 May 2017 17:07:26 +1000 Subject: [PATCH 014/190] determinise: use queue, improve api - Use a queue rather than always building the full vector of state sets. - Make more use of move, emplace, reserve. - Write directly into dstates argument. - Return bool rather than int. 
--- src/nfa/rdfa_merge.cpp | 4 +- src/nfagraph/ng_haig.cpp | 9 ++--- src/nfagraph/ng_mcclellan.cpp | 5 ++- src/rose/rose_build_anchored.cpp | 8 ++-- src/util/determinise.h | 66 +++++++++++++++++--------------- 5 files changed, 48 insertions(+), 44 deletions(-) diff --git a/src/nfa/rdfa_merge.cpp b/src/nfa/rdfa_merge.cpp index 50e9b62a..99b1930d 100644 --- a/src/nfa/rdfa_merge.cpp +++ b/src/nfa/rdfa_merge.cpp @@ -289,7 +289,7 @@ unique_ptr mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2, auto rdfa = ue2::make_unique(d1->kind); Automaton_Merge autom(d1, d2, rm, grey); - if (!determinise(autom, rdfa->states, max_states)) { + if (determinise(autom, rdfa->states, max_states)) { rdfa->start_anchored = autom.start_anchored; rdfa->start_floating = autom.start_floating; rdfa->alpha_size = autom.alphasize; @@ -374,7 +374,7 @@ unique_ptr mergeAllDfas(const vector &dfas, DEBUG_PRINTF("merging dfa\n"); - if (determinise(n, rdfa->states, max_states)) { + if (!determinise(n, rdfa->states, max_states)) { DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states); return nullptr; /* over state limit */ } diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index e4be14c3..50522ff7 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -518,7 +518,7 @@ bool doHaig(const NGHolder &g, som_type som, vector nfa_state_map; Auto n(g, som, triggers, unordered_som); try { - if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) { + if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) { DEBUG_PRINTF("state limit exceeded\n"); return false; } @@ -726,9 +726,8 @@ unique_ptr attemptToMergeHaig(const vector &df NODE_START, dfas[0]->stream_som_loc_width); - int rv = determinise(n, 
rdfa->states, limit, &nfa_state_map); - if (rv) { - DEBUG_PRINTF("%d:state limit (%u) exceeded\n", rv, limit); + if (!determinise(n, rdfa->states, limit, &nfa_state_map)) { + DEBUG_PRINTF("state limit (%u) exceeded\n", limit); return nullptr; /* over state limit */ } diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 9448a0bf..6ada273c 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -433,6 +433,7 @@ public: } return allExternalReports(*rm, test_reports); } + private: const ReportManager *rm; public: @@ -568,7 +569,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, /* Fast path. Automaton_Graph uses a bitfield internally to represent * states and is quicker than Automaton_Big. */ Automaton_Graph n(rm, graph, single_trigger, triggers, prunable); - if (determinise(n, rdfa->states, state_limit)) { + if (!determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ } @@ -580,7 +581,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, } else { /* Slow path. Too many states to use Automaton_Graph. 
*/ Automaton_Big n(rm, graph, single_trigger, triggers, prunable); - if (determinise(n, rdfa->states, state_limit)) { + if (!determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ } diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index a2af160e..b5413a67 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -701,8 +701,8 @@ int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { Automaton_Holder autom(h); - unique_ptr out_dfa = ue2::make_unique(NFA_OUTFIX_RAW); - if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) { + auto out_dfa = ue2::make_unique(NFA_OUTFIX_RAW); + if (determinise(autom, out_dfa->states, MAX_DFA_STATES)) { return finalise_out(build, h, autom, move(out_dfa), remap); } @@ -764,8 +764,8 @@ void buildSimpleDfas(const RoseBuildImpl &build, const vector &frag_map, auto h = populate_holder(simple.first, exit_ids); Automaton_Holder autom(*h); auto rdfa = ue2::make_unique(NFA_OUTFIX_RAW); - UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES); - assert(!rv); + UNUSED bool rv = determinise(autom, rdfa->states, MAX_DFA_STATES); + assert(rv); rdfa->start_anchored = INIT_STATE; rdfa->start_floating = DEAD_STATE; rdfa->alpha_size = autom.alphasize; diff --git a/src/util/determinise.h b/src/util/determinise.h index d7bb592b..688af61b 100644 --- a/src/util/determinise.h +++ b/src/util/determinise.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,14 +38,13 @@ #include "container.h" #include "ue2common.h" -#include #include +#include +#include #include namespace ue2 { -#define DETERMINISE_RESERVE_SIZE 10 - /* Automaton details: * * const vector initial() @@ -73,42 +72,44 @@ 
namespace ue2 { * \param state_limit limit on the number of dfa states to construct * \param statesets_out a mapping from DFA state to the set of NFA states in * the automaton - * \return zero on success + * \return true on success, false if state limit exceeded */ template never_inline -int determinise(Auto &n, std::vector &dstates_out, dstate_id_t state_limit, +bool determinise(Auto &n, std::vector &dstates, size_t state_limit, std::vector *statesets_out = nullptr) { DEBUG_PRINTF("the determinator\n"); typedef typename Auto::StateSet StateSet; typedef typename Auto::StateMap DstateIdMap; DstateIdMap dstate_ids; - std::vector statesets; const size_t alphabet_size = n.alphasize; - std::vector dstates; - dstates.reserve(DETERMINISE_RESERVE_SIZE); - statesets.reserve(DETERMINISE_RESERVE_SIZE); + dstates.clear(); + dstates.reserve(state_limit); - dstate_ids[n.dead] = DEAD_STATE; + dstate_ids.emplace(n.dead, DEAD_STATE); dstates.push_back(ds(alphabet_size)); std::fill_n(dstates[0].next.begin(), alphabet_size, DEAD_STATE); - statesets.push_back(n.dead); + std::queue> q; + q.emplace(n.dead, DEAD_STATE); const std::vector &init = n.initial(); for (u32 i = 0; i < init.size(); i++) { - statesets.push_back(init[i]); + q.emplace(init[i], dstates.size()); assert(!contains(dstate_ids, init[i])); - dstate_ids[init[i]] = dstates.size(); + dstate_ids.emplace(init[i], dstates.size()); dstates.push_back(ds(alphabet_size)); } std::vector succs(alphabet_size, n.dead); - for (dstate_id_t curr_id = DEAD_STATE; curr_id < dstates.size(); - curr_id++) { - StateSet &curr = statesets[curr_id]; + + while (!q.empty()) { + auto m = std::move(q.front()); + q.pop(); + StateSet &curr = m.first; + dstate_id_t curr_id = m.second; DEBUG_PRINTF("curr: %hu\n", curr_id); @@ -139,43 +140,46 @@ int determinise(Auto &n, std::vector &dstates_out, dstate_id_t state_limit, if (s && succs[s] == succs[s - 1]) { succ_id = dstates[curr_id].next[s - 1]; } else { - typename DstateIdMap::const_iterator 
dstate_id_iter; - dstate_id_iter = dstate_ids.find(succs[s]); - - if (dstate_id_iter != dstate_ids.end()) { - succ_id = dstate_id_iter->second; - + auto p = dstate_ids.emplace(succs[s], dstates.size()); + succ_id = p.first->second; + if (!p.second) { /* succs[s] is already present */ if (succ_id > curr_id && !dstates[succ_id].daddy && n.unalpha[s] < N_CHARS) { dstates[succ_id].daddy = curr_id; } } else { - statesets.push_back(succs[s]); - succ_id = dstates.size(); - dstate_ids[succs[s]] = succ_id; dstates.push_back(ds(alphabet_size)); dstates.back().daddy = n.unalpha[s] < N_CHARS ? curr_id : 0; + q.emplace(succs[s], succ_id); } DEBUG_PRINTF("-->%hu on %02hx\n", succ_id, n.unalpha[s]); } if (succ_id >= state_limit) { - DEBUG_PRINTF("succ_id %hu >= state_limit %hu\n", + DEBUG_PRINTF("succ_id %hu >= state_limit %zu\n", succ_id, state_limit); - return -2; + dstates.clear(); + return false; } dstates[curr_id].next[s] = succ_id; } } - dstates_out = dstates; + // The dstates vector will persist in the raw_dfa. 
+ dstates.shrink_to_fit(); + if (statesets_out) { - statesets_out->swap(statesets); + auto &statesets = *statesets_out; + statesets.resize(dstate_ids.size()); + for (auto &m : dstate_ids) { + statesets[m.second] = std::move(m.first); + } } + DEBUG_PRINTF("ok\n"); - return 0; + return true; } static inline From 0d7d52625cb013f27828db8d5542f66d57c021f3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 15 May 2017 09:54:09 +1000 Subject: [PATCH 015/190] ng_haig: make StateMap an unordered_map --- src/nfagraph/ng_haig.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 50522ff7..4f96786c 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -42,6 +42,7 @@ #include "util/determinise.h" #include "util/graph.h" #include "util/graph_range.h" +#include "util/hash_dynamic_bitset.h" #include "util/make_unique.h" #include "util/ue2_containers.h" @@ -236,7 +237,7 @@ public: struct Big_Traits { using StateSet = dynamic_bitset<>; - using StateMap = map; + using StateMap = unordered_map; static StateSet init_states(u32 num) { return StateSet(num); From 85f7790a2155312760f6714ad0eda3d553c8607d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 15 May 2017 10:02:13 +1000 Subject: [PATCH 016/190] dfa: standardise 'using' instead of typedef --- src/nfa/rdfa_merge.cpp | 4 ++-- src/nfagraph/ng_haig.cpp | 8 ++++---- src/rose/rose_build_anchored.cpp | 6 +++--- src/util/determinise.h | 5 ++--- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/nfa/rdfa_merge.cpp b/src/nfa/rdfa_merge.cpp index 99b1930d..0905dc08 100644 --- a/src/nfa/rdfa_merge.cpp +++ b/src/nfa/rdfa_merge.cpp @@ -53,8 +53,8 @@ namespace { class Automaton_Merge { public: - typedef vector StateSet; - typedef ue2::unordered_map StateMap; + using StateSet = vector; + using StateMap = unordered_map; Automaton_Merge(const raw_dfa *rdfa1, const raw_dfa *rdfa2, const ReportManager *rm_in, const Grey &grey_in) 
diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 4f96786c..9582a1e8 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -285,8 +285,8 @@ public: class Automaton_Haig_Merge { public: - typedef vector StateSet; - typedef ue2::unordered_map StateMap; + using StateSet = vector; + using StateMap = unordered_map; explicit Automaton_Haig_Merge(const vector &in) : nfas(in.begin(), in.end()), dead(in.size()) { @@ -515,7 +515,7 @@ bool doHaig(const NGHolder &g, som_type som, raw_som_dfa *rdfa) { u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from a fight */ - typedef typename Auto::StateSet StateSet; + using StateSet = typename Auto::StateSet; vector nfa_state_map; Auto n(g, som, triggers, unordered_som); try { @@ -721,7 +721,7 @@ unique_ptr attemptToMergeHaig(const vector &df } } - typedef Automaton_Haig_Merge::StateSet StateSet; + using StateSet = Automaton_Haig_Merge::StateSet; vector nfa_state_map; auto rdfa = ue2::make_unique(dfas[0]->kind, unordered_som, NODE_START, diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index b5413a67..74626a82 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -274,7 +274,7 @@ u32 anchoredStateSize(const anchored_matcher_info &atable) { namespace { -typedef bitfield nfa_state_set; +using nfa_state_set = bitfield; struct Holder_StateSet { Holder_StateSet() : wdelay(0) {} @@ -296,8 +296,8 @@ size_t hash_value(const Holder_StateSet &s) { class Automaton_Holder { public: - typedef Holder_StateSet StateSet; - typedef ue2::unordered_map StateMap; + using StateSet = Holder_StateSet; + using StateMap = unordered_map; explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) { for (auto v : vertices_range(g)) { diff --git a/src/util/determinise.h b/src/util/determinise.h index 688af61b..eb56d970 100644 --- a/src/util/determinise.h +++ b/src/util/determinise.h @@ -79,9 +79,8 @@ never_inline bool determinise(Auto &n, 
std::vector &dstates, size_t state_limit, std::vector *statesets_out = nullptr) { DEBUG_PRINTF("the determinator\n"); - typedef typename Auto::StateSet StateSet; - typedef typename Auto::StateMap DstateIdMap; - DstateIdMap dstate_ids; + using StateSet = typename Auto::StateSet; + typename Auto::StateMap dstate_ids; const size_t alphabet_size = n.alphasize; From bdae3d5b80a3be2eae20b3e885e91f370cb919de Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 31 May 2017 13:07:22 +1000 Subject: [PATCH 017/190] dump: always allocate >=8 bytes for multibit The multibit runtime assumes that it is always safe to read 8 bytes, so we must over-allocate for smaller sizes. Caught by ASan. --- src/rose/rose_build_dump.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index b527db6c..a5467b31 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -681,10 +681,17 @@ vector sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) { return keys; } - vector bits(mmbit_size(num_bits), u8{0xff}); // All bits on. - vector state(MAX_SPARSE_ITER_STATES); - + // Populate a multibit structure with all-ones. Note that the multibit + // runtime assumes that it is always safe to read 8 bytes, so we must + // over-allocate for smaller sizes. + const size_t num_bytes = mmbit_size(num_bits); + vector bits(max(size_t{8}, num_bytes), u8{0xff}); // All bits on. 
const u8 *b = bits.data(); + if (num_bytes < 8) { + b += 8 - num_bytes; + } + + vector state(MAX_SPARSE_ITER_STATES); mmbit_sparse_state *s = state.data(); u32 idx = 0; From 7560e189ebb2445ce74585b852964c23a794736c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 20 Jun 2017 17:11:18 +1000 Subject: [PATCH 018/190] rose: always use mandatory masks for lit fragments --- src/rose/rose_build_matchers.cpp | 99 ++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 37 deletions(-) diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 682a87c3..b693c70f 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -55,6 +55,7 @@ #include #include +#include using namespace std; using boost::adaptors::map_values; @@ -63,7 +64,7 @@ namespace ue2 { static const size_t MAX_ACCEL_STRING_LEN = 16; -#ifdef DEBUG +#if defined(DEBUG) || defined(DUMP_SUPPORT) static UNUSED string dumpMask(const vector &v) { ostringstream oss; @@ -231,28 +232,12 @@ bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id, } static -bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id, - const rose_literal_info &info, const RoseVertex v, - vector &msk, vector &cmp) { +bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, + const RoseVertex v, vector &msk, vector &cmp) { // Start with zero masks. msk.assign(HWLM_MASKLEN, 0); cmp.assign(HWLM_MASKLEN, 0); - // Masks can come from literal benefits (for mixed-case literals). 
- if (info.requires_benefits) { - assert(mixed_sensitivity(id.s)); - - size_t j = 0; - for (ue2_literal::const_reverse_iterator it = id.s.rbegin(), - ite = id.s.rend(); - it != ite && j < HWLM_MASKLEN; ++it, ++j) { - size_t offset = HWLM_MASKLEN - j - 1; - const CharReach &cr = *it; - make_and_cmp_mask(cr, &msk[offset], &cmp[offset]); - } - return true; - } - const LeftEngInfo &left = build.g[v].left; if (left && left.lag < HWLM_MASKLEN) { if (maskFromLeft(left, msk, cmp)) { @@ -293,9 +278,9 @@ bool hamsterMaskCombine(vector &msk, vector &cmp, } static -bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id, - const rose_literal_info &info, - vector &msk, vector &cmp) { +bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, + const rose_literal_info &info, vector &msk, + vector &cmp) { if (!build.cc.grey.roseHamsterMasks) { return false; } @@ -305,11 +290,14 @@ bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id, return false; } + msk.assign(HWLM_MASKLEN, 0); + cmp.assign(HWLM_MASKLEN, 0); + size_t num = 0; vector v_msk, v_cmp; for (RoseVertex v : info.vertices) { - if (!findHamsterMask(build, id, info, v, v_msk, v_cmp)) { + if (!addSurroundingMask(build, id, v, v_msk, v_cmp)) { DEBUG_PRINTF("no mask\n"); return false; } @@ -364,14 +352,6 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { continue; } - if (!lit.msk.empty()) { - continue; - } - - const auto &lit_info = build.literal_info.at(id); - if (lit_info.requires_benefits) { - continue; - } candidates.push_back(id); } @@ -380,14 +360,15 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { auto &lit_info = build.literal_info.at(id); vector msk, cmp; - if (!findHamsterMask(build, lit, lit_info, msk, cmp)) { + if (!addSurroundingMask(build, lit, lit_info, msk, cmp)) { continue; } - assert(!msk.empty()); - DEBUG_PRINTF("found advisory mask for lit_id=%u (%s)\n", id, + DEBUG_PRINTF("found surrounding mask for lit_id=%u (%s)\n", id, 
dumpString(lit.s).c_str()); u32 new_id = build.getLiteralId(lit.s, msk, cmp, lit.delay, lit.table); - assert(new_id != id); + if (new_id == id) { + continue; + } DEBUG_PRINTF("replacing with new lit_id=%u\n", new_id); // Note that our new literal may already exist and have vertices, etc. @@ -409,6 +390,48 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { } } +// The mask already associated with the literal and any mask due to +// mixed-case is mandatory. +static +void addLiteralMask(const rose_literal_id &id, vector &msk, + vector &cmp) { + if (id.msk.empty() && !mixed_sensitivity(id.s)) { + return; + } + + while (msk.size() < HWLM_MASKLEN) { + msk.insert(msk.begin(), 0); + cmp.insert(cmp.begin(), 0); + } + + if (!id.msk.empty()) { + assert(id.msk.size() <= HWLM_MASKLEN); + assert(id.msk.size() == id.cmp.size()); + for (size_t i = 0; i < id.msk.size(); i++) { + size_t mand_offset = msk.size() - i - 1; + size_t lit_offset = id.msk.size() - i - 1; + msk[mand_offset] = id.msk[lit_offset]; + cmp[mand_offset] = id.cmp[lit_offset]; + } + } + + if (mixed_sensitivity(id.s)) { + auto it = id.s.rbegin(); + for (size_t i = 0, i_end = min(id.s.length(), size_t{HWLM_MASKLEN}); + i < i_end; ++i, ++it) { + const auto &c = *it; + if (!c.nocase) { + size_t offset = HWLM_MASKLEN - i - 1; + DEBUG_PRINTF("offset %zu must match 0x%02x exactly\n", offset, + c.c); + make_and_cmp_mask(c, &msk[offset], &cmp[offset]); + } + } + } + + normaliseLiteralMask(id.s, msk, cmp); +} + static bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, const rose_literal_info &info) { @@ -716,8 +739,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, } } - const vector &msk = lit.msk; - const vector &cmp = lit.cmp; + vector msk = lit.msk; // copy + vector cmp = lit.cmp; // copy bool noruns = isNoRunsLiteral(build, id, info, max_len); size_t lit_hist_len = 0; @@ -740,6 +763,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, assert(!noruns); } + addLiteralMask(lit, msk, 
cmp); + const auto &s_final = lit_final.get_string(); bool nocase = lit_final.any_nocase(); From 72387e0de46e9763f5159c53bd92527783b761c5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 20 Jun 2017 17:34:17 +1000 Subject: [PATCH 019/190] lookarounds: don't reconfirm bytes in hwlm mask --- src/rose/rose_build_program.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 23a8b959..562ddb20 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -1288,19 +1288,28 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id, vector look; - const ue2_literal &s = build.literals.at(lit_id).s; + const auto &lit = build.literals.at(lit_id); + const ue2_literal &s = lit.s; + const auto &msk = lit.msk; + DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id, dumpString(s).c_str()); + assert(s.length() <= MAX_MASK2_WIDTH); - s32 i = 0 - s.length(); - for (const auto &e : s) { - if (!e.nocase) { - look.emplace_back(verify_s8(i), e); + + // Note: the literal matcher will confirm the HWLM mask in lit.msk, so we + // do not include those entries in the lookaround. + auto it = s.begin(); + for (s32 i = 0 - s.length(), i_end = 0 - msk.size(); i < i_end; ++i, ++it) { + if (!it->nocase) { + look.emplace_back(verify_s8(i), *it); } - i++; } - assert(!look.empty()); + if (look.empty()) { + return; // all caseful chars handled by HWLM mask. 
+ } + makeLookaroundInstruction(look, program); } From 3bd0c7f6adac907ea59be30f8206e558aa84144d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 29 May 2017 12:45:37 +1000 Subject: [PATCH 020/190] unit-hyperscan: pure-literal/smwr coverage --- unit/CMakeLists.txt | 1 + unit/hyperscan/literals.cpp | 245 ++++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 unit/hyperscan/literals.cpp diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index a7658b26..45e2c7ba 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -42,6 +42,7 @@ set(unit_hyperscan_SOURCES hyperscan/expr_info.cpp hyperscan/extparam.cpp hyperscan/identical.cpp + hyperscan/literals.cpp hyperscan/main.cpp hyperscan/multi.cpp hyperscan/order.cpp diff --git a/unit/hyperscan/literals.cpp b/unit/hyperscan/literals.cpp new file mode 100644 index 00000000..86bd317c --- /dev/null +++ b/unit/hyperscan/literals.cpp @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "test_util.h" +#include "gtest/gtest.h" + +#include +#include +#include +#include + +using namespace std; +using namespace testing; + +class HyperscanLiteralTest + : public TestWithParam /* len min,max */, + bool /* add non-literal case */>> { +protected: + virtual void SetUp() { + tie(mode, all_flags, num, bounds, add_non_literal) = GetParam(); + rng.seed(29785643); + + if (mode & HS_MODE_STREAM && all_flags & HS_FLAG_SOM_LEFTMOST) { + mode |= HS_MODE_SOM_HORIZON_LARGE; + } + } + + // Returns (regex, corpus) + pair random_lit(unsigned min_len, unsigned max_len) { + boost::random::uniform_int_distribution<> len_dist(min_len, max_len); + size_t len = len_dist(rng); + + // Limit alphabet to [a-z] so that caseless tests include only alpha + // chars and can be entirely caseless. 
+ boost::random::uniform_int_distribution<> dist('a', 'z'); + + ostringstream oss; + string corpus; + for (size_t i = 0; i < len; i++) { + char c = dist(rng); + oss << "\\x" << std::hex << std::setw(2) << std::setfill('0') + << ((unsigned)c & 0xff); + corpus.push_back(c); + } + return {oss.str(), corpus}; + } + + virtual void TearDown() {} + + boost::random::mt19937 rng; + unsigned mode; + unsigned all_flags; + unsigned num; + pair bounds; + bool add_non_literal; +}; + +static +int count_cb(unsigned, unsigned long long, unsigned long long, unsigned, + void *ctxt) { + size_t *count = (size_t *)ctxt; + (*count)++; + return 0; +} + +static +void do_scan_block(const vector &corpora, const hs_database_t *db, + hs_scratch_t *scratch) { + size_t count = 0; + for (const auto &s : corpora) { + size_t before = count; + hs_error_t err = + hs_scan(db, s.c_str(), s.size(), 0, scratch, count_cb, &count); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_LT(before, count); + } +} + +static +void do_scan_stream(const vector &corpora, const hs_database_t *db, + hs_scratch_t *scratch) { + size_t count = 0; + for (const auto &s : corpora) { + size_t before = count; + hs_stream_t *stream = nullptr; + hs_error_t err = hs_open_stream(db, 0, &stream); + ASSERT_EQ(HS_SUCCESS, err); + err = hs_scan_stream(stream, s.c_str(), s.size(), 0, scratch, count_cb, + &count); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_LT(before, count); + err = hs_close_stream(stream, scratch, dummy_cb, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + } +} + +static +void do_scan_vectored(const vector &corpora, const hs_database_t *db, + hs_scratch_t *scratch) { + size_t count = 0; + for (const auto &s : corpora) { + size_t before = count; + const char *const data[] = {s.c_str()}; + const unsigned int data_len[] = {(unsigned int)s.size()}; + hs_error_t err = hs_scan_vector(db, data, data_len, 1, 0, scratch, + count_cb, &count); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_LT(before, count); + } +} + +static +void do_scan(unsigned mode, const 
vector &corpora, + const hs_database_t *db) { + hs_scratch_t *scratch = nullptr; + hs_error_t err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + + if (mode & HS_MODE_BLOCK) { + do_scan_block(corpora, db, scratch); + } else if (mode & HS_MODE_STREAM) { + do_scan_stream(corpora, db, scratch); + } else if (mode & HS_MODE_VECTORED) { + do_scan_vectored(corpora, db, scratch); + } + + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST_P(HyperscanLiteralTest, Caseful) { + vector patterns; + vector corpora; + for (unsigned i = 0; i < num; i++) { + auto r = random_lit(bounds.first, bounds.second); + unsigned flags = all_flags; + patterns.emplace_back(std::move(r.first), flags, i); + corpora.emplace_back(std::move(r.second)); + } + + if (add_non_literal) { + patterns.emplace_back("hatstand.*teakettle", 0, num + 1); + corpora.push_back("hatstand teakettle"); + } + + auto *db = buildDB(patterns, mode); + ASSERT_TRUE(db != nullptr); + + do_scan(mode, corpora, db); + + hs_free_database(db); +} + +TEST_P(HyperscanLiteralTest, Caseless) { + vector patterns; + vector corpora; + for (unsigned i = 0; i < num; i++) { + auto r = random_lit(bounds.first, bounds.second); + unsigned flags = all_flags | HS_FLAG_CASELESS; + patterns.emplace_back(std::move(r.first), flags, i); + corpora.emplace_back(std::move(r.second)); + } + + if (add_non_literal) { + patterns.emplace_back("hatstand.*teakettle", 0, num + 1); + corpora.push_back("hatstand teakettle"); + } + + auto *db = buildDB(patterns, mode); + ASSERT_TRUE(db != nullptr); + + do_scan(mode, corpora, db); + + hs_free_database(db); +} + +TEST_P(HyperscanLiteralTest, MixedCase) { + vector patterns; + vector corpora; + for (unsigned i = 0; i < num; i++) { + auto r = random_lit(bounds.first, bounds.second); + unsigned flags = all_flags; + if (i % 2) { + flags |= HS_FLAG_CASELESS; + } + patterns.emplace_back(std::move(r.first), flags, i); + corpora.emplace_back(std::move(r.second)); + } + + if 
(add_non_literal) { + patterns.emplace_back("hatstand.*teakettle", 0, num + 1); + corpora.push_back("hatstand teakettle"); + } + + auto *db = buildDB(patterns, mode); + ASSERT_TRUE(db != nullptr); + + do_scan(mode, corpora, db); + + hs_free_database(db); +} + +static const unsigned test_modes[] = {HS_MODE_BLOCK, HS_MODE_STREAM, + HS_MODE_VECTORED}; + +static const unsigned test_flags[] = {0, HS_FLAG_SINGLEMATCH, + HS_FLAG_SOM_LEFTMOST}; + +static const unsigned test_sizes[] = {1, 10, 100, 500, 10000}; + +static const pair test_bounds[] = {{3u, 10u}, {10u, 100u}}; + +INSTANTIATE_TEST_CASE_P(LiteralTest, HyperscanLiteralTest, + Combine(ValuesIn(test_modes), ValuesIn(test_flags), + ValuesIn(test_sizes), ValuesIn(test_bounds), + Bool())); From c83f2ea389b551cb96fb27ebd0d3fe1b06343c54 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 22 Jun 2017 10:37:31 +1000 Subject: [PATCH 021/190] rose_build_matchers: be more careful w/ mixed-case Overhaul the way fragment literals are added to HWLM and accel, fix some bugs shaken out by stricter mask use. 
--- src/rose/rose_build_matchers.cpp | 197 +++++++++++++++++++------------ src/util/ue2string.cpp | 19 +-- src/util/ue2string.h | 34 +++++- 3 files changed, 155 insertions(+), 95 deletions(-) diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index b693c70f..836ac965 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -395,7 +395,11 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { static void addLiteralMask(const rose_literal_id &id, vector &msk, vector &cmp) { - if (id.msk.empty() && !mixed_sensitivity(id.s)) { + const size_t suffix_len = min(id.s.length(), size_t{HWLM_MASKLEN}); + bool mixed_suffix = mixed_sensitivity_in(id.s.end() - suffix_len, + id.s.end()); + + if (id.msk.empty() && !mixed_suffix) { return; } @@ -415,10 +419,9 @@ void addLiteralMask(const rose_literal_id &id, vector &msk, } } - if (mixed_sensitivity(id.s)) { + if (mixed_suffix) { auto it = id.s.rbegin(); - for (size_t i = 0, i_end = min(id.s.length(), size_t{HWLM_MASKLEN}); - i < i_end; ++i, ++it) { + for (size_t i = 0; i < suffix_len; ++i, ++it) { const auto &c = *it; if (!c.nocase) { size_t offset = HWLM_MASKLEN - i - 1; @@ -683,6 +686,81 @@ struct MatcherProto { }; } +static +void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, + const LitFragment &f, u32 id, bool delay_rebuild, + size_t max_len) { + const rose_literal_id &lit = build.literals.at(id); + assert(id < build.literal_info.size()); + const auto &info = build.literal_info.at(id); + + DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(lit.s).c_str(), + lit.s.length()); + + vector msk = lit.msk; // copy + vector cmp = lit.cmp; // copy + bool noruns = isNoRunsLiteral(build, id, info, max_len); + + auto lit_final = lit.s; // copy + + if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("truncating to tail of length %zu\n", + size_t{ROSE_SHORT_LITERAL_LEN_MAX}); + lit_final.erase(0, lit_final.length() - ROSE_SHORT_LITERAL_LEN_MAX); + // 
We shouldn't have set a threshold below 8 chars. + assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); + assert(!noruns); + } + + addLiteralMask(lit, msk, cmp); + + const auto &s_final = lit_final.get_string(); + bool nocase = lit_final.any_nocase(); + + DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n", + f.fragment_id, escapeString(s_final).c_str(), (int)nocase, + noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + + if (!maskIsConsistent(s_final, nocase, msk, cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + return; + } + + u32 prog_offset = + delay_rebuild ? f.delay_program_offset : f.lit_program_offset; + const auto &groups = f.groups; + + mp.lits.emplace_back(move(s_final), nocase, noruns, prog_offset, groups, + msk, cmp); +} + +static +void addAccelLiteral(MatcherProto &mp, const rose_literal_id &lit, + const rose_literal_info &info, size_t max_len) { + const auto &s = lit.s; // copy + + DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(s).c_str(), s.length()); + + vector msk = lit.msk; // copy + vector cmp = lit.cmp; // copy + addLiteralMask(lit, msk, cmp); + + if (!maskIsConsistent(s.get_string(), s.any_nocase(), msk, cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + return; + } + + // Literals used for acceleration must be limited to max_len, as that's all + // we can see in history. + string s_final = lit.s.get_string(); + trim_to_suffix(s_final, max_len); + trim_to_suffix(msk, max_len); + trim_to_suffix(cmp, max_len); + + mp.accel_lits.emplace_back(s_final, lit.s.any_nocase(), msk, cmp, + info.group_mask); +} + /** * \brief Build up a vector of literals (and associated other data) for the * given table. @@ -702,26 +780,27 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, assert(build.cc.streaming); } + vector used_lit_ids; + for (const auto &f : fragments) { + assert(!f.lit_ids.empty()); + + // All literals that share a fragment are in the same table. 
+ if (build.literals.at(f.lit_ids.front()).table != table) { + continue; // next fragment. + } + + DEBUG_PRINTF("fragment %u, %zu lit_ids\n", f.fragment_id, + f.lit_ids.size()); + + used_lit_ids.clear(); for (u32 id : f.lit_ids) { const rose_literal_id &lit = build.literals.at(id); - - if (lit.table != table) { - continue; /* wrong table */ - } - - if (lit.delay) { - continue; /* delay id's are virtual-ish */ - } - assert(id < build.literal_info.size()); const auto &info = build.literal_info.at(id); - - /* Note: requires_benefits are handled in the literal entries */ - const ue2_literal &s = lit.s; - - DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(s).c_str(), - s.length()); + if (lit.delay) { + continue; /* delay id's are virtual-ish */ + } // When building the delay rebuild table, we only want to include // literals that have delayed variants. @@ -739,69 +818,39 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, } } - vector msk = lit.msk; // copy - vector cmp = lit.cmp; // copy - bool noruns = isNoRunsLiteral(build, id, info, max_len); + used_lit_ids.push_back(id); + } - size_t lit_hist_len = 0; + if (used_lit_ids.empty()) { + continue; // next fragment. + } + + // Build our fragment (for the HWLM matcher) from the first literal. + addFragmentLiteral(build, mp, f, used_lit_ids.front(), delay_rebuild, + max_len); + + for (u32 id : used_lit_ids) { + const rose_literal_id &lit = build.literals.at(id); + assert(id < build.literal_info.size()); + const auto &info = build.literal_info.at(id); + + // All literals contribute accel information. + addAccelLiteral(mp, lit, info, max_len); + + // All literals contribute to history requirement in streaming mode. if (build.cc.streaming) { - lit_hist_len = max(msk.size(), min(s.length(), max_len)); + size_t lit_hist_len = + max(lit.msk.size(), min(lit.s.length(), max_len)); lit_hist_len = lit_hist_len ? 
lit_hist_len - 1 : 0; + DEBUG_PRINTF("lit requires %zu bytes of history\n", + lit_hist_len); + assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); + mp.history_required = max(mp.history_required, lit_hist_len); } - DEBUG_PRINTF("lit requires %zu bytes of history\n", lit_hist_len); - assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); - - auto lit_final = s; // copy - - if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("truncating to tail of length %zu\n", - size_t{ROSE_SHORT_LITERAL_LEN_MAX}); - lit_final.erase(0, lit_final.length() - - ROSE_SHORT_LITERAL_LEN_MAX); - // We shouldn't have set a threshold below 8 chars. - assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); - assert(!noruns); - } - - addLiteralMask(lit, msk, cmp); - - const auto &s_final = lit_final.get_string(); - bool nocase = lit_final.any_nocase(); - - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " - "cmp=%s\n", f.fragment_id, - escapeString(s_final).c_str(), (int)nocase, noruns, - dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - - if (!maskIsConsistent(s_final, nocase, msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); - continue; - } - - mp.accel_lits.emplace_back(s.get_string(), s.any_nocase(), msk, cmp, - info.group_mask); - mp.history_required = max(mp.history_required, lit_hist_len); - - u32 prog_offset = delay_rebuild ? f.delay_program_offset - : f.lit_program_offset; - const auto &groups = f.groups; - - mp.lits.emplace_back(move(s_final), nocase, noruns, prog_offset, - groups, msk, cmp); } } sort_and_unique(mp.lits); - - // Literals used for acceleration must be limited to max_len, as that's all - // we can see in history. 
- for_each(begin(mp.accel_lits), end(mp.accel_lits), - [&max_len](AccelString &a) { - trim_to_suffix(a.s, max_len); - trim_to_suffix(a.msk, max_len); - trim_to_suffix(a.cmp, max_len); - }); - sort_and_unique(mp.accel_lits); return mp; diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index 7c16aa58..bde975ad 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -320,23 +320,6 @@ bool ue2_literal::any_nocase() const { return find(nocase.begin(), nocase.end(), true) != nocase.end(); } -bool mixed_sensitivity(const ue2_literal &s) { - bool cs = false; - bool nc = false; - for (ue2_literal::const_iterator it = s.begin(); it != s.end(); ++it) { - if (!ourisalpha(it->c)) { - continue; - } - if (it->nocase) { - nc = true; - } else { - cs = true; - } - } - - return cs && nc; -} - void make_nocase(ue2_literal *lit) { ue2_literal rv; diff --git a/src/util/ue2string.h b/src/util/ue2string.h index a90d47a3..d9fbadcd 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -35,6 +35,7 @@ #include "ue2common.h" #include "util/charreach.h" +#include "util/compare.h" #include "util/hash.h" #include @@ -226,9 +227,36 @@ size_t maxStringSelfOverlap(const ue2_literal &a); size_t minStringPeriod(const ue2_literal &a); size_t maxStringOverlap(const ue2_literal &a, const ue2_literal &b); -/** \brief True iff the literal cannot be considered entirely case-sensitive - * nor entirely case-insensitive */ -bool mixed_sensitivity(const ue2_literal &lit); +/** + * \brief True iff the range of a literal given cannot be considered entirely + * case-sensitive nor entirely case-insensitive. 
+ */ +template +bool mixed_sensitivity_in(Iter begin, Iter end) { + bool cs = false; + bool nc = false; + for (auto it = begin; it != end; ++it) { + if (!ourisalpha(it->c)) { + continue; + } + if (it->nocase) { + nc = true; + } else { + cs = true; + } + } + + return cs && nc; +} + +/** + * \brief True iff the literal cannot be considered entirely case-sensitive + * nor entirely case-insensitive. + */ +inline +bool mixed_sensitivity(const ue2_literal &s) { + return mixed_sensitivity_in(s.begin(), s.end()); +} void make_nocase(ue2_literal *lit); From 84a09d35d6803390057937a22749089230b0be74 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Apr 2017 17:05:40 +1000 Subject: [PATCH 022/190] teddy_compile: use faster small containers --- src/fdr/teddy_compile.cpp | 46 +++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 5a21927a..571817bb 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -44,6 +44,7 @@ #include "ue2common.h" #include "util/alloc.h" #include "util/compare.h" +#include "util/container.h" #include "util/noncopyable.h" #include "util/popcount.h" #include "util/target_info.h" @@ -87,20 +88,39 @@ public: }; class TeddySet { + /** \brief Max number of Teddy masks we use. */ + static constexpr size_t MAX_NUM_MASKS = 4; + + /** + * \brief Estimate of the max number of literals in a set, used to + * minimise allocations. + */ + static constexpr size_t LITS_PER_SET = 20; + + /** \brief Number of masks. */ u32 len; - // nibbleSets is a series of bitfields over 16 predicates - // that represent the whether shufti nibble set - // so for num_masks = 4 we will represent our strings by - // 8 u16s in the vector that indicate what a shufti bucket - // would have to look like - vector nibbleSets; - set litIds; + + /** + * \brief A series of bitfields over 16 predicates that represent the + * shufti nibble set. 
+ * + * So for num_masks = 4 we will represent our strings by 8 u16s in the + * vector that indicate what a shufti bucket would have to look like. + */ + small_vector nibbleSets; + + /** + * \brief Sorted, unique set of literals. We maintain our own set in a + * sorted vector to minimise allocations. + */ + small_vector litIds; + public: explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {} - const set & getLits() const { return litIds; } size_t litCount() const { return litIds.size(); } + const small_vector &getLits() const { return litIds; } - bool operator<(const TeddySet & s) const { + bool operator<(const TeddySet &s) const { return litIds < s.litIds; } @@ -120,7 +140,7 @@ public: } #endif - bool identicalTail(const TeddySet & ts) const { + bool identicalTail(const TeddySet &ts) const { return nibbleSets == ts.nibbleSets; } @@ -141,14 +161,16 @@ public: nibbleSets[i*2] = nibbleSets[i*2+1] = 0xffff; } } - litIds.insert(lit_id); + litIds.push_back(lit_id); + sort_and_unique(litIds); } void merge(const TeddySet &ts) { for (u32 i = 0; i < nibbleSets.size(); i++) { nibbleSets[i] |= ts.nibbleSets[i]; } - litIds.insert(ts.litIds.begin(), ts.litIds.end()); + litIds.insert(litIds.end(), ts.litIds.begin(), ts.litIds.end()); + sort_and_unique(litIds); } // return a value p from 0 .. MAXINT64 that gives p/MAXINT64 From cc4a5cc36f4785c243ba8c033f5393510449c624 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 22 Jun 2017 15:21:22 +1000 Subject: [PATCH 023/190] teddy_compile: style fixes, whitespace --- src/fdr/teddy_compile.cpp | 96 +++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 571817bb..663d0483 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -70,6 +70,9 @@ namespace { //#define TEDDY_DEBUG +/** \brief Max number of Teddy masks we use. 
*/ +static constexpr size_t MAX_NUM_MASKS = 4; + class TeddyCompiler : noncopyable { const TeddyEngineDescription ŋ const Grey &grey; @@ -84,13 +87,10 @@ public: } bytecode_ptr build(); - bool pack(map > &bucketToLits); + bool pack(map> &bucketToLits); }; class TeddySet { - /** \brief Max number of Teddy masks we use. */ - static constexpr size_t MAX_NUM_MASKS = 4; - /** * \brief Estimate of the max number of literals in a set, used to * minimise allocations. @@ -136,7 +136,7 @@ public: printf("%u ", id); } printf("\n"); - printf("Flood prone : %s\n", isRunProne()?"yes":"no"); + printf("Flood prone : %s\n", isRunProne() ? "yes" : "no"); } #endif @@ -151,28 +151,21 @@ public: u8 c = s[s.size() - i - 1]; u8 c_hi = (c >> 4) & 0xf; u8 c_lo = c & 0xf; - nibbleSets[i*2] = 1 << c_lo; + nibbleSets[i * 2] = 1 << c_lo; if (lit.nocase && ourisalpha(c)) { - nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2)); + nibbleSets[i * 2 + 1] = + (1 << (c_hi & 0xd)) | (1 << (c_hi | 0x2)); } else { - nibbleSets[i*2+1] = 1 << c_hi; + nibbleSets[i * 2 + 1] = 1 << c_hi; } } else { - nibbleSets[i*2] = nibbleSets[i*2+1] = 0xffff; + nibbleSets[i * 2] = nibbleSets[i * 2 + 1] = 0xffff; } } litIds.push_back(lit_id); sort_and_unique(litIds); } - void merge(const TeddySet &ts) { - for (u32 i = 0; i < nibbleSets.size(); i++) { - nibbleSets[i] |= ts.nibbleSets[i]; - } - litIds.insert(litIds.end(), ts.litIds.begin(), ts.litIds.end()); - sort_and_unique(litIds); - } - // return a value p from 0 .. 
MAXINT64 that gives p/MAXINT64 // likelihood of this TeddySet firing a first-stage accept // if it was given a bucket of its own and random data were @@ -189,15 +182,15 @@ public: // a small fixed cost + the cost of traversing some sort of followup // (assumption is that the followup is linear) u64a heuristic() const { - return probability() * (2+litCount()); + return probability() * (2 + litCount()); } bool isRunProne() const { u16 lo_and = 0xffff; u16 hi_and = 0xffff; for (u32 i = 0; i < len; i++) { - lo_and &= nibbleSets[i*2]; - hi_and &= nibbleSets[i*2+1]; + lo_and &= nibbleSets[i * 2]; + hi_and &= nibbleSets[i * 2 + 1]; } // we're not flood-prone if there's no way to get // through with a flood @@ -206,10 +199,25 @@ public: } return true; } + + friend TeddySet merge(const TeddySet &a, const TeddySet &b) { + assert(a.nibbleSets.size() == b.nibbleSets.size()); + + TeddySet m(a); + + for (size_t i = 0; i < m.nibbleSets.size(); i++) { + m.nibbleSets[i] |= b.nibbleSets[i]; + } + + m.litIds.insert(m.litIds.end(), b.litIds.begin(), b.litIds.end()); + sort_and_unique(m.litIds); + + return m; + } }; bool TeddyCompiler::pack(map > &bucketToLits) { + std::vector> &bucketToLits) { set sts; for (u32 i = 0; i < lits.size(); i++) { @@ -222,7 +230,8 @@ bool TeddyCompiler::pack(map TeddyCompiler::build() { + assert(eng.numMasks <= MAX_NUM_MASKS); + if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { DEBUG_PRINTF("too many literals: %zu\n", lits.size()); return nullptr; @@ -315,14 +322,14 @@ bytecode_ptr TeddyCompiler::build() { printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(), lits[i].nocase ? 
"caseless" : "caseful"); for (size_t j = 0; j < lits[i].s.size(); j++) { - printf("%02x", ((u32)lits[i].s[j])&0xff); + printf("%02x", ((u32)lits[i].s[j]) & 0xff); } printf("\n"); } #endif - map > bucketToLits; - if(eng.needConfirm(lits)) { + map> bucketToLits; + if (eng.needConfirm(lits)) { if (!pack(bucketToLits)) { DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n", lits.size(), eng.getNumBuckets()); @@ -383,15 +390,17 @@ bytecode_ptr TeddyCompiler::build() { // fill in masks for (u32 j = 0; j < eng.numMasks; j++) { - u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8); - u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8); + const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8); + const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8); + const u32 lo_base = msk_id_lo * 16; + const u32 hi_base = msk_id_hi * 16; // if we don't have a char at this position, fill in i // locations in these masks with '1' if (j >= sz) { for (u32 n = 0; n < 16; n++) { - baseMsk[msk_id_lo * 16 + n] |= bmsk; - baseMsk[msk_id_hi * 16 + n] |= bmsk; + baseMsk[lo_base + n] |= bmsk; + baseMsk[hi_base + n] |= bmsk; } } else { u8 c = l.s[sz - 1 - j]; @@ -410,29 +419,28 @@ bytecode_ptr TeddyCompiler::build() { for (u8 cm = 0; cm < 0x10; cm++) { if ((cm & m_lo) == (cmp_lo & m_lo)) { - baseMsk[msk_id_lo * 16 + cm] |= bmsk; + baseMsk[lo_base + cm] |= bmsk; } if ((cm & m_hi) == (cmp_hi & m_hi)) { - baseMsk[msk_id_hi * 16 + cm] |= bmsk; + baseMsk[hi_base + cm] |= bmsk; } } - } else{ + } else { if (l.nocase && ourisalpha(c)) { u32 cmHalfClear = (0xdf >> hiShift) & 0xf; - u32 cmHalfSet = (0x20 >> hiShift) & 0xf; - baseMsk[msk_id_hi * 16 + (n_hi & cmHalfClear)] |= bmsk; - baseMsk[msk_id_hi * 16 + (n_hi | cmHalfSet )] |= bmsk; + u32 cmHalfSet = (0x20 >> hiShift) & 0xf; + baseMsk[hi_base + (n_hi & cmHalfClear)] |= bmsk; + baseMsk[hi_base + (n_hi | cmHalfSet)] |= bmsk; } else { - baseMsk[msk_id_hi * 16 + n_hi] |= bmsk; + baseMsk[hi_base + n_hi] |= bmsk; } - 
baseMsk[msk_id_lo * 16 + n_lo] |= bmsk; + baseMsk[lo_base + n_lo] |= bmsk; } } } } } - #ifdef TEDDY_DEBUG for (u32 i = 0; i < eng.numMasks * 2; i++) { for (u32 j = 0; j < 16; j++) { From 2dc1f9d62973a79cbf21f719925876e15bda4306 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 23 Jun 2017 11:30:24 +1000 Subject: [PATCH 024/190] rose_build_matchers: fix fragment noruns calc --- src/rose/rose_build_matchers.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 836ac965..57269747 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -556,6 +556,17 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, return true; } +static +bool isNoRunsFragment(const RoseBuildImpl &build, const LitFragment &f, + const size_t max_len) { + // For the fragment to be marked "no runs", every literal it fires must + // need no further confirmation work. + return all_of_in(f.lit_ids, [&](u32 lit_id) { + const auto &info = build.literal_info.at(lit_id); + return isNoRunsLiteral(build, lit_id, info, max_len); + }); +} + static const raw_puff &getChainedPuff(const RoseBuildImpl &build, const Report &report) { @@ -691,15 +702,15 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, const LitFragment &f, u32 id, bool delay_rebuild, size_t max_len) { const rose_literal_id &lit = build.literals.at(id); - assert(id < build.literal_info.size()); - const auto &info = build.literal_info.at(id); DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(lit.s).c_str(), lit.s.length()); vector msk = lit.msk; // copy vector cmp = lit.cmp; // copy - bool noruns = isNoRunsLiteral(build, id, info, max_len); + + bool noruns = isNoRunsFragment(build, f, max_len); + DEBUG_PRINTF("fragment is %s\n", noruns ? 
"noruns" : "not noruns"); auto lit_final = lit.s; // copy From 3d4d39b8a98acdedae2e721ae35ce90a19e0e976 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 22 Jun 2017 16:51:35 +1000 Subject: [PATCH 025/190] clear_deeper_reports: clear dfa if dead If we remove all the reports from the DFA because they're too deep, then clear the DFA without depending on Hopcroft minimisation. --- src/nfa/mcclellancompile_util.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index 17e022fe..8bb258d3 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -187,7 +187,22 @@ bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) { } } - return changed; + if (!changed) { + return false; + } + + // We may have cleared all reports from the DFA, in which case it should + // become empty. + if (all_of_in(raw.states, [](const dstate &ds) { + return ds.reports.empty() && ds.reports_eod.empty(); + })) { + DEBUG_PRINTF("no reports left at all, dfa is dead\n"); + raw.states.clear(); + raw.start_anchored = DEAD_STATE; + raw.start_floating = DEAD_STATE; + } + + return true; } set all_reports(const raw_dfa &rdfa) { From 75e4aefabec32a85596dcebc67287c231f3ec23d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 23 Jun 2017 13:01:47 +1000 Subject: [PATCH 026/190] dfa: don't clear states, just mark dfa dead The previous change caused some assertion issues. 
--- src/nfa/dfa_min.cpp | 5 +++++ src/nfa/mcclellancompile.cpp | 2 ++ src/nfa/mcclellancompile_util.cpp | 6 +++++- src/nfa/mcclellancompile_util.h | 7 +++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/nfa/dfa_min.cpp b/src/nfa/dfa_min.cpp index f309cc53..c97ca5fb 100644 --- a/src/nfa/dfa_min.cpp +++ b/src/nfa/dfa_min.cpp @@ -59,6 +59,7 @@ #include "dfa_min.h" #include "grey.h" +#include "mcclellancompile_util.h" #include "rdfa.h" #include "ue2common.h" #include "util/container.h" @@ -299,6 +300,10 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { return; } + if (is_dead(rdfa)) { + DEBUG_PRINTF("dfa is empty\n"); + } + UNUSED const size_t states_before = rdfa.states.size(); HopcroftInfo info(rdfa); diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index e875477b..43b555af 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -964,6 +964,8 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, bool trust_daddy_states, set *accel_states) { + assert(!is_dead(raw)); + u16 total_daddy = 0; dfa_info info(strat); bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index 8bb258d3..317c5889 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -197,7 +197,6 @@ bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) { return ds.reports.empty() && ds.reports_eod.empty(); })) { DEBUG_PRINTF("no reports left at all, dfa is dead\n"); - raw.states.clear(); raw.start_anchored = DEAD_STATE; raw.start_floating = DEAD_STATE; } @@ -287,4 +286,9 @@ bool can_die_early(const raw_dfa &raw, u32 age_limit) { return can_die_early(raw, raw.start_anchored, visited, age_limit); } +bool is_dead(const raw_dfa &rdfa) { + return rdfa.start_anchored == DEAD_STATE && + rdfa.start_floating == DEAD_STATE; +} + } // namespace ue2 diff --git 
a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index d681e06b..bc730cdd 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -59,6 +59,13 @@ size_t hash_dfa(const raw_dfa &rdfa); bool can_die_early(const raw_dfa &raw, u32 age_limit); +/** + * \brief Returns true if this DFA cannot match, i.e. its start state is + * DEAD_STATE. + */ +bool is_dead(const raw_dfa &rdfa); + + } // namespace ue2 #endif From f6adc4f46437e41200abe2b869f24242237abb4a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 30 May 2017 17:30:56 +1000 Subject: [PATCH 027/190] ng_calc_components: skip if shell has 1 path only --- src/nfagraph/ng_calc_components.cpp | 41 +++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index bfe73eb2..e1682b65 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -220,6 +220,38 @@ vector findShellEdges(const NGHolder &g, return shell_edges; } +/** + * True if all edges out of vertices in the head shell lead to at most a single + * outside vertex. + */ +static +bool shellHasOnePath(const NGHolder &g, + const flat_set &head_shell) { + if (head_shell.empty()) { + DEBUG_PRINTF("no head shell\n"); + return false; + } + + NFAVertex succ = NGHolder::null_vertex(); + for (auto u : head_shell) { + for (auto v : adjacent_vertices_range(u, g)) { + if (contains(head_shell, v)) { + continue; + } + if (!succ) { + succ = v; + continue; + } + if (succ == v) { + continue; + } + return false; + } + } + DEBUG_PRINTF("head shell has only one path through it\n"); + return true; +} + /** * Common code called by calc- and recalc- below. Splits the given holder into * one or more connected components, adding them to the comps deque. @@ -250,11 +282,20 @@ void splitIntoComponents(unique_ptr g, return; } + // Find edges connecting the head and tail shells directly. 
vector shell_edges = findShellEdges(*g, head_shell, tail_shell); DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n", head_shell.size(), tail_shell.size(), shell_edges.size()); + // If there's only one way out of the head shell and no shell edges, we + // aren't going to find more than one component. + if (shell_edges.empty() && shellHasOnePath(*g, head_shell)) { + DEBUG_PRINTF("single component\n"); + comps.push_back(std::move(g)); + return; + } + ue2::unordered_map old2new; auto ug = createUnGraph(*g, true, true, old2new); From 63973175ed34221491603a44156f89c726550c8d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 7 Jun 2017 14:15:10 +1000 Subject: [PATCH 028/190] ng_calc_components: check tail shell too --- src/nfagraph/ng_calc_components.cpp | 54 ++++++++++++++++++----------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index e1682b65..7ac57dab 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -220,38 +220,52 @@ vector findShellEdges(const NGHolder &g, return shell_edges; } -/** - * True if all edges out of vertices in the head shell lead to at most a single - * outside vertex. 
- */ -static -bool shellHasOnePath(const NGHolder &g, - const flat_set &head_shell) { - if (head_shell.empty()) { - DEBUG_PRINTF("no head shell\n"); +template +bool shellHasOnePath(const NGHolder &g, const flat_set &shell, + GetAdjRange adj_range_func) { + if (shell.empty()) { + DEBUG_PRINTF("no shell\n"); return false; } - NFAVertex succ = NGHolder::null_vertex(); - for (auto u : head_shell) { - for (auto v : adjacent_vertices_range(u, g)) { - if (contains(head_shell, v)) { + NFAVertex exit_vertex = NGHolder::null_vertex(); + for (auto u : shell) { + for (auto v : adj_range_func(u, g)) { + if (contains(shell, v)) { continue; } - if (!succ) { - succ = v; + if (!exit_vertex) { + exit_vertex = v; continue; } - if (succ == v) { + if (exit_vertex == v) { continue; } return false; } } - DEBUG_PRINTF("head shell has only one path through it\n"); + return true; } +/** + * True if all edges out of vertices in the head shell lead to at most a single + * outside vertex, or the inverse for the tail shell. + */ +static +bool shellHasOnePath(const NGHolder &g, const flat_set &head_shell, + const flat_set &tail_shell) { + if (shellHasOnePath(g, head_shell, adjacent_vertices_range)) { + DEBUG_PRINTF("head shell has only one path through it\n"); + return true; + } + if (shellHasOnePath(g, tail_shell, inv_adjacent_vertices_range)) { + DEBUG_PRINTF("tail shell has only one path into it\n"); + return true; + } + return false; +} + /** * Common code called by calc- and recalc- below. Splits the given holder into * one or more connected components, adding them to the comps deque. @@ -288,9 +302,9 @@ void splitIntoComponents(unique_ptr g, DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n", head_shell.size(), tail_shell.size(), shell_edges.size()); - // If there's only one way out of the head shell and no shell edges, we - // aren't going to find more than one component. 
- if (shell_edges.empty() && shellHasOnePath(*g, head_shell)) { + // If there are no shell edges and only one path out of the head shell or + // into the tail shell, we aren't going to find more than one component. + if (shell_edges.empty() && shellHasOnePath(*g, head_shell, tail_shell)) { DEBUG_PRINTF("single component\n"); comps.push_back(std::move(g)); return; From 5a7d5958d17f951989bb0ad4cba715b96b8abe3c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 30 May 2017 17:09:03 +1000 Subject: [PATCH 029/190] ng_violet: skip analysis on graphs with wide reach --- src/nfagraph/ng_violet.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 4195045c..c9460b93 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -2952,6 +2952,15 @@ RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance, return vg; } + /* Avoid running the Violet analysis at all on graphs with no vertices with + * small reach, since we will not be able to extract any literals. */ + if (all_of_in(vertices_range(h), [&](NFAVertex v) { + return is_special(v, h) || h[v].char_reach.count() >= 200; + })) { + DEBUG_PRINTF("fail, no vertices with small reach\n"); + return vg; + } + DEBUG_PRINTF("hello world\n"); /* Step 1: avoid outfixes as we always have to run them. 
*/ From 5837f68b9a48959bb3841fe27e4224db03be6f0f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 31 May 2017 10:27:24 +1000 Subject: [PATCH 030/190] ng_literal_decorated: pre-check for narrow reach --- src/nfagraph/ng_literal_decorated.cpp | 5 +++++ src/nfagraph/ng_util.cpp | 6 ++++++ src/nfagraph/ng_util.h | 6 ++++++ src/nfagraph/ng_violet.cpp | 4 +--- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/nfagraph/ng_literal_decorated.cpp b/src/nfagraph/ng_literal_decorated.cpp index 89c01a6c..3ba810f9 100644 --- a/src/nfagraph/ng_literal_decorated.cpp +++ b/src/nfagraph/ng_literal_decorated.cpp @@ -210,6 +210,11 @@ bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g, return false; } + if (!hasNarrowReachVertex(g)) { + DEBUG_PRINTF("no narrow reach vertices\n"); + return false; + } + if (hasLargeDegreeVertex(g)) { DEBUG_PRINTF("large degree\n"); return false; diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 0776fa04..c0ad6199 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -257,6 +257,12 @@ bool hasBigCycles(const NGHolder &g) { return false; } +bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count) { + return any_of_in(vertices_range(g), [&](NFAVertex v) { + return !is_special(v, g) && g[v].char_reach.count() < max_reach_count; + }); +} + bool can_never_match(const NGHolder &g) { assert(edge(g.accept, g.acceptEod, g).second); if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 1d3a6f32..4c529a83 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -233,6 +233,12 @@ bool hasReachableCycle(const NGHolder &g, NFAVertex src); /** True if g has any cycles which are not self-loops. */ bool hasBigCycles(const NGHolder &g); +/** + * \brief True if g has at least one non-special vertex with reach smaller than + * max_reach_count. The default of 200 is pretty conservative. 
+ */ +bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count = 200); + /** Returns the set of all vertices that appear in any of the graph's cycles. */ std::set findVerticesInCycles(const NGHolder &g); diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index c9460b93..2e1171ab 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -2954,9 +2954,7 @@ RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance, /* Avoid running the Violet analysis at all on graphs with no vertices with * small reach, since we will not be able to extract any literals. */ - if (all_of_in(vertices_range(h), [&](NFAVertex v) { - return is_special(v, h) || h[v].char_reach.count() >= 200; - })) { + if (!hasNarrowReachVertex(h)) { DEBUG_PRINTF("fail, no vertices with small reach\n"); return vg; } From e27e76a595557b28f782a83bc8c7189411b1cc93 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 5 Jun 2017 15:39:08 +1000 Subject: [PATCH 031/190] ng_literal_decorated: remove unused header --- src/nfagraph/ng_literal_decorated.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nfagraph/ng_literal_decorated.cpp b/src/nfagraph/ng_literal_decorated.cpp index 3ba810f9..1a8cafac 100644 --- a/src/nfagraph/ng_literal_decorated.cpp +++ b/src/nfagraph/ng_literal_decorated.cpp @@ -45,8 +45,6 @@ #include #include -#include - using namespace std; namespace ue2 { From d0e4a703ed27752193ef76fd8131adc8f743fe72 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 2 Jun 2017 11:26:59 +1000 Subject: [PATCH 032/190] ng_literal_analysis: reserve space for fwd_edges --- src/nfagraph/ng_literal_analysis.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index a6664b07..19660580 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -488,12 +488,14 @@ const char *describeColor(boost::default_color_type 
c) { */ static vector add_reverse_edges_and_index(LitGraph &lg) { + const size_t edge_count = num_edges(lg); vector fwd_edges; + fwd_edges.reserve(edge_count); for (const auto &e : edges_range(lg)) { fwd_edges.push_back(e); } - vector rev_map(2 * num_edges(lg)); + vector rev_map(2 * edge_count); for (const auto &e : fwd_edges) { LitVertex u = source(e, lg); From 92c28d28c142da31d79df9fc6af7b9bc9ef9a052 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 5 Jun 2017 16:53:55 +1000 Subject: [PATCH 033/190] ng_mcclellan: use flat_set in triggerAllowed() --- src/nfagraph/ng_mcclellan.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 6ada273c..7bb8335c 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -283,10 +283,8 @@ static bool triggerAllowed(const NGHolder &g, const NFAVertex v, const vector > &all_triggers, const vector &trigger) { - set curr; - set next; - - curr.insert(v); + flat_set curr({v}); + flat_set next; for (auto it = trigger.rbegin(); it != trigger.rend(); ++it) { next.clear(); From fe2654b086fbd65404d02ad88f8e525d6bcf624c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 20 Jun 2017 13:49:46 +1000 Subject: [PATCH 034/190] ng_small_literal_set: pre-check for narrow reach --- src/nfagraph/ng_small_literal_set.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/nfagraph/ng_small_literal_set.cpp b/src/nfagraph/ng_small_literal_set.cpp index 1d7be65b..fb191efa 100644 --- a/src/nfagraph/ng_small_literal_set.cpp +++ b/src/nfagraph/ng_small_literal_set.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,8 +33,8 @@ #include "ng_small_literal_set.h" #include "grey.h" -#include 
"ng_util.h" #include "ng_holder.h" +#include "ng_util.h" #include "rose/rose_build.h" #include "util/compare.h" #include "util/compile_context.h" @@ -222,6 +222,11 @@ bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g, return false; } + if (!hasNarrowReachVertex(g, MAX_LITERAL_SET_SIZE * 2 + 1)) { + DEBUG_PRINTF("vertex with wide reach found\n"); + return false; + } + DEBUG_PRINTF("looking for literals\n"); map> literals; From 7ec757c7cef6f2f7f65ac6da210fb36e37cdfb4a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 1 Jun 2017 14:40:04 +1000 Subject: [PATCH 035/190] ng_cyclic_redundancy: persist colour map --- src/nfagraph/ng_cyclic_redundancy.cpp | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/nfagraph/ng_cyclic_redundancy.cpp b/src/nfagraph/ng_cyclic_redundancy.cpp index 9ae4458c..e4138a4f 100644 --- a/src/nfagraph/ng_cyclic_redundancy.cpp +++ b/src/nfagraph/ng_cyclic_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,6 +65,7 @@ #include "util/graph_range.h" #include "util/ue2_containers.h" +#include #include #include @@ -126,14 +127,16 @@ class SearchVisitor : public boost::default_dfs_visitor { template static bool searchForward(const Graph &g, const CharReach &reach, + vector &colours, const flat_set &s, typename Graph::vertex_descriptor w) { - map colours; + fill(colours.begin(), colours.end(), boost::white_color); + auto colour_map = + make_iterator_property_map(colours.begin(), get(vertex_index, g)); try { - depth_first_visit(g, w, SearchVisitor(reach), - make_assoc_property_map(colours), - VertexInSet(s)); - } catch (SearchFailed&) { + depth_first_visit(g, w, SearchVisitor(reach), colour_map, + VertexInSet(s)); + } catch (SearchFailed &) { return false; } @@ -162,6 
+165,9 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, typedef typename Graph::vertex_descriptor vertex_descriptor; + // Colour map used for depth_first_visit(). + vector colours(num_vertices(g)); + // precalc successors of v. flat_set succ_v; insert(&succ_v, adjacent_vertices(v, g)); @@ -200,7 +206,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, DEBUG_PRINTF(" - checking w %zu\n", g[w].index); - if (!searchForward(g, reach, s, w)) { + if (!searchForward(g, reach, colours, s, w)) { continue; } @@ -234,6 +240,8 @@ bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) { } bool removeCyclicPathRedundancy(NGHolder &g) { + assert(hasCorrectlyNumberedVertices(g)); + // Forward pass. bool f_changed = cyclicPathRedundancyPass(g, g); if (f_changed) { From 95e3fd3f32a63e2eecdf05bf1033db4dfc00ad20 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 31 May 2017 16:07:26 +1000 Subject: [PATCH 036/190] ng_misc_opt: remove dead code --- src/nfagraph/ng_misc_opt.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index 29939fec..dde5eb95 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -554,9 +554,6 @@ bool mergeCyclicDotStars(NGHolder &g) { */ static flat_set findDependentVertices(const NGHolder &g, NFAVertex v) { - auto v_pred = preds(v, g); - flat_set may_be_on; - /* We need to exclude any vertex that may be reached on a path which is * incompatible with the vertex v being on. 
*/ From b09e3acd0430859db65ebf6e93f0f4abf7e3e989 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 31 May 2017 16:11:52 +1000 Subject: [PATCH 037/190] ng_misc_opt: improve performance in large cases --- src/nfagraph/ng_misc_opt.cpp | 67 +++++++++++++++++++++++------------- src/util/graph.h | 25 +++++++++++++- 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index dde5eb95..c55a02e6 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -72,6 +72,7 @@ #include "util/ue2_containers.h" #include "ue2common.h" +#include #include #include @@ -549,11 +550,28 @@ bool mergeCyclicDotStars(NGHolder &g) { return true; } +struct PrunePathsInfo { + explicit PrunePathsInfo(const NGHolder &g) + : color_map(num_vertices(g)), bad(num_vertices(g)) {} + + void clear() { + no_explore.clear(); + fill(color_map.begin(), color_map.end(), boost::white_color); + bad.reset(); + } + + flat_set no_explore; + vector color_map; + boost::dynamic_bitset<> bad; +}; + /** - * Returns the set of vertices that cannot be on if v is not on. + * Finds the set of vertices that cannot be on if v is not on, setting their + * indices in bitset PrunePathsInfo::bad. */ static -flat_set findDependentVertices(const NGHolder &g, NFAVertex v) { +void findDependentVertices(const NGHolder &g, PrunePathsInfo &info, + NFAVertex v) { /* We need to exclude any vertex that may be reached on a path which is * incompatible with the vertex v being on. */ @@ -567,38 +585,31 @@ flat_set findDependentVertices(const NGHolder &g, NFAVertex v) { * check down edges. Alternately can just filter these edges out of the * graph first. 
*/ - flat_set no_explore; for (NFAVertex t : adjacent_vertices_range(v, g)) { for (NFAEdge e : in_edges_range(t, g)) { NFAVertex s = source(e, g); if (edge(s, v, g).second) { - no_explore.insert(e); + info.no_explore.insert(e); } } } - auto filtered_g = make_filtered_graph(g, make_bad_edge_filter(&no_explore)); + auto filtered_g = + make_filtered_graph(g, make_bad_edge_filter(&info.no_explore)); - vector color_raw(num_vertices(g)); - auto color = make_iterator_property_map(color_raw.begin(), + auto color = make_iterator_property_map(info.color_map.begin(), get(vertex_index, g)); - flat_set bad; + + // We use a bitset to track bad vertices, rather than filling a (potentially + // very large) set structure. + auto recorder = make_vertex_index_bitset_recorder(info.bad); + for (NFAVertex b : vertices_range(g)) { if (b != g.start && g[b].char_reach.isSubsetOf(g[v].char_reach)) { continue; } - boost::depth_first_visit(filtered_g, b, make_vertex_recorder(bad), - color); + boost::depth_first_visit(filtered_g, b, recorder, color); } - - flat_set rv; - for (NFAVertex u : vertices_range(g)) { - if (!contains(bad, u)) { - DEBUG_PRINTF("%zu is good\n", g[u].index); - rv.insert(u); - } - } - return rv; } static @@ -614,14 +625,16 @@ bool sometimesEnabledConcurrently(NFAVertex main_cyclic, NFAVertex v, } static -bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { +bool pruneUsingSuccessors(NGHolder &g, PrunePathsInfo &info, NFAVertex u, + som_type som) { if (som && (is_virtual_start(u, g) || u == g.startDs)) { return false; } bool changed = false; DEBUG_PRINTF("using cyclic %zu as base\n", g[u].index); - auto children = findDependentVertices(g, u); + info.clear(); + findDependentVertices(g, info, u); vector u_succs; for (NFAVertex v : adjacent_vertices_range(u, g)) { if (som && is_virtual_start(v, g)) { @@ -631,22 +644,25 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { } u_succs.push_back(v); } + stable_sort(u_succs.begin(), u_succs.end(), 
[&](NFAVertex a, NFAVertex b) { return g[a].char_reach.count() > g[b].char_reach.count(); }); + + flat_set dead; + for (NFAVertex v : u_succs) { DEBUG_PRINTF(" using %zu as killer\n", g[v].index); /* Need to distinguish between vertices that are switched on after the * cyclic vs vertices that are switched on concurrently with the cyclic * if (subject to a suitable reach) */ bool v_peer_of_cyclic = willBeEnabledConcurrently(u, v, g); - set dead; for (NFAVertex s : adjacent_vertices_range(v, g)) { DEBUG_PRINTF(" looking at preds of %zu\n", g[s].index); for (NFAEdge e : in_edges_range(s, g)) { NFAVertex p = source(e, g); - if (!contains(children, p) || p == v || p == u + if (info.bad.test(g[p].index) || p == v || p == u || p == g.accept) { DEBUG_PRINTF("%zu not a cand\n", g[p].index); continue; @@ -684,6 +700,7 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { } } remove_edges(dead, g); + dead.clear(); } DEBUG_PRINTF("changed %d\n", (int)changed); @@ -693,9 +710,11 @@ bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) { /* TODO: the reverse form of this is also possible */ bool changed = false; + PrunePathsInfo info(g); + for (NFAVertex v : vertices_range(g)) { if (hasSelfLoop(v, g) && g[v].char_reach.all()) { - changed |= pruneUsingSuccessors(g, v, som); + changed |= pruneUsingSuccessors(g, info, v, som); } } diff --git a/src/util/graph.h b/src/util/graph.h index 4c2876f1..39e86487 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -235,6 +235,29 @@ vertex_recorder make_vertex_recorder(Cont &o) { return vertex_recorder(o); } +/** + * \brief A vertex recorder visitor that sets the bits in the 
given bitset + * type (e.g. boost::dynamic_bitset) corresponding to the indices of the + * vertices encountered. + */ +template +class vertex_index_bitset_recorder : public boost::default_dfs_visitor { +public: + explicit vertex_index_bitset_recorder(Bitset &o) : out(o) {} + template + void discover_vertex(typename Graph::vertex_descriptor v, const Graph &g) { + assert(g[v].index < out.size()); + out.set(g[v].index); + } + Bitset &out; +}; + +template +vertex_index_bitset_recorder +make_vertex_index_bitset_recorder(Bitset &o) { + return vertex_index_bitset_recorder(o); +} + template std::pair add_edge_if_not_present(typename Graph::vertex_descriptor u, From dbd3f66e8771b4316dc025d89cb1d958dc3008c4 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Sun, 22 Jan 2017 12:23:25 -0800 Subject: [PATCH 038/190] Reinforced Teddy with 1-byte approach, based on "shift-or" and AVX2. --- src/fdr/fdr_confirm.h | 6 +- src/fdr/fdr_confirm_compile.cpp | 50 +- src/fdr/fdr_confirm_runtime.h | 79 +-- src/fdr/teddy.c | 969 +++++++++++++-------------- src/fdr/teddy_avx2.c | 697 ++++++------------- src/fdr/teddy_compile.cpp | 243 +++++-- src/fdr/teddy_engine_description.cpp | 12 - src/fdr/teddy_engine_description.h | 1 - src/fdr/teddy_internal.h | 19 + src/fdr/teddy_runtime_common.h | 227 +++++-- 10 files changed, 1070 insertions(+), 1233 deletions(-) diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h index e160b96d..d975747e 100644 --- a/src/fdr/fdr_confirm.h +++ b/src/fdr/fdr_confirm.h @@ -78,12 +78,8 @@ struct LitInfo { struct FDRConfirm { CONF_TYPE andmsk; CONF_TYPE mult; - u32 nBitsOrSoleID; // if flags is NO_CONFIRM then this is soleID - u32 flags; // sole meaning is 'non-zero means no-confirm' (that is all) + u32 nBits; hwlm_group_t groups; - u32 soleLitSize; - u32 soleLitCmp; - u32 soleLitMsk; }; static really_inline diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 616ff86e..a6eee4cf 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ 
b/src/fdr/fdr_confirm_compile.cpp @@ -130,7 +130,7 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, static bytecode_ptr getFDRConfirm(const vector &lits, - bool make_small, bool make_confirm) { + bool make_small) { // Every literal must fit within CONF_TYPE. assert(all_of_in(lits, [](const hwlmLiteral &lit) { return lit.s.size() <= sizeof(CONF_TYPE); @@ -153,42 +153,6 @@ bytecode_ptr getFDRConfirm(const vector &lits, } CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL; - u32 flags = 0; - // we use next three variables for 'confirmless' case to speed-up - // confirmation process - u32 soleLitSize = 0; - u32 soleLitCmp = 0; - u32 soleLitMsk = 0; - - if (!make_confirm) { - flags = FDRC_FLAG_NO_CONFIRM; - if (lits[0].noruns) { - // messy - need to clean this up later as flags is sorta kinda - // obsoleted - flags |= FDRC_FLAG_NOREPEAT; - } - mult = 0; - soleLitSize = lits[0].s.size() - 1; - // we can get to this point only in confirmless case; - // it means that we have only one literal per FDRConfirm (no packing), - // with no literal mask and size of literal is less or equal - // to the number of masks of Teddy engine; - // maximum number of masks for Teddy is 4, so the size of - // literal is definitely less or equal to size of u32 - assert(lits[0].s.size() <= sizeof(u32)); - for (u32 i = 0; i < lits[0].s.size(); i++) { - u32 shiftLoc = (sizeof(u32) - i - 1) * 8; - u8 c = lits[0].s[lits[0].s.size() - i - 1]; - if (lits[0].nocase && ourisalpha(c)) { - soleLitCmp |= (u32)(c & CASE_CLEAR) << shiftLoc; - soleLitMsk |= (u32)CASE_CLEAR << shiftLoc; - } - else { - soleLitCmp |= (u32)c << shiftLoc; - soleLitMsk |= (u32)0xff << shiftLoc; - } - } - } // we can walk the vector and assign elements from the vectors to a // map by hash value @@ -276,11 +240,7 @@ bytecode_ptr getFDRConfirm(const vector &lits, fdrc->andmsk = andmsk; fdrc->mult = mult; - fdrc->nBitsOrSoleID = (flags & FDRC_FLAG_NO_CONFIRM) ? 
lits[0].id : nBits; - fdrc->flags = flags; - fdrc->soleLitSize = soleLitSize; - fdrc->soleLitCmp = soleLitCmp; - fdrc->soleLitMsk = soleLitMsk; + fdrc->nBits = nBits; fdrc->groups = gm; @@ -334,12 +294,8 @@ setupFullConfs(const vector &lits, const EngineDescription &eng, map> &bucketToLits, bool make_small) { - bool makeConfirm = true; unique_ptr teddyDescr = getTeddyDescription(eng.getID()); - if (teddyDescr) { - makeConfirm = teddyDescr->needConfirm(lits); - } BC2CONF bc2Conf; u32 totalConfirmSize = 0; @@ -351,7 +307,7 @@ setupFullConfs(const vector &lits, } DEBUG_PRINTF("b %d sz %zu\n", b, vl.size()); - auto fc = getFDRConfirm(vl, make_small, makeConfirm); + auto fc = getFDRConfirm(vl, make_small); totalConfirmSize += fc.size(); bc2Conf.emplace(b, move(fc)); } diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index ea644bfb..d75408f4 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -43,11 +43,12 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a size_t i, hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) { assert(i < a->len); + assert(i >= a->start_offset); assert(ISALIGNED(fdrc)); const u8 * buf = a->buf; u32 c = CONF_HASH_CALL(conf_key, fdrc->andmsk, fdrc->mult, - fdrc->nBitsOrSoleID); + fdrc->nBits); u32 start = getConfirmLitIndex(fdrc)[c]; if (likely(!start)) { return; @@ -94,80 +95,4 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a } while (oldNext); } -// 'light-weight' confirmation function which is used by 1-mask Teddy; -// in the 'confirmless' case it simply calls callback function, -// otherwise it calls 'confWithBit' function for the full confirmation procedure -static really_inline -void confWithBit1(const struct FDRConfirm *fdrc, - const struct FDR_Runtime_Args *a, size_t i, - hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) { - assert(i < a->len); - assert(ISALIGNED(fdrc)); - - if (unlikely(fdrc->mult)) { - 
confWithBit(fdrc, a, i, control, last_match, conf_key); - return; - } else { - u32 id = fdrc->nBitsOrSoleID; - - if ((*last_match == id) && (fdrc->flags & FDRC_FLAG_NOREPEAT)) { - return; - } - *last_match = id; - *control = a->cb(i, i, id, a->ctxt); - } -} - -// This is 'light-weight' confirmation function which is used by 2-3-4-mask Teddy -// In the 'confirmless' case it makes fast 32-bit comparison, -// otherwise it calls 'confWithBit' function for the full confirmation procedure -static really_inline -void confWithBitMany(const struct FDRConfirm *fdrc, - const struct FDR_Runtime_Args *a, size_t i, CautionReason r, - hwlmcb_rv_t *control, u32 *last_match, u64a conf_key) { - assert(i < a->len); - assert(ISALIGNED(fdrc)); - - if (i < a->start_offset) { - return; - } - - if (unlikely(fdrc->mult)) { - confWithBit(fdrc, a, i, control, last_match, conf_key); - return; - } else { - const u32 id = fdrc->nBitsOrSoleID; - const u32 len = fdrc->soleLitSize; - - if ((*last_match == id) && (fdrc->flags & FDRC_FLAG_NOREPEAT)) { - return; - } - - if (r == VECTORING && len > i - a->start_offset) { - if (len > i + a->len_history) { - return; - } - - u32 cmp = (u32)a->buf[i] << 24; - - if (len <= i) { - for (u32 j = 1; j <= len; j++) { - cmp |= (u32)a->buf[i - j] << (24 - (j * 8)); - } - } else { - for (u32 j = 1; j <= i; j++) { - cmp |= (u32)a->buf[i - j] << (24 - (j * 8)); - } - cmp |= (u32)(a->histBytes >> (40 + i * 8)); - } - - if ((fdrc->soleLitMsk & cmp) != fdrc->soleLitCmp) { - return; - } - } - *last_match = id; - *control = a->cb(i - len, i, id, a->ctxt); - } -} - #endif diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index 636c741b..da5096a0 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -38,90 +38,294 @@ #include "util/simd_utils.h" const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - 
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }; +#if defined(__AVX2__) // reinforced teddy + #ifdef ARCH_64_BIT #define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ do { \ - if (unlikely(isnonzero128(var))) { \ + if (unlikely(diff256(var, ones256()))) { \ + m128 lo = movdq_lo(var); \ + m128 hi = movdq_hi(var); \ + u64a part1 = movq(lo); \ + u64a part2 = movq(rshiftbyte_m128(lo, 8)); \ + u64a part3 = movq(hi); \ + u64a part4 = movq(rshiftbyte_m128(hi, 8)); \ + if (unlikely(part1 != ones_u64a)) { \ + part1 = ~part1; \ + conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part2 != ones_u64a)) { \ + part2 = ~part2; \ + conf_fn(&part2, bucket, offset + 8, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part3 != ones_u64a)) { \ + part3 = ~part3; \ + conf_fn(&part3, bucket, offset + 16, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part4 != ones_u64a)) { \ + part4 = ~part4; \ + conf_fn(&part4, bucket, offset + 24, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + } \ +} while(0) +#else +#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(diff256(var, ones256()))) { \ + m128 lo = movdq_lo(var); \ + m128 hi = movdq_hi(var); \ + u32 part1 = movd(lo); \ + u32 part2 = 
movd(rshiftbyte_m128(lo, 4)); \ + u32 part3 = movd(rshiftbyte_m128(lo, 8)); \ + u32 part4 = movd(rshiftbyte_m128(lo, 12)); \ + u32 part5 = movd(hi); \ + u32 part6 = movd(rshiftbyte_m128(hi, 4)); \ + u32 part7 = movd(rshiftbyte_m128(hi, 8)); \ + u32 part8 = movd(rshiftbyte_m128(hi, 12)); \ + if (unlikely(part1 != ones_u32)) { \ + part1 = ~part1; \ + conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part2 != ones_u32)) { \ + part2 = ~part2; \ + conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part3 != ones_u32)) { \ + part3 = ~part3; \ + conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part4 != ones_u32)) { \ + part4 = ~part4; \ + conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part5 != ones_u32)) { \ + part5 = ~part5; \ + conf_fn(&part5, bucket, offset + 16, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part6 != ones_u32)) { \ + part6 = ~part6; \ + conf_fn(&part6, bucket, offset + 20, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part7 != ones_u32)) { \ + part7 = ~part7; \ + conf_fn(&part7, bucket, offset + 24, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + if (unlikely(part8 != ones_u32)) { \ + part8 = ~part8; \ + conf_fn(&part8, bucket, offset + 28, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ + } \ +} while(0) +#endif + +#define PREP_SHUF_MASK_NO_REINFORCEMENT(val) \ + m256 lo = and256(val, *lo_mask); \ + m256 hi = and256(rshift64_m256(val, 4), 
*lo_mask) + +#define PREP_SHUF_MASK \ + PREP_SHUF_MASK_NO_REINFORCEMENT(load256(ptr)); \ + *c_128 = *(ptr + 15); \ + m256 r_msk = set64x4(0ULL, r_msk_base[*c_128], 0ULL, r_msk_base[*c_0]); \ + *c_0 = *(ptr + 31) + +#define SHIFT_OR_M1 \ + or256(pshufb_m256(dup_mask[0], lo), pshufb_m256(dup_mask[1], hi)) + +#define SHIFT_OR_M2 \ + or256(lshift128_m256(or256(pshufb_m256(dup_mask[2], lo), \ + pshufb_m256(dup_mask[3], hi)), \ + 1), SHIFT_OR_M1) + +#define SHIFT_OR_M3 \ + or256(lshift128_m256(or256(pshufb_m256(dup_mask[4], lo), \ + pshufb_m256(dup_mask[5], hi)), \ + 2), SHIFT_OR_M2) + +#define SHIFT_OR_M4 \ + or256(lshift128_m256(or256(pshufb_m256(dup_mask[6], lo), \ + pshufb_m256(dup_mask[7], hi)), \ + 3), SHIFT_OR_M3) + +static really_inline +m256 prep_conf_teddy_no_reinforcement_m1(const m256 *lo_mask, + const m256 *dup_mask, + const m256 val) { + PREP_SHUF_MASK_NO_REINFORCEMENT(val); + return SHIFT_OR_M1; +} + +static really_inline +m256 prep_conf_teddy_no_reinforcement_m2(const m256 *lo_mask, + const m256 *dup_mask, + const m256 val) { + PREP_SHUF_MASK_NO_REINFORCEMENT(val); + return SHIFT_OR_M2; +} + +static really_inline +m256 prep_conf_teddy_no_reinforcement_m3(const m256 *lo_mask, + const m256 *dup_mask, + const m256 val) { + PREP_SHUF_MASK_NO_REINFORCEMENT(val); + return SHIFT_OR_M3; +} + +static really_inline +m256 prep_conf_teddy_no_reinforcement_m4(const m256 *lo_mask, + const m256 *dup_mask, + const m256 val) { + PREP_SHUF_MASK_NO_REINFORCEMENT(val); + return SHIFT_OR_M4; +} + +static really_inline +m256 prep_conf_teddy_m1(const m256 *lo_mask, const m256 *dup_mask, + const u8 *ptr, const u64a *r_msk_base, + u32 *c_0, u32 *c_128) { + PREP_SHUF_MASK; + return or256(SHIFT_OR_M1, r_msk); +} + +static really_inline +m256 prep_conf_teddy_m2(const m256 *lo_mask, const m256 *dup_mask, + const u8 *ptr, const u64a *r_msk_base, + u32 *c_0, u32 *c_128) { + PREP_SHUF_MASK; + return or256(SHIFT_OR_M2, r_msk); +} + +static really_inline +m256 prep_conf_teddy_m3(const 
m256 *lo_mask, const m256 *dup_mask, + const u8 *ptr, const u64a *r_msk_base, + u32 *c_0, u32 *c_128) { + PREP_SHUF_MASK; + return or256(SHIFT_OR_M3, r_msk); +} + +static really_inline +m256 prep_conf_teddy_m4(const m256 *lo_mask, const m256 *dup_mask, + const u8 *ptr, const u64a *r_msk_base, + u32 *c_0, u32 *c_128) { + PREP_SHUF_MASK; + return or256(SHIFT_OR_M4, r_msk); +} + +#else // not defined __AVX2__ + +#ifdef ARCH_64_BIT +#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(diff128(var, ones128()))) { \ u64a lo = movq(var); \ u64a hi = movq(rshiftbyte_m128(var, 8)); \ - if (unlikely(lo)) { \ + if (unlikely(lo != ones_u64a)) { \ + lo = ~lo; \ conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(hi)) { \ + if (unlikely(hi != ones_u64a)) { \ + hi = ~hi; \ conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ } \ -} while (0); +} while(0) #else #define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ do { \ - if (unlikely(isnonzero128(var))) { \ + if (unlikely(diff128(var, ones128()))) { \ u32 part1 = movd(var); \ u32 part2 = movd(rshiftbyte_m128(var, 4)); \ u32 part3 = movd(rshiftbyte_m128(var, 8)); \ u32 part4 = movd(rshiftbyte_m128(var, 12)); \ - if (unlikely(part1)) { \ + if (unlikely(part1 != ones_u32)) { \ + part1 = ~part1; \ conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part2)) { \ + if (unlikely(part2 != ones_u32)) { \ + part2 = ~part2; \ conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part3)) { \ + if (unlikely(part3 != ones_u32)) { \ + part3 = ~part3; \ conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if 
(unlikely(part4)) { \ + if (unlikely(part4 != ones_u32)) { \ + part4 = ~part4; \ conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ } \ -} while (0); +} while(0) #endif static really_inline @@ -129,8 +333,8 @@ m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) { m128 mask = set16x8(0xf); m128 lo = and128(val, mask); m128 hi = and128(rshift64_m128(val, 4), mask); - return and128(pshufb_m128(maskBase[0 * 2], lo), - pshufb_m128(maskBase[0 * 2 + 1], hi)); + return or128(pshufb_m128(maskBase[0 * 2], lo), + pshufb_m128(maskBase[0 * 2 + 1], hi)); } static really_inline @@ -140,11 +344,11 @@ m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) { m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m1(maskBase, val); - m128 res_1 = and128(pshufb_m128(maskBase[1*2], lo), - pshufb_m128(maskBase[1*2+1], hi)); - m128 res_shifted_1 = palignr(res_1, *old_1, 16-1); + m128 res_1 = or128(pshufb_m128(maskBase[1 * 2], lo), + pshufb_m128(maskBase[1 * 2 + 1], hi)); + m128 res_shifted_1 = palignr(res_1, *old_1, 16 - 1); *old_1 = res_1; - return and128(r, res_shifted_1); + return or128(r, res_shifted_1); } static really_inline @@ -155,11 +359,11 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m2(maskBase, old_1, val); - m128 res_2 = and128(pshufb_m128(maskBase[2*2], lo), - pshufb_m128(maskBase[2*2+1], hi)); - m128 res_shifted_2 = palignr(res_2, *old_2, 16-2); + m128 res_2 = or128(pshufb_m128(maskBase[2 * 2], lo), + pshufb_m128(maskBase[2 * 2 + 1], hi)); + m128 res_shifted_2 = palignr(res_2, *old_2, 16 - 2); *old_2 = res_2; - return and128(r, res_shifted_2); + return or128(r, res_shifted_2); } static really_inline @@ -170,487 +374,260 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = 
prep_conf_teddy_m3(maskBase, old_1, old_2, val); - m128 res_3 = and128(pshufb_m128(maskBase[3*2], lo), - pshufb_m128(maskBase[3*2+1], hi)); - m128 res_shifted_3 = palignr(res_3, *old_3, 16-3); + m128 res_3 = or128(pshufb_m128(maskBase[3 * 2], lo), + pshufb_m128(maskBase[3 * 2 + 1], hi)); + m128 res_shifted_3 = palignr(res_3, *old_3, 16 - 3); *old_3 = res_3; - return and128(r, res_shifted_3); + return or128(r, res_shifted_3); } +#endif // __AVX2__ + +#if defined(__AVX2__) // reinforced teddy + +#define PREP_CONF_FN_NO_REINFORCEMENT(val, n) \ + prep_conf_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val) + +#define PREP_CONF_FN(ptr, n) \ + prep_conf_teddy_m##n(&lo_mask, dup_mask, ptr, r_msk_base, &c_0, &c_128) + +#define PREPARE_MASKS_1 \ + dup_mask[0] = set2x128(maskBase[0]); \ + dup_mask[1] = set2x128(maskBase[1]); + +#define PREPARE_MASKS_2 \ + PREPARE_MASKS_1 \ + dup_mask[2] = set2x128(maskBase[2]); \ + dup_mask[3] = set2x128(maskBase[3]); + +#define PREPARE_MASKS_3 \ + PREPARE_MASKS_2 \ + dup_mask[4] = set2x128(maskBase[4]); \ + dup_mask[5] = set2x128(maskBase[5]); + +#define PREPARE_MASKS_4 \ + PREPARE_MASKS_3 \ + dup_mask[6] = set2x128(maskBase[6]); \ + dup_mask[7] = set2x128(maskBase[7]); + +#define PREPARE_MASKS(n) \ + m256 lo_mask = set32x8(0xf); \ + m256 dup_mask[n * 2]; \ + PREPARE_MASKS_##n + +#else // not defined __AVX2__ + +#define FDR_EXEC_TEDDY_RES_OLD_1 + +#define FDR_EXEC_TEDDY_RES_OLD_2 \ + m128 res_old_1 = zeroes128(); + +#define FDR_EXEC_TEDDY_RES_OLD_3 \ + m128 res_old_1 = zeroes128(); \ + m128 res_old_2 = zeroes128(); + +#define FDR_EXEC_TEDDY_RES_OLD_4 \ + m128 res_old_1 = zeroes128(); \ + m128 res_old_2 = zeroes128(); \ + m128 res_old_3 = zeroes128(); + +#define FDR_EXEC_TEDDY_RES_OLD(n) FDR_EXEC_TEDDY_RES_OLD_##n + +#define PREP_CONF_FN_1(mask_base, val) \ + prep_conf_teddy_m1(mask_base, val) + +#define PREP_CONF_FN_2(mask_base, val) \ + prep_conf_teddy_m2(mask_base, &res_old_1, val) + +#define PREP_CONF_FN_3(mask_base, val) \ + 
prep_conf_teddy_m3(mask_base, &res_old_1, &res_old_2, val) + +#define PREP_CONF_FN_4(mask_base, val) \ + prep_conf_teddy_m4(mask_base, &res_old_1, &res_old_2, &res_old_3, val) + +#define PREP_CONF_FN(mask_base, val, n) \ + PREP_CONF_FN_##n(mask_base, val) +#endif // __AVX2__ + + +#if defined(__AVX2__) // reinforced teddy +#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) \ +do { \ + const u8 *buf_end = a->buf + a->len; \ + const u8 *ptr = a->buf + a->start_offset; \ + u32 floodBackoff = FLOOD_BACKOFF_START; \ + const u8 *tryFloodDetect = a->firstFloodDetect; \ + u32 last_match = (u32)-1; \ + const struct Teddy *teddy = (const struct Teddy *)fdr; \ + const size_t iterBytes = 64; \ + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ + a->buf, a->len, a->start_offset); \ + \ + const m128 *maskBase = getMaskBase(teddy); \ + PREPARE_MASKS(n_msk); \ + const u32 *confBase = getConfBase(teddy); \ + \ + const u64a *r_msk_base = getReinforcedMaskBase(teddy, n_msk); \ + u32 c_0 = 0x100; \ + u32 c_128 = 0x100; \ + const u8 *mainStart = ROUNDUP_PTR(ptr, 32); \ + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); \ + if (ptr < mainStart) { \ + ptr = mainStart - 32; \ + m256 p_mask; \ + m256 val_0 = vectoredLoad256(&p_mask, ptr, a->start_offset, \ + a->buf, buf_end, \ + a->buf_history, a->len_history, n_msk); \ + m256 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk); \ + c_0 = *(ptr + 31); \ + r_0 = or256(r_0, p_mask); \ + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ + ptr += 32; \ + } \ + \ + if (ptr + 32 <= buf_end) { \ + m256 r_0 = PREP_CONF_FN(ptr, n_msk); \ + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ + ptr += 32; \ + } \ + \ + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { \ + __builtin_prefetch(ptr + (iterBytes * 4)); \ + CHECK_FLOOD; \ + m256 r_0 = PREP_CONF_FN(ptr, n_msk); \ + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn); \ + m256 r_1 = PREP_CONF_FN(ptr + 32, n_msk); \ + CONFIRM_TEDDY(r_1, 8, 32, NOT_CAUTIOUS, conf_fn); \ 
+ } \ + \ + if (ptr + 32 <= buf_end) { \ + m256 r_0 = PREP_CONF_FN(ptr, n_msk); \ + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn); \ + ptr += 32; \ + } \ + \ + assert(ptr + 32 > buf_end); \ + if (ptr < buf_end) { \ + m256 p_mask; \ + m256 val_0 = vectoredLoad256(&p_mask, ptr, 0, ptr, buf_end, \ + a->buf_history, a->len_history, n_msk); \ + m256 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk); \ + r_0 = or256(r_0, p_mask); \ + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ + } \ + \ + return HWLM_SUCCESS; \ +} while(0) +#else // not defined __AVX2__ +#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) \ +do { \ + const u8 *buf_end = a->buf + a->len; \ + const u8 *ptr = a->buf + a->start_offset; \ + u32 floodBackoff = FLOOD_BACKOFF_START; \ + const u8 *tryFloodDetect = a->firstFloodDetect; \ + u32 last_match = (u32)-1; \ + const struct Teddy *teddy = (const struct Teddy *)fdr; \ + const size_t iterBytes = 32; \ + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ + a->buf, a->len, a->start_offset); \ + \ + const m128 *maskBase = getMaskBase(teddy); \ + const u32 *confBase = getConfBase(teddy); \ + \ + FDR_EXEC_TEDDY_RES_OLD(n_msk); \ + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); \ + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); \ + if (ptr < mainStart) { \ + ptr = mainStart - 16; \ + m128 p_mask; \ + m128 val_0 = vectoredLoad128(&p_mask, ptr, a->start_offset, \ + a->buf, buf_end, \ + a->buf_history, a->len_history, n_msk); \ + m128 r_0 = PREP_CONF_FN(maskBase, val_0, n_msk); \ + r_0 = or128(r_0, p_mask); \ + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ + ptr += 16; \ + } \ + \ + if (ptr + 16 <= buf_end) { \ + m128 r_0 = PREP_CONF_FN(maskBase, load128(ptr), n_msk); \ + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ + ptr += 16; \ + } \ + \ + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { \ + __builtin_prefetch(ptr + (iterBytes * 4)); \ + CHECK_FLOOD; \ + m128 r_0 = PREP_CONF_FN(maskBase, load128(ptr), n_msk); \ + 
CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn); \ + m128 r_1 = PREP_CONF_FN(maskBase, load128(ptr + 16), n_msk); \ + CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, conf_fn); \ + } \ + \ + if (ptr + 16 <= buf_end) { \ + m128 r_0 = PREP_CONF_FN(maskBase, load128(ptr), n_msk); \ + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn); \ + ptr += 16; \ + } \ + \ + assert(ptr + 16 > buf_end); \ + if (ptr < buf_end) { \ + m128 p_mask; \ + m128 val_0 = vectoredLoad128(&p_mask, ptr, 0, ptr, buf_end, \ + a->buf_history, a->len_history, n_msk); \ + m128 r_0 = PREP_CONF_FN(maskBase, val_0, n_msk); \ + r_0 = or128(r_0, p_mask); \ + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ + } \ + \ + return HWLM_SUCCESS; \ +} while(0) +#endif // __AVX2__ + hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy); - - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 1); - m128 r_0 = prep_conf_teddy_m1(maskBase, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); - ptr += 16; - } - - for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { - 
__builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit1_teddy); - m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16)); - CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit1_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 1); - m128 r_0 = prep_conf_teddy_m1(maskBase, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit1_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy); - - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 1); - m128 r_0 = prep_conf_teddy_m1(maskBase, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - 
CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m1(maskBase, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m128 r_1 = prep_conf_teddy_m1(maskBase, load128(ptr + 16)); - CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 1); - m128 r_0 = prep_conf_teddy_m1(maskBase, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy); - - m128 res_old_1 = ones128(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 2); - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + 
(iterBytes*4)); - CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); - m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16)); - CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 2); - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy); - - m128 res_old_1 = ones128(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 2); - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - for (; ptr + iterBytes <= 
buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m128 r_1 = prep_conf_teddy_m2(maskBase, &res_old_1, load128(ptr + 16)); - CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 2); - m128 r_0 = prep_conf_teddy_m2(maskBase, &res_old_1, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy); - - m128 res_old_1 = ones128(); - m128 res_old_2 = ones128(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 3); - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - load128(ptr)); - 
CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); - m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - load128(ptr + 16)); - CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 3); - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy); - - m128 res_old_1 = ones128(); - m128 res_old_2 = ones128(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 3); - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, 
do_confWithBit_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m128 r_1 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, - load128(ptr + 16)); - CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 3); - m128 r_0 = prep_conf_teddy_m3(maskBase, &res_old_1, &res_old_2, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy); - - m128 res_old_1 = ones128(); - m128 res_old_2 = ones128(); - m128 res_old_3 = ones128(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - 
a->buf_history, a->len_history, 4); - m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); - m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load128(ptr + 16)); - CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 4); - m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBitMany_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy); - - m128 res_old_1 = ones128(); - m128 res_old_2 = ones128(); - m128 res_old_3 = 
ones128(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 4); - m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load128(ptr)); - CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m128 r_1 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load128(ptr + 16)); - CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, do_confWithBit_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m128 p_mask; - m128 val_0 = vectoredLoad128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 4); - m128 r_0 = prep_conf_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, val_0); - r_0 = and128(r_0, p_mask); - CONFIRM_TEDDY(r_0, 8, 0, VECTORING, do_confWithBit_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); } diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 89117b0b..11ea0f8e 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -40,10 +40,79 @@ #if defined(HAVE_AVX2) +const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = { + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + 
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} +}; + #ifdef ARCH_64_BIT #define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \ do { \ - if (unlikely(isnonzero256(var))) { \ + if (unlikely(diff256(var, ones256()))) { \ m256 swap = swap128in256(var); \ m256 r = interleave256lo(var, swap); \ u64a part1 = extractlow64from256(r); \ @@ -51,32 +120,36 @@ do { \ r = interleave256hi(var, swap); \ u64a part3 = extractlow64from256(r); \ u64a part4 = extract64from256(r, 1); \ - if (unlikely(part1)) { \ + if (unlikely(part1 != ones_u64a)) { \ + part1 = ~part1; \ conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part2)) { \ + if (unlikely(part2 != ones_u64a)) { \ + part2 = ~part2; \ conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part3)) { \ + if (unlikely(part3 != ones_u64a)) { \ + part3 = ~part3; \ conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part4)) { \ + if (unlikely(part4 != ones_u64a)) { \ + part4 = ~part4; \ conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ } \ -} while (0); +} while(0) 
#else #define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \ do { \ - if (unlikely(isnonzero256(var))) { \ + if (unlikely(diff256(var, ones256()))) { \ m256 swap = swap128in256(var); \ m256 r = interleave256lo(var, swap); \ u32 part1 = extractlow32from256(r); \ @@ -88,56 +161,65 @@ do { \ u32 part6 = extract32from256(r, 1); \ u32 part7 = extract32from256(r, 2); \ u32 part8 = extract32from256(r, 3); \ - if (unlikely(part1)) { \ + if (unlikely(part1 != ones_u32)) { \ + part1 = ~part1; \ conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part2)) { \ + if (unlikely(part2 != ones_u32)) { \ + part2 = ~part2; \ conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \ &control, &last_match); \ } \ - if (unlikely(part3)) { \ + if (unlikely(part3 != ones_u32)) { \ + part3 = ~part3; \ conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part4)) { \ + if (unlikely(part4 != ones_u32)) { \ + part4 = ~part4; \ conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part5)) { \ + if (unlikely(part5 != ones_u32)) { \ + part5 = ~part5; \ conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part6)) { \ + if (unlikely(part6 != ones_u32)) { \ + part6 = ~part6; \ conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part7)) { \ + if (unlikely(part7 != ones_u32)) { \ + part7 = ~part7; \ conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ - if (unlikely(part8)) { \ + if (unlikely(part8 != ones_u32)) { \ + part8 = ~part8; \ conf_fn(&part8, bucket, offset + 14, 
confBase, reason, a, ptr, \ &control, &last_match); \ CHECK_HWLM_TERMINATE_MATCHING; \ } \ } \ -} while (0); +} while(0) #endif static really_inline -m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, +m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const size_t start_offset, + const u8 *lo, const u8 *hi, const u8 *buf_history, size_t len_history, const u32 nMasks) { m128 p_mask128; - m256 ret = set2x128(vectoredLoad128(&p_mask128, ptr, lo, hi, buf_history, - len_history, nMasks)); + m256 ret = set2x128(vectoredLoad128(&p_mask128, ptr, start_offset, lo, hi, + buf_history, len_history, nMasks)); *p_mask = set2x128(p_mask128); return ret; } @@ -147,8 +229,8 @@ m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) { m256 mask = set32x8(0xf); m256 lo = and256(val, mask); m256 hi = and256(rshift64_m256(val, 4), mask); - return and256(pshufb_m256(maskBase[0*2], lo), - pshufb_m256(maskBase[0*2+1], hi)); + return or256(pshufb_m256(maskBase[0 * 2], lo), + pshufb_m256(maskBase[0 * 2 + 1], hi)); } static really_inline @@ -158,11 +240,11 @@ m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 val) { m256 hi = and256(rshift64_m256(val, 4), mask); m256 r = prep_conf_fat_teddy_m1(maskBase, val); - m256 res_1 = and256(pshufb_m256(maskBase[1*2], lo), - pshufb_m256(maskBase[1*2+1], hi)); - m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1); + m256 res_1 = or256(pshufb_m256(maskBase[1 * 2], lo), + pshufb_m256(maskBase[1 * 2 + 1], hi)); + m256 res_shifted_1 = vpalignr(res_1, *old_1, 16 - 1); *old_1 = res_1; - return and256(r, res_shifted_1); + return or256(r, res_shifted_1); } static really_inline @@ -173,11 +255,11 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2, m256 hi = and256(rshift64_m256(val, 4), mask); m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val); - m256 res_2 = and256(pshufb_m256(maskBase[2*2], lo), - pshufb_m256(maskBase[2*2+1], hi)); - m256 res_shifted_2 = vpalignr(res_2, *old_2, 
16-2); + m256 res_2 = or256(pshufb_m256(maskBase[2 * 2], lo), + pshufb_m256(maskBase[2 * 2 + 1], hi)); + m256 res_shifted_2 = vpalignr(res_2, *old_2, 16 - 2); *old_2 = res_2; - return and256(r, res_shifted_2); + return or256(r, res_shifted_2); } static really_inline @@ -188,11 +270,11 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, m256 hi = and256(rshift64_m256(val, 4), mask); m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val); - m256 res_3 = and256(pshufb_m256(maskBase[3*2], lo), - pshufb_m256(maskBase[3*2+1], hi)); - m256 res_shifted_3 = vpalignr(res_3, *old_3, 16-3); + m256 res_3 = or256(pshufb_m256(maskBase[3 * 2], lo), + pshufb_m256(maskBase[3 * 2 + 1], hi)); + m256 res_shifted_3 = vpalignr(res_3, *old_3, 16 - 3); *old_3 = res_3; - return and256(r, res_shifted_3); + return or256(r, res_shifted_3); } static really_inline @@ -200,486 +282,151 @@ const m256 *getMaskBase_avx2(const struct Teddy *teddy) { return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); } +#define FDR_EXEC_FAT_TEDDY_RES_OLD_1 \ +do { \ +} while(0) + +#define FDR_EXEC_FAT_TEDDY_RES_OLD_2 \ + m256 res_old_1 = zeroes256(); + +#define FDR_EXEC_FAT_TEDDY_RES_OLD_3 \ + m256 res_old_1 = zeroes256(); \ + m256 res_old_2 = zeroes256(); + +#define FDR_EXEC_FAT_TEDDY_RES_OLD_4 \ + m256 res_old_1 = zeroes256(); \ + m256 res_old_2 = zeroes256(); \ + m256 res_old_3 = zeroes256(); + +#define FDR_EXEC_FAT_TEDDY_RES_OLD(n) FDR_EXEC_FAT_TEDDY_RES_OLD_##n + +#define PREP_CONF_FAT_FN_1(mask_base, val) \ + prep_conf_fat_teddy_m1(mask_base, val) + +#define PREP_CONF_FAT_FN_2(mask_base, val) \ + prep_conf_fat_teddy_m2(mask_base, &res_old_1, val) + +#define PREP_CONF_FAT_FN_3(mask_base, val) \ + prep_conf_fat_teddy_m3(mask_base, &res_old_1, &res_old_2, val) + +#define PREP_CONF_FAT_FN_4(mask_base, val) \ + prep_conf_fat_teddy_m4(mask_base, &res_old_1, &res_old_2, &res_old_3, val) + +#define PREP_CONF_FAT_FN(mask_base, val, n) \ + 
PREP_CONF_FAT_FN_##n(mask_base, val) + +#define FDR_EXEC_FAT_TEDDY(fdr, a, control, n_msk, conf_fn) \ +do { \ + const u8 *buf_end = a->buf + a->len; \ + const u8 *ptr = a->buf + a->start_offset; \ + u32 floodBackoff = FLOOD_BACKOFF_START; \ + const u8 *tryFloodDetect = a->firstFloodDetect; \ + u32 last_match = (u32)-1; \ + const struct Teddy *teddy = (const struct Teddy *)fdr; \ + const size_t iterBytes = 32; \ + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ + a->buf, a->len, a->start_offset); \ + \ + const m256 *maskBase = getMaskBase_avx2(teddy); \ + const u32 *confBase = getConfBase(teddy); \ + \ + FDR_EXEC_FAT_TEDDY_RES_OLD(n_msk); \ + const u8 *mainStart = ROUNDUP_PTR(ptr, 16); \ + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); \ + if (ptr < mainStart) { \ + ptr = mainStart - 16; \ + m256 p_mask; \ + m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->start_offset, \ + a->buf, buf_end, \ + a->buf_history, a->len_history, \ + n_msk); \ + m256 r_0 = PREP_CONF_FAT_FN(maskBase, val_0, n_msk); \ + r_0 = or256(r_0, p_mask); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, conf_fn); \ + ptr += 16; \ + } \ + \ + if (ptr + 16 <= buf_end) { \ + m256 r_0 = PREP_CONF_FAT_FN(maskBase, load2x128(ptr), n_msk); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, conf_fn); \ + ptr += 16; \ + } \ + \ + for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { \ + __builtin_prefetch(ptr + (iterBytes * 4)); \ + CHECK_FLOOD; \ + m256 r_0 = PREP_CONF_FAT_FN(maskBase, load2x128(ptr), n_msk); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, conf_fn); \ + m256 r_1 = PREP_CONF_FAT_FN(maskBase, load2x128(ptr + 16), n_msk); \ + CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, conf_fn); \ + } \ + \ + if (ptr + 16 <= buf_end) { \ + m256 r_0 = PREP_CONF_FAT_FN(maskBase, load2x128(ptr), n_msk); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, conf_fn); \ + ptr += 16; \ + } \ + \ + assert(ptr + 16 > buf_end); \ + if (ptr < buf_end) { \ + m256 p_mask; \ + m256 val_0 = 
vectoredLoad2x128(&p_mask, ptr, 0, ptr, buf_end, \ + a->buf_history, a->len_history, \ + n_msk); \ + m256 r_0 = PREP_CONF_FAT_FN(maskBase, val_0, n_msk); \ + r_0 = or256(r_0, p_mask); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, conf_fn); \ + } \ + \ + return HWLM_SUCCESS; \ +} while(0) + hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase(teddy); - - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 1); - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); - ptr += 16; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit1_teddy); - m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16)); - CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit1_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m256 p_mask; - m256 val_0 = 
vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 1); - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit1_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_FAT_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase(teddy); - - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 1); - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m256 r_1 = prep_conf_fat_teddy_m1(maskBase, load2x128(ptr + 16)); - CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - 
m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 1); - m256 r_0 = prep_conf_fat_teddy_m1(maskBase, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_FAT_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase(teddy); - - m256 res_old_1 = ones256(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 2); - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); - m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, - load2x128(ptr + 16)); - 
CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 2); - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_FAT_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase(teddy); - - m256 res_old_1 = ones256(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 2); - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 
16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m256 r_1 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, - load2x128(ptr + 16)); - CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 2); - m256 r_0 = prep_conf_fat_teddy_m2(maskBase, &res_old_1, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_FAT_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase(teddy); - - m256 res_old_1 = ones256(); - m256 res_old_2 = ones256(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 3); - m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - for ( ; ptr + iterBytes <= 
buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); - m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - load2x128(ptr + 16)); - CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 3); - m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_FAT_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase(teddy); - - m256 res_old_1 = ones256(); - m256 res_old_2 = ones256(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 3); - m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - 
if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m256 r_1 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - load2x128(ptr + 16)); - CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 3); - m256 r_0 = prep_conf_fat_teddy_m3(maskBase, &res_old_1, &res_old_2, - val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_FAT_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = getConfBase(teddy); - - m256 res_old_1 = ones256(); - m256 res_old_2 = ones256(); - m256 res_old_3 = ones256(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, 
buf_end, - a->buf_history, a->len_history, 4); - m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); - ptr += 16; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBitMany_teddy); - m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load2x128(ptr + 16)); - CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBitMany_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 4); - m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBitMany_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_FAT_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); } hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 32; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m256 *maskBase = getMaskBase_avx2(teddy); - const u32 *confBase = 
getConfBase(teddy); - - m256 res_old_1 = ones256(); - m256 res_old_2 = ones256(); - m256 res_old_3 = ones256(); - const u8 *mainStart = ROUNDUP_PTR(ptr, 16); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 16; - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 4); - m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - if (ptr + 16 < buf_end) { - m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - ptr += 16; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load2x128(ptr)); - CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, do_confWithBit_teddy); - m256 r_1 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, load2x128(ptr + 16)); - CONFIRM_FAT_TEDDY(r_1, 16, 16, NOT_CAUTIOUS, do_confWithBit_teddy); - } - - for (; ptr < buf_end; ptr += 16) { - m256 p_mask; - m256 val_0 = vectoredLoad2x128(&p_mask, ptr, a->buf, buf_end, - a->buf_history, a->len_history, 4); - m256 r_0 = prep_conf_fat_teddy_m4(maskBase, &res_old_1, &res_old_2, - &res_old_3, val_0); - r_0 = and256(r_0, p_mask); - CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, do_confWithBit_teddy); - } - - return HWLM_SUCCESS; + FDR_EXEC_FAT_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); } #endif // HAVE_AVX2 diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 663d0483..14f19354 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -309,74 +309,65 @@ bool TeddyCompiler::pack(map TeddyCompiler::build() { - assert(eng.numMasks <= 
MAX_NUM_MASKS); +// this entry has all-zero mask to skip reinforcement +#define NO_REINFORCEMENT N_CHARS - if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { - DEBUG_PRINTF("too many literals: %zu\n", lits.size()); - return nullptr; +// this means every entry in reinforcement table +#define ALL_CHAR_SET N_CHARS + +// each item's reinforcement mask has REINFORCED_MSK_LEN bytes +#define REINFORCED_MSK_LEN 8 + +static +void initReinforcedTable(u8 *reinforcedMsk) { + u64a *mask = (u64a *)reinforcedMsk; + fill_n(mask, N_CHARS, 0x00ffffffffffffffULL); +} + +static +void fillReinforcedMskZero(u8 *reinforcedMsk) { + u8 *mc = reinforcedMsk + NO_REINFORCEMENT * REINFORCED_MSK_LEN; + fill_n(mc, REINFORCED_MSK_LEN, 0x00); +} + +static +void fillReinforcedMsk(u8 *reinforcedMsk, u16 c, u32 j, u8 bmsk) { + assert(j > 0); + if (c == ALL_CHAR_SET) { + for (size_t i = 0; i < N_CHARS; i++) { + u8 *mc = reinforcedMsk + i * REINFORCED_MSK_LEN; + mc[j - 1] &= ~bmsk; + } + } else { + u8 *mc = reinforcedMsk + c * REINFORCED_MSK_LEN; + mc[j - 1] &= ~bmsk; } +} #ifdef TEDDY_DEBUG - for (size_t i = 0; i < lits.size(); i++) { - printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(), - lits[i].nocase ? "caseless" : "caseful"); - for (size_t j = 0; j < lits[i].s.size(); j++) { - printf("%02x", ((u32)lits[i].s[j]) & 0xff); +static +void dumpReinforcedMaskTable(const u8 *msks) { + for (u32 i = 0; i <= N_CHARS; i++) { + printf("0x%02x: ", i); + for (u32 j = 0; j < REINFORCED_MSK_LEN; j++) { + u8 val = msks[i * REINFORCED_MSK_LEN + j]; + for (u32 k = 0; k < 8; k++) { + printf("%s", ((val >> k) & 0x1) ? 
"1" : "0"); + } + printf(" "); } printf("\n"); } +} #endif - map> bucketToLits; - if (eng.needConfirm(lits)) { - if (!pack(bucketToLits)) { - DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n", - lits.size(), eng.getNumBuckets()); - return nullptr; - } - } else { - for (u32 i = 0; i < lits.size(); i++) { - bucketToLits[i].push_back(i); - } - } - u32 maskWidth = eng.getNumBuckets() / 8; - - size_t headerSize = sizeof(Teddy); - size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; - - auto floodTable = setupFDRFloodControl(lits, eng, grey); - auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); - - // Note: we place each major structure here on a cacheline boundary. - size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) + - ROUNDUP_CL(confirmTable.size()) + floodTable.size(); - - auto fdr = make_zeroed_bytecode_ptr(size, 64); - assert(fdr); // otherwise would have thrown std::bad_alloc - Teddy *teddy = (Teddy *)fdr.get(); // ugly - u8 *teddy_base = (u8 *)teddy; - - // Write header. - teddy->size = size; - teddy->engineID = eng.getID(); - teddy->maxStringLen = verify_u32(maxLen(lits)); - - // Write confirm structures. - u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen); - assert(ISALIGNED_CL(ptr)); - teddy->confOffset = verify_u32(ptr - teddy_base); - memcpy(ptr, confirmTable.get(), confirmTable.size()); - ptr += ROUNDUP_CL(confirmTable.size()); - - // Write flood control structures. - assert(ISALIGNED_CL(ptr)); - teddy->floodOffset = verify_u32(ptr - teddy_base); - memcpy(ptr, floodTable.get(), floodTable.size()); - ptr += floodTable.size(); - - // Write teddy masks. 
- u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize); +static +void fillNibbleMasks(const map> &bucketToLits, + const vector &lits, + u32 numMasks, u32 maskWidth, size_t maskLen, + u8 *baseMsk) { + memset(baseMsk, 0xff, maskLen); for (const auto &b2l : bucketToLits) { const u32 &bucket_id = b2l.first; @@ -389,7 +380,7 @@ bytecode_ptr TeddyCompiler::build() { const u32 sz = verify_u32(l.s.size()); // fill in masks - for (u32 j = 0; j < eng.numMasks; j++) { + for (u32 j = 0; j < numMasks; j++) { const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8); const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8); const u32 lo_base = msk_id_lo * 16; @@ -399,8 +390,8 @@ bytecode_ptr TeddyCompiler::build() { // locations in these masks with '1' if (j >= sz) { for (u32 n = 0; n < 16; n++) { - baseMsk[lo_base + n] |= bmsk; - baseMsk[hi_base + n] |= bmsk; + baseMsk[lo_base + n] &= ~bmsk; + baseMsk[hi_base + n] &= ~bmsk; } } else { u8 c = l.s[sz - 1 - j]; @@ -419,27 +410,139 @@ bytecode_ptr TeddyCompiler::build() { for (u8 cm = 0; cm < 0x10; cm++) { if ((cm & m_lo) == (cmp_lo & m_lo)) { - baseMsk[lo_base + cm] |= bmsk; + baseMsk[lo_base + cm] &= ~bmsk; } if ((cm & m_hi) == (cmp_hi & m_hi)) { - baseMsk[hi_base + cm] |= bmsk; + baseMsk[hi_base + cm] &= ~bmsk; } } } else { if (l.nocase && ourisalpha(c)) { u32 cmHalfClear = (0xdf >> hiShift) & 0xf; u32 cmHalfSet = (0x20 >> hiShift) & 0xf; - baseMsk[hi_base + (n_hi & cmHalfClear)] |= bmsk; - baseMsk[hi_base + (n_hi | cmHalfSet)] |= bmsk; + baseMsk[hi_base + (n_hi & cmHalfClear)] &= ~bmsk; + baseMsk[hi_base + (n_hi | cmHalfSet)] &= ~bmsk; } else { - baseMsk[hi_base + n_hi] |= bmsk; + baseMsk[hi_base + n_hi] &= ~bmsk; } - baseMsk[lo_base + n_lo] |= bmsk; + baseMsk[lo_base + n_lo] &= ~bmsk; } } } } } +} + +static +void fillReinforcedTable(const map> &bucketToLits, + const vector &lits, + u8 *reinforcedMsk) { + initReinforcedTable(reinforcedMsk); + + for (const auto &b2l : bucketToLits) { + const u32 &bucket_id = b2l.first; + 
const vector &ids = b2l.second; + const u8 bmsk = 1U << (bucket_id % 8); + + for (const LiteralIndex &lit_id : ids) { + const hwlmLiteral &l = lits[lit_id]; + DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id); + const u32 sz = verify_u32(l.s.size()); + + // fill in reinforced masks + for (u32 j = 1; j < REINFORCED_MSK_LEN; j++) { + if (sz - 1 < j) { + fillReinforcedMsk(reinforcedMsk, ALL_CHAR_SET, j, bmsk); + } else { + u8 c = l.s[sz - 1 - j]; + if (l.nocase && ourisalpha(c)) { + u8 c_up = c & 0xdf; + fillReinforcedMsk(reinforcedMsk, c_up, j, bmsk); + u8 c_lo = c | 0x20; + fillReinforcedMsk(reinforcedMsk, c_lo, j, bmsk); + } else { + fillReinforcedMsk(reinforcedMsk, c, j, bmsk); + } + } + } + } + } + + fillReinforcedMskZero(reinforcedMsk); +} + +bytecode_ptr TeddyCompiler::build() { + assert(eng.numMasks <= MAX_NUM_MASKS); + + if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { + DEBUG_PRINTF("too many literals: %zu\n", lits.size()); + return nullptr; + } + +#ifdef TEDDY_DEBUG + for (size_t i = 0; i < lits.size(); i++) { + printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(), + lits[i].nocase ? "caseless" : "caseful"); + for (size_t j = 0; j < lits[i].s.size(); j++) { + printf("%02x", ((u32)lits[i].s[j])&0xff); + } + printf("\n"); + } +#endif + + map> bucketToLits; + if (!pack(bucketToLits)) { + DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n", + lits.size(), eng.getNumBuckets()); + return nullptr; + } + u32 maskWidth = eng.getNumBuckets() / 8; + + size_t headerSize = sizeof(Teddy); + size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; + size_t reinforcedMaskLen = (N_CHARS + 1) * REINFORCED_MSK_LEN; + + auto floodTable = setupFDRFloodControl(lits, eng, grey); + auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); + + // Note: we place each major structure here on a cacheline boundary. 
+ size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) + + ROUNDUP_CL(reinforcedMaskLen) + + ROUNDUP_CL(confirmTable.size()) + floodTable.size(); + + auto fdr = make_zeroed_bytecode_ptr(size, 64); + assert(fdr); // otherwise would have thrown std::bad_alloc + Teddy *teddy = (Teddy *)fdr.get(); // ugly + u8 *teddy_base = (u8 *)teddy; + + // Write header. + teddy->size = size; + teddy->engineID = eng.getID(); + teddy->maxStringLen = verify_u32(maxLen(lits)); + + // Write confirm structures. + u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) + + ROUNDUP_CL(reinforcedMaskLen); + assert(ISALIGNED_CL(ptr)); + teddy->confOffset = verify_u32(ptr - teddy_base); + memcpy(ptr, confirmTable.get(), confirmTable.size()); + ptr += ROUNDUP_CL(confirmTable.size()); + + // Write flood control structures. + assert(ISALIGNED_CL(ptr)); + teddy->floodOffset = verify_u32(ptr - teddy_base); + memcpy(ptr, floodTable.get(), floodTable.size()); + ptr += floodTable.size(); + + // Write teddy masks. + u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize); + fillNibbleMasks(bucketToLits, lits, eng.numMasks, maskWidth, maskLen, + baseMsk); + + // Write reinforcement masks. 
+ u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen); + fillReinforcedTable(bucketToLits, lits, reinforcedMsk); #ifdef TEDDY_DEBUG for (u32 i = 0; i < eng.numMasks * 2; i++) { @@ -452,6 +555,10 @@ bytecode_ptr TeddyCompiler::build() { } printf("\n"); } + + printf("\n===============================================\n" + "reinforced mask table for low boundary (original)\n\n"); + dumpReinforcedMaskTable(reinforcedMsk); #endif return fdr; diff --git a/src/fdr/teddy_engine_description.cpp b/src/fdr/teddy_engine_description.cpp index f7559b13..88ae0f53 100644 --- a/src/fdr/teddy_engine_description.cpp +++ b/src/fdr/teddy_engine_description.cpp @@ -51,18 +51,6 @@ u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const { return numMasks; } -bool TeddyEngineDescription::needConfirm(const vector &lits) const { - if (packed || lits.size() > getNumBuckets()) { - return true; - } - for (const auto &lit : lits) { - if (lit.s.size() > numMasks || !lit.msk.empty()) { - return true; - } - } - return false; -} - void getTeddyDescriptions(vector *out) { static const TeddyEngineDef defns[] = { { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false }, diff --git a/src/fdr/teddy_engine_description.h b/src/fdr/teddy_engine_description.h index 3979a5d3..95931613 100644 --- a/src/fdr/teddy_engine_description.h +++ b/src/fdr/teddy_engine_description.h @@ -55,7 +55,6 @@ public: explicit TeddyEngineDescription(const TeddyEngineDef &def); u32 getDefaultFloodSuffixLength() const override; - bool needConfirm(const std::vector &lits) const; }; std::unique_ptr diff --git a/src/fdr/teddy_internal.h b/src/fdr/teddy_internal.h index 359d1e13..d1752452 100644 --- a/src/fdr/teddy_internal.h +++ b/src/fdr/teddy_internal.h @@ -26,6 +26,25 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +/* Teddy bytecode layout: + * * |-----| + * * | | struct Teddy + * * |-----| + * * | | teddy masks + * * | | + * * |-----| + * * | | reinforcement mask table + * * | | + * * |-----| + * * | | confirm + * * | | + * * | | + * * |-----| + * * | | flood control + * * | | + * * |-----| + */ + #ifndef TEDDY_INTERNAL_H #define TEDDY_INTERNAL_H diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index f63df724..c1333964 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -38,8 +38,12 @@ #include "ue2common.h" #include "util/bitutils.h" #include "util/simd_utils.h" +#include "util/uniform_ops.h" extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32]; +#if defined(__AVX2__) +extern const u8 ALIGN_DIRECTIVE p_mask_arr256[33][64]; +#endif #ifdef ARCH_64_BIT #define TEDDY_CONF_TYPE u64a @@ -110,8 +114,27 @@ void copyRuntBlock128(u8 *dst, const u8 *src, size_t len) { } // Note: p_mask is an output param that initialises a poison mask. +// *p_mask = load128(p_mask_arr[n] + 16 - m) means: +// m byte 0xff in the beginning, followed by n byte 0x00, +// then followed by the rest bytes 0xff. +// ptr >= lo: +// no history. +// for end/short zone, ptr==lo and start_offset==0 +// for start zone, see below +// lo ptr hi hi +// |----------|-------|----------------|............| +// start 0 start+offset end(<=16) +// p_mask ffff..ff0000...........00ffff.......... +// ptr < lo: +// only start zone. +// history +// ptr lo hi hi +// |----------|-------|----------------|............| +// 0 start start+offset end(<=16) +// p_mask ffff.....ffffff..ff0000...........00ffff.......... 
static really_inline -m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, +m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const size_t start_offset, + const u8 *lo, const u8 *hi, const u8 *buf_history, size_t len_history, const u32 nMasks) { union { @@ -123,27 +146,34 @@ m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, uintptr_t copy_start; uintptr_t copy_len; - if (ptr >= lo) { + if (ptr >= lo) { // short/end/start zone + uintptr_t start = (uintptr_t)(ptr - lo); uintptr_t avail = (uintptr_t)(hi - ptr); if (avail >= 16) { - *p_mask = load128(p_mask_arr[16] + 16); + assert(start_offset - start <= 16); + *p_mask = loadu128(p_mask_arr[16 - start_offset + start] + + 16 - start_offset + start); return loadu128(ptr); } - *p_mask = load128(p_mask_arr[avail] + 16); + assert(start_offset - start <= avail); + *p_mask = loadu128(p_mask_arr[avail - start_offset + start] + + 16 - start_offset + start); copy_start = 0; copy_len = avail; - } else { + } else { // start zone uintptr_t need = MIN((uintptr_t)(lo - ptr), MIN(len_history, nMasks - 1)); uintptr_t start = (uintptr_t)(lo - ptr); uintptr_t i; - for (i = start - need; ptr + i < lo; i++) { - u.val8[i] = buf_history[len_history - (lo - (ptr + i))]; + for (i = start - need; i < start; i++) { + u.val8[i] = buf_history[len_history - (start - i)]; } uintptr_t end = MIN(16, (uintptr_t)(hi - ptr)); - *p_mask = loadu128(p_mask_arr[end - start] + 16 - start); - copy_start = i; - copy_len = end - i; + assert(start + start_offset <= end); + *p_mask = loadu128(p_mask_arr[end - start - start_offset] + + 16 - start - start_offset); + copy_start = start; + copy_len = end - start; } // Runt block from the buffer. @@ -152,6 +182,135 @@ m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, return u.val128; } +#if defined(__AVX2__) +/* + * \brief Copy a block of [0,31] bytes efficiently. 
+ * + * This function is a workaround intended to stop some compilers from + * synthesizing a memcpy function call out of the copy of a small number of + * bytes that we do in vectoredLoad256. + */ +static really_inline +void copyRuntBlock256(u8 *dst, const u8 *src, size_t len) { + switch (len) { + case 0: + break; + case 1: + *dst = *src; + break; + case 2: + unaligned_store_u16(dst, unaligned_load_u16(src)); + break; + case 3: + unaligned_store_u16(dst, unaligned_load_u16(src)); + dst[2] = src[2]; + break; + case 4: + unaligned_store_u32(dst, unaligned_load_u32(src)); + break; + case 5: + case 6: + case 7: + /* Perform copy with two overlapping 4-byte chunks. */ + unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4)); + unaligned_store_u32(dst, unaligned_load_u32(src)); + break; + case 8: + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + /* Perform copy with two overlapping 8-byte chunks. */ + unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8)); + unaligned_store_u64a(dst, unaligned_load_u64a(src)); + break; + case 16: + storeu128(dst, loadu128(src)); + break; + default: + /* Perform copy with two overlapping 16-byte chunks. */ + assert(len < 32); + storeu128(dst + len - 16, loadu128(src + len - 16)); + storeu128(dst, loadu128(src)); + break; + } +} + +// Note: p_mask is an output param that initialises a poison mask. +// *p_mask = load256(p_mask_arr256[n] + 32 - m) means: +// m byte 0xff in the beginning, followed by n byte 0x00, +// then followed by the rest bytes 0xff. +// ptr >= lo: +// no history. +// for end/short zone, ptr==lo and start_offset==0 +// for start zone, see below +// lo ptr hi hi +// |----------|-------|----------------|............| +// start 0 start+offset end(<=32) +// p_mask ffff..ff0000...........00ffff.......... +// ptr < lo: +// only start zone. 
+// history +// ptr lo hi hi +// |----------|-------|----------------|............| +// 0 start start+offset end(<=32) +// p_mask ffff.....ffffff..ff0000...........00ffff.......... +static really_inline +m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const size_t start_offset, + const u8 *lo, const u8 *hi, + const u8 *buf_history, size_t len_history, + const u32 nMasks) { + union { + u8 val8[32]; + m256 val256; + } u; + u.val256 = zeroes256(); + + uintptr_t copy_start; + uintptr_t copy_len; + + if (ptr >= lo) { // short/end/start zone + uintptr_t start = (uintptr_t)(ptr - lo); + uintptr_t avail = (uintptr_t)(hi - ptr); + if (avail >= 32) { + assert(start_offset - start <= 32); + *p_mask = loadu256(p_mask_arr256[32 - start_offset + start] + + 32 - start_offset + start); + return loadu256(ptr); + } + assert(start_offset - start <= avail); + *p_mask = loadu256(p_mask_arr256[avail - start_offset + start] + + 32 - start_offset + start); + copy_start = 0; + copy_len = avail; + } else { //start zone + uintptr_t need = MIN((uintptr_t)(lo - ptr), + MIN(len_history, nMasks - 1)); + uintptr_t start = (uintptr_t)(lo - ptr); + uintptr_t i; + for (i = start - need; i < start; i++) { + u.val8[i] = buf_history[len_history - (start - i)]; + } + uintptr_t end = MIN(32, (uintptr_t)(hi - ptr)); + assert(start + start_offset <= end); + *p_mask = loadu256(p_mask_arr256[end - start - start_offset] + + 32 - start - start_offset); + copy_start = start; + copy_len = end - start; + } + + // Runt block from the buffer. 
+ copyRuntBlock256(&u.val8[copy_start], &ptr[copy_start], copy_len); + + return u.val256; +} +#endif // __AVX2__ + static really_inline u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte, CautionReason reason) { @@ -196,53 +355,17 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, } while (unlikely(*conf)); } -static really_inline -void do_confWithBit1_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, - const u32 *confBase, CautionReason reason, - const struct FDR_Runtime_Args *a, const u8 *ptr, - hwlmcb_rv_t *control, u32 *last_match) { - do { - u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf); - u32 byte = bit / bucket + offset; - u32 idx = bit % bucket; - u32 cf = confBase[idx]; - const struct FDRConfirm *fdrc = (const struct FDRConfirm *) - ((const u8 *)confBase + cf); - if (!(fdrc->groups & *control)) { - continue; - } - u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit1(fdrc, a, ptr - a->buf + byte, control, last_match, - confVal); - } while (unlikely(*conf)); -} - -static really_inline -void do_confWithBitMany_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, - const u32 *confBase, CautionReason reason, - const struct FDR_Runtime_Args *a, const u8 *ptr, - hwlmcb_rv_t *control, u32 *last_match) { - do { - u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf); - u32 byte = bit / bucket + offset; - u32 idx = bit % bucket; - u32 cf = confBase[idx]; - const struct FDRConfirm *fdrc = (const struct FDRConfirm *) - ((const u8 *)confBase + cf); - if (!(fdrc->groups & *control)) { - continue; - } - u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBitMany(fdrc, a, ptr - a->buf + byte, reason, control, - last_match, confVal); - } while (unlikely(*conf)); -} - static really_inline const m128 *getMaskBase(const struct Teddy *teddy) { return (const m128 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); } +static really_inline +const u64a *getReinforcedMaskBase(const struct Teddy *teddy, u8 numMask) { + return (const 
u64a *)((const u8 *)getMaskBase(teddy) + + ROUNDUP_CL(2 * numMask * sizeof(m128))); +} + static really_inline const u32 *getConfBase(const struct Teddy *teddy) { return (const u32 *)((const u8 *)teddy + teddy->confOffset); From a17ef3e48a78ac29241d43edb8061f6e8a4be1b8 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 15 Jun 2017 13:28:54 +1000 Subject: [PATCH 039/190] fdr_dump: dump FDRConfirm structures for fdr --- src/fdr/fdr_dump.cpp | 75 +++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/src/fdr/fdr_dump.cpp b/src/fdr/fdr_dump.cpp index 7e794bb3..3fe9062a 100644 --- a/src/fdr/fdr_dump.cpp +++ b/src/fdr/fdr_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,10 +30,12 @@ #include "fdr_compile.h" #include "fdr_compile_internal.h" +#include "fdr_confirm.h" #include "fdr_dump.h" #include "fdr_engine_description.h" #include "fdr_internal.h" #include "teddy_engine_description.h" +#include "teddy_internal.h" #include "ue2common.h" #include @@ -58,33 +60,68 @@ bool fdrIsTeddy(const FDR *fdr) { return !getFdrDescription(engine); } -void fdrPrintStats(const FDR *fdr, FILE *f) { - const bool isTeddy = fdrIsTeddy(fdr); +static +void dumpConfirms(const void *fdr_base, u32 conf_offset, u32 num_confirms, + FILE *f) { + const u32 *conf = (const u32 *)((const char *)fdr_base + conf_offset); + for (u32 i = 0; i < num_confirms; i++) { + const auto *fdrc = (const FDRConfirm *)((const char *)conf + conf[i]); + fprintf(f, " confirm %u\n", i); + fprintf(f, " andmsk 0x%016llx\n", fdrc->andmsk); + fprintf(f, " mult 0x%016llx\n", fdrc->mult); + fprintf(f, " nbits %u\n", fdrc->nBits); + fprintf(f, " groups 0x%016llx\n", fdrc->groups); + } +} - if (isTeddy) { - fprintf(f, "TEDDY: %u\n", fdr->engineID); - 
} else { - fprintf(f, "FDR: %u\n", fdr->engineID); +static +void dumpTeddy(const Teddy *teddy, FILE *f) { + fprintf(f, "TEDDY: %u\n", teddy->engineID); + auto des = getTeddyDescription(teddy->engineID); + if (!des) { + fprintf(f, " \n"); + return; } - if (isTeddy) { - auto des = getTeddyDescription(fdr->engineID); - if (des) { - fprintf(f, " masks %u\n", des->numMasks); - fprintf(f, " buckets %u\n", des->getNumBuckets()); - fprintf(f, " packed %s\n", des->packed ? "true" : "false"); - } else { - fprintf(f, " \n"); - } - } else { - fprintf(f, " domain %u\n", fdr->domain); - fprintf(f, " stride %u\n", fdr->stride); + fprintf(f, " masks %u\n", des->numMasks); + fprintf(f, " buckets %u\n", des->getNumBuckets()); + fprintf(f, " packed %s\n", des->packed ? "true" : "false"); + fprintf(f, " strings ???\n"); + fprintf(f, " size %zu bytes\n", fdrSize((const FDR *)teddy)); + fprintf(f, " max length %u\n", teddy->maxStringLen); + fprintf(f, " floodoff %u (%x)\n", teddy->floodOffset, + teddy->floodOffset); + fprintf(f, "\n"); + + dumpConfirms(teddy, teddy->confOffset, des->getNumBuckets(), f); +} + +static +void dumpFDR(const FDR *fdr, FILE *f) { + fprintf(f, "FDR: %u\n", fdr->engineID); + auto des = getFdrDescription(fdr->engineID); + if (!des) { + fprintf(f, " \n"); + return; } + fprintf(f, " domain %u\n", fdr->domain); + fprintf(f, " stride %u\n", fdr->stride); fprintf(f, " strings ???\n"); fprintf(f, " size %zu bytes\n", fdrSize(fdr)); fprintf(f, " max length %u\n", fdr->maxStringLen); fprintf(f, " floodoff %u (%x)\n", fdr->floodOffset, fdr->floodOffset); + fprintf(f, "\n"); + + dumpConfirms(fdr, fdr->confOffset, des->getNumBuckets(), f); +} + +void fdrPrintStats(const FDR *fdr, FILE *f) { + if (fdrIsTeddy(fdr)) { + dumpTeddy((const Teddy *)fdr, f); + } else { + dumpFDR(fdr, f); + } } } // namespace ue2 From e4788aae1a627388e28bab95133f14329a120d50 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 23 Jun 2017 16:32:10 +1000 Subject: [PATCH 040/190] fdr/teddy: store 
and dump number of strings --- src/fdr/fdr_compile.cpp | 1 + src/fdr/fdr_dump.cpp | 4 ++-- src/fdr/fdr_internal.h | 1 + src/fdr/teddy_compile.cpp | 1 + src/fdr/teddy_internal.h | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index cd0013e4..6f2de3d9 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -175,6 +175,7 @@ bytecode_ptr FDRCompiler::setupFDR() { fdr->size = size; fdr->engineID = eng.getID(); fdr->maxStringLen = verify_u32(maxLen(lits)); + fdr->numStrings = verify_u32(lits.size()); assert(eng.bits > 8 && eng.bits < 16); // we allow domains 9 to 15 only fdr->domain = eng.bits; fdr->domainMask = (1 << eng.bits) - 1; diff --git a/src/fdr/fdr_dump.cpp b/src/fdr/fdr_dump.cpp index 3fe9062a..ae81f257 100644 --- a/src/fdr/fdr_dump.cpp +++ b/src/fdr/fdr_dump.cpp @@ -86,7 +86,7 @@ void dumpTeddy(const Teddy *teddy, FILE *f) { fprintf(f, " masks %u\n", des->numMasks); fprintf(f, " buckets %u\n", des->getNumBuckets()); fprintf(f, " packed %s\n", des->packed ? 
"true" : "false"); - fprintf(f, " strings ???\n"); + fprintf(f, " strings %u\n", teddy->numStrings); fprintf(f, " size %zu bytes\n", fdrSize((const FDR *)teddy)); fprintf(f, " max length %u\n", teddy->maxStringLen); fprintf(f, " floodoff %u (%x)\n", teddy->floodOffset, @@ -107,7 +107,7 @@ void dumpFDR(const FDR *fdr, FILE *f) { fprintf(f, " domain %u\n", fdr->domain); fprintf(f, " stride %u\n", fdr->stride); - fprintf(f, " strings ???\n"); + fprintf(f, " strings %u\n", fdr->numStrings); fprintf(f, " size %zu bytes\n", fdrSize(fdr)); fprintf(f, " max length %u\n", fdr->maxStringLen); fprintf(f, " floodoff %u (%x)\n", fdr->floodOffset, fdr->floodOffset); diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 8109d1e8..2315b2d8 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -69,6 +69,7 @@ struct FDR { u32 engineID; u32 size; u32 maxStringLen; + u32 numStrings; u32 confOffset; u32 floodOffset; u8 stride; /* stride - how frequently the data is consulted by the first diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 14f19354..a35e5900 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -520,6 +520,7 @@ bytecode_ptr TeddyCompiler::build() { teddy->size = size; teddy->engineID = eng.getID(); teddy->maxStringLen = verify_u32(maxLen(lits)); + teddy->numStrings = verify_u32(lits.size()); // Write confirm structures. 
u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) + diff --git a/src/fdr/teddy_internal.h b/src/fdr/teddy_internal.h index d1752452..174710bc 100644 --- a/src/fdr/teddy_internal.h +++ b/src/fdr/teddy_internal.h @@ -55,6 +55,7 @@ struct Teddy { u32 engineID; u32 size; u32 maxStringLen; + u32 numStrings; u32 confOffset; u32 floodOffset; }; From bc953717c1c8e8bafc4f694bd6097203f2a5d6f5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 26 Jun 2017 10:05:03 +1000 Subject: [PATCH 041/190] rose: dump lit tables in their own files --- src/rose/rose_build_dump.cpp | 102 +++++++++++++++++------------------ 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index a5467b31..d3e800cf 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1679,13 +1679,22 @@ void dumpComponentInfo(const RoseEngine *t, const string &base) { } } +/** + * \brief Helper function: returns a writeable C stdio FILE* handle wrapped in + * a unique_ptr that takes care of closing the file on destruction. 
+ */ +static +std::unique_ptr openStdioFile(const string &filename) { + return std::unique_ptr( + fopen(filename.c_str(), "w"), &fclose); +} static void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { - FILE *f = fopen((base +"rose_components.csv").c_str(), "w"); + auto f = openStdioFile(base + "/rose_components.csv"); - fprintf(f, "Index, Offset,Engine Type,States,Stream State,Bytecode Size," - "Kind,Notes\n"); + fprintf(f.get(), "Index, Offset,Engine Type,States,Stream State," + "Bytecode Size,Kind,Notes\n"); for (u32 i = 0; i < t->queueCount; i++) { const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); @@ -1740,19 +1749,16 @@ void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { } } - fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, + fprintf(f.get(), "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, (const char *)n - (const char *)t, describe(*n).c_str(), n->nPositions, n->streamStateSize, n->length, to_string(kind).c_str(), notes.str().c_str()); } - fclose(f); } static void dumpExhaust(const RoseEngine *t, const string &base) { - stringstream sstxt; - sstxt << base << "rose_exhaust.txt"; - FILE *f = fopen(sstxt.str().c_str(), "w"); + auto f = openStdioFile(base + "/rose_exhaust.csv"); const NfaInfo *infos = (const NfaInfo *)((const char *)t + t->nfaInfoOffset); @@ -1762,7 +1768,7 @@ void dumpExhaust(const RoseEngine *t, const string &base) { for (u32 i = 0; i < queue_count; ++i) { u32 ekey_offset = infos[i].ekeyListOffset; - fprintf(f, "%u (%u):", i, ekey_offset); + fprintf(f.get(), "%u (%u):", i, ekey_offset); if (ekey_offset) { const u32 *ekeys = (const u32 *)((const char *)t + ekey_offset); @@ -1772,14 +1778,12 @@ void dumpExhaust(const RoseEngine *t, const string &base) { if (e == ~0U) { break; } - fprintf(f, " %u", e); + fprintf(f.get(), " %u", e); } } - fprintf(f, "\n"); + fprintf(f.get(), "\n"); } - - fclose(f); } static @@ -1797,9 +1801,8 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { if (dump_raw) { 
stringstream ssraw; ssraw << base << "rose_nfa_" << i << ".raw"; - FILE *f = fopen(ssraw.str().c_str(), "w"); - fwrite(n, 1, n->length, f); - fclose(f); + auto f = openStdioFile(ssraw.str()); + fwrite(n, 1, n->length, f.get()); } } } @@ -1847,9 +1850,8 @@ void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { if (dump_raw) { stringstream ssraw; ssraw << base << "som_rev_nfa_" << i << ".raw"; - FILE *f = fopen(ssraw.str().c_str(), "w"); - fwrite(n, 1, n->length, f); - fclose(f); + auto f = openStdioFile(ssraw.str()); + fwrite(n, 1, n->length, f.get()); } } } @@ -2067,26 +2069,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { dumpAnchoredStats(atable, f); } - if (ftable) { - fprintf(f, "\nFloating literal matcher stats:\n\n"); - hwlmPrintStats(ftable, f); - } - - if (drtable) { - fprintf(f, "\nDelay Rebuild literal matcher stats:\n\n"); - hwlmPrintStats(drtable, f); - } - - if (etable) { - fprintf(f, "\nEOD-anchored literal matcher stats:\n\n"); - hwlmPrintStats(etable, f); - } - - if (sbtable) { - fprintf(f, "\nSmall-block literal matcher stats:\n\n"); - hwlmPrintStats(sbtable, f); - } - dumpLongLiteralTable(t, f); } @@ -2221,6 +2203,30 @@ void roseDumpPrograms(const vector &fragments, const RoseEngine *t, dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); } +static +void roseDumpLiteralMatchers(const RoseEngine *t, const string &base) { + if (const HWLM *ftable = getFloatingMatcher(t)) { + auto f = openStdioFile(base + "/lit_table_floating.txt"); + hwlmPrintStats(ftable, f.get()); + } + + if (const HWLM *drtable = getDelayRebuildMatcher(t)) { + auto f = openStdioFile(base + "/lit_table_delay_rebuild.txt"); + hwlmPrintStats(drtable, f.get()); + } + + if (const HWLM *etable = getEodMatcher(t)) { + auto f = openStdioFile(base + "/lit_table_eod.txt"); + hwlmPrintStats(etable, f.get()); + } + + if (const HWLM *sbtable = getSmallBlockMatcher(t)) { + auto f = openStdioFile(base + "/lit_table_small_block.txt"); + hwlmPrintStats(sbtable, 
f.get()); + } + +} + void dumpRose(const RoseBuildImpl &build, const vector &fragments, const map &leftfix_queue_map, const map &suffix_queue_map, @@ -2231,24 +2237,19 @@ void dumpRose(const RoseBuildImpl &build, const vector &fragments, return; } - stringstream ss; - ss << grey.dumpPath << "rose.txt"; - - FILE *f = fopen(ss.str().c_str(), "w"); + auto f = openStdioFile(grey.dumpPath + "/rose.txt"); if (!t) { - fprintf(f, "<< no rose >>\n"); - fclose(f); + fprintf(f.get(), "<< no rose >>\n"); return; } // Dump Rose table info - roseDumpText(t, f); - - fclose(f); + roseDumpText(t, f.get()); roseDumpComponents(t, false, grey.dumpPath); roseDumpPrograms(fragments, t, grey.dumpPath); + roseDumpLiteralMatchers(t, grey.dumpPath); // Graph. dumpRoseGraph(build, t, fragments, leftfix_queue_map, suffix_queue_map, @@ -2257,9 +2258,8 @@ void dumpRose(const RoseBuildImpl &build, const vector &fragments, // Literals dumpRoseLiterals(build, fragments, grey); - f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w"); - roseDumpStructRaw(t, f); - fclose(f); + f = openStdioFile(grey.dumpPath + "/rose_struct.txt"); + roseDumpStructRaw(t, f.get()); } } // namespace ue2 From 4edf1e4195d503edb5266f130c34b7c25d1b0379 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 26 Jun 2017 10:48:25 +1000 Subject: [PATCH 042/190] dump: move openStdioFile() to util/dump_util.h --- src/rose/rose_build_dump.cpp | 31 +++++++++++-------------------- src/util/dump_util.h | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index d3e800cf..718596e0 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -48,6 +48,7 @@ #include "util/compile_context.h" #include "util/container.h" #include "util/dump_charclass.h" +#include "util/dump_util.h" #include "util/graph_range.h" #include "util/multibit.h" #include "util/multibit_build.h" @@ -1679,19 +1680,9 @@ void dumpComponentInfo(const 
RoseEngine *t, const string &base) { } } -/** - * \brief Helper function: returns a writeable C stdio FILE* handle wrapped in - * a unique_ptr that takes care of closing the file on destruction. - */ -static -std::unique_ptr openStdioFile(const string &filename) { - return std::unique_ptr( - fopen(filename.c_str(), "w"), &fclose); -} - static void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { - auto f = openStdioFile(base + "/rose_components.csv"); + auto f = openStdioFile(base + "/rose_components.csv", "w"); fprintf(f.get(), "Index, Offset,Engine Type,States,Stream State," "Bytecode Size,Kind,Notes\n"); @@ -1758,7 +1749,7 @@ void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { static void dumpExhaust(const RoseEngine *t, const string &base) { - auto f = openStdioFile(base + "/rose_exhaust.csv"); + auto f = openStdioFile(base + "/rose_exhaust.csv", "w"); const NfaInfo *infos = (const NfaInfo *)((const char *)t + t->nfaInfoOffset); @@ -1801,7 +1792,7 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { if (dump_raw) { stringstream ssraw; ssraw << base << "rose_nfa_" << i << ".raw"; - auto f = openStdioFile(ssraw.str()); + auto f = openStdioFile(ssraw.str(), "w"); fwrite(n, 1, n->length, f.get()); } } @@ -1850,7 +1841,7 @@ void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { if (dump_raw) { stringstream ssraw; ssraw << base << "som_rev_nfa_" << i << ".raw"; - auto f = openStdioFile(ssraw.str()); + auto f = openStdioFile(ssraw.str(), "w"); fwrite(n, 1, n->length, f.get()); } } @@ -2206,22 +2197,22 @@ void roseDumpPrograms(const vector &fragments, const RoseEngine *t, static void roseDumpLiteralMatchers(const RoseEngine *t, const string &base) { if (const HWLM *ftable = getFloatingMatcher(t)) { - auto f = openStdioFile(base + "/lit_table_floating.txt"); + auto f = openStdioFile(base + "/lit_table_floating.txt", "w"); hwlmPrintStats(ftable, f.get()); } if (const HWLM *drtable = 
getDelayRebuildMatcher(t)) { - auto f = openStdioFile(base + "/lit_table_delay_rebuild.txt"); + auto f = openStdioFile(base + "/lit_table_delay_rebuild.txt", "w"); hwlmPrintStats(drtable, f.get()); } if (const HWLM *etable = getEodMatcher(t)) { - auto f = openStdioFile(base + "/lit_table_eod.txt"); + auto f = openStdioFile(base + "/lit_table_eod.txt", "w"); hwlmPrintStats(etable, f.get()); } if (const HWLM *sbtable = getSmallBlockMatcher(t)) { - auto f = openStdioFile(base + "/lit_table_small_block.txt"); + auto f = openStdioFile(base + "/lit_table_small_block.txt", "w"); hwlmPrintStats(sbtable, f.get()); } @@ -2237,7 +2228,7 @@ void dumpRose(const RoseBuildImpl &build, const vector &fragments, return; } - auto f = openStdioFile(grey.dumpPath + "/rose.txt"); + auto f = openStdioFile(grey.dumpPath + "/rose.txt", "w"); if (!t) { fprintf(f.get(), "<< no rose >>\n"); @@ -2258,7 +2249,7 @@ void dumpRose(const RoseBuildImpl &build, const vector &fragments, // Literals dumpRoseLiterals(build, fragments, grey); - f = openStdioFile(grey.dumpPath + "/rose_struct.txt"); + f = openStdioFile(grey.dumpPath + "/rose_struct.txt", "w"); roseDumpStructRaw(t, f.get()); } diff --git a/src/util/dump_util.h b/src/util/dump_util.h index f5ebe94a..91aeb7f4 100644 --- a/src/util/dump_util.h +++ b/src/util/dump_util.h @@ -30,6 +30,8 @@ #define DUMP_UTIL #include +#include +#include namespace ue2 { @@ -38,6 +40,20 @@ namespace ue2 { */ FILE *fopen_or_throw(const char *path, const char *mode); +/** + * \brief Helper function: returns a C stdio FILE* handle wrapped in + * a unique_ptr that takes care of closing the file on destruction. + * + * If the file cannot be opened, throws an exception. 
+ */ +inline +std::unique_ptr +openStdioFile(const std::string &filename, const char *mode) { + return std::unique_ptr( + fopen_or_throw(filename.c_str(), mode), &fclose); +} + + } // namespace ue2 #endif From f762fb9af6131d441b1c09763eb75ccae7c28664 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 26 Jun 2017 14:25:44 +1000 Subject: [PATCH 043/190] dump_util: richer StdioFile type --- src/rose/rose_build_dump.cpp | 50 ++++++++++++++++++------------------ src/util/dump_util.h | 24 +++++++++-------- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 718596e0..81acec12 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1682,10 +1682,10 @@ void dumpComponentInfo(const RoseEngine *t, const string &base) { static void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { - auto f = openStdioFile(base + "/rose_components.csv", "w"); + StdioFile f(base + "/rose_components.csv", "w"); - fprintf(f.get(), "Index, Offset,Engine Type,States,Stream State," - "Bytecode Size,Kind,Notes\n"); + fprintf(f, "Index, Offset,Engine Type,States,Stream State," + "Bytecode Size,Kind,Notes\n"); for (u32 i = 0; i < t->queueCount; i++) { const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); @@ -1740,7 +1740,7 @@ void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { } } - fprintf(f.get(), "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, + fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, (const char *)n - (const char *)t, describe(*n).c_str(), n->nPositions, n->streamStateSize, n->length, to_string(kind).c_str(), notes.str().c_str()); @@ -1749,7 +1749,7 @@ void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { static void dumpExhaust(const RoseEngine *t, const string &base) { - auto f = openStdioFile(base + "/rose_exhaust.csv", "w"); + StdioFile f(base + "/rose_exhaust.csv", "w"); const NfaInfo *infos = (const NfaInfo *)((const char *)t + t->nfaInfoOffset); 
@@ -1759,7 +1759,7 @@ void dumpExhaust(const RoseEngine *t, const string &base) { for (u32 i = 0; i < queue_count; ++i) { u32 ekey_offset = infos[i].ekeyListOffset; - fprintf(f.get(), "%u (%u):", i, ekey_offset); + fprintf(f, "%u (%u):", i, ekey_offset); if (ekey_offset) { const u32 *ekeys = (const u32 *)((const char *)t + ekey_offset); @@ -1769,11 +1769,11 @@ void dumpExhaust(const RoseEngine *t, const string &base) { if (e == ~0U) { break; } - fprintf(f.get(), " %u", e); + fprintf(f, " %u", e); } } - fprintf(f.get(), "\n"); + fprintf(f, "\n"); } } @@ -1792,8 +1792,8 @@ void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { if (dump_raw) { stringstream ssraw; ssraw << base << "rose_nfa_" << i << ".raw"; - auto f = openStdioFile(ssraw.str(), "w"); - fwrite(n, 1, n->length, f.get()); + StdioFile f(ssraw.str(), "w"); + fwrite(n, 1, n->length, f); } } } @@ -1841,8 +1841,8 @@ void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { if (dump_raw) { stringstream ssraw; ssraw << base << "som_rev_nfa_" << i << ".raw"; - auto f = openStdioFile(ssraw.str(), "w"); - fwrite(n, 1, n->length, f.get()); + StdioFile f(ssraw.str(), "w"); + fwrite(n, 1, n->length, f); } } } @@ -2197,23 +2197,23 @@ void roseDumpPrograms(const vector &fragments, const RoseEngine *t, static void roseDumpLiteralMatchers(const RoseEngine *t, const string &base) { if (const HWLM *ftable = getFloatingMatcher(t)) { - auto f = openStdioFile(base + "/lit_table_floating.txt", "w"); - hwlmPrintStats(ftable, f.get()); + StdioFile f(base + "/lit_table_floating.txt", "w"); + hwlmPrintStats(ftable, f); } if (const HWLM *drtable = getDelayRebuildMatcher(t)) { - auto f = openStdioFile(base + "/lit_table_delay_rebuild.txt", "w"); - hwlmPrintStats(drtable, f.get()); + StdioFile f(base + "/lit_table_delay_rebuild.txt", "w"); + hwlmPrintStats(drtable, f); } if (const HWLM *etable = getEodMatcher(t)) { - auto f = openStdioFile(base + "/lit_table_eod.txt", "w"); - hwlmPrintStats(etable, 
f.get()); + StdioFile f(base + "/lit_table_eod.txt", "w"); + hwlmPrintStats(etable, f); } if (const HWLM *sbtable = getSmallBlockMatcher(t)) { - auto f = openStdioFile(base + "/lit_table_small_block.txt", "w"); - hwlmPrintStats(sbtable, f.get()); + StdioFile f(base + "/lit_table_small_block.txt", "w"); + hwlmPrintStats(sbtable, f); } } @@ -2228,15 +2228,15 @@ void dumpRose(const RoseBuildImpl &build, const vector &fragments, return; } - auto f = openStdioFile(grey.dumpPath + "/rose.txt", "w"); + StdioFile f(grey.dumpPath + "/rose.txt", "w"); if (!t) { - fprintf(f.get(), "<< no rose >>\n"); + fprintf(f, "<< no rose >>\n"); return; } // Dump Rose table info - roseDumpText(t, f.get()); + roseDumpText(t, f); roseDumpComponents(t, false, grey.dumpPath); roseDumpPrograms(fragments, t, grey.dumpPath); @@ -2249,8 +2249,8 @@ void dumpRose(const RoseBuildImpl &build, const vector &fragments, // Literals dumpRoseLiterals(build, fragments, grey); - f = openStdioFile(grey.dumpPath + "/rose_struct.txt", "w"); - roseDumpStructRaw(t, f.get()); + f = StdioFile(grey.dumpPath + "/rose_struct.txt", "w"); + roseDumpStructRaw(t, f); } } // namespace ue2 diff --git a/src/util/dump_util.h b/src/util/dump_util.h index 91aeb7f4..dc352c28 100644 --- a/src/util/dump_util.h +++ b/src/util/dump_util.h @@ -29,6 +29,8 @@ #ifndef DUMP_UTIL #define DUMP_UTIL +#include "noncopyable.h" + #include #include #include @@ -41,18 +43,20 @@ namespace ue2 { FILE *fopen_or_throw(const char *path, const char *mode); /** - * \brief Helper function: returns a C stdio FILE* handle wrapped in - * a unique_ptr that takes care of closing the file on destruction. - * - * If the file cannot be opened, throws an exception. + * \brief Helper class: wraps C stdio FILE* handle and takes care of closing + * the file on destruction. 
*/ -inline -std::unique_ptr -openStdioFile(const std::string &filename, const char *mode) { - return std::unique_ptr( - fopen_or_throw(filename.c_str(), mode), &fclose); -} +class StdioFile : noncopyable { +public: + StdioFile(const std::string &filename, const char *mode) + : handle(fopen_or_throw(filename.c_str(), mode), &fclose) {} + // Implicit conversion to FILE* for use by stdio calls. + operator FILE *() { return handle.get(); } + +private: + std::unique_ptr handle; +}; } // namespace ue2 From cbcc46444b39706cd0b4b6eeb5c6860be2aaca1d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 26 Jun 2017 11:15:29 +1000 Subject: [PATCH 044/190] fdr/teddy: dump confirm lit load --- src/fdr/fdr_dump.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/fdr/fdr_dump.cpp b/src/fdr/fdr_dump.cpp index ae81f257..0a4d7415 100644 --- a/src/fdr/fdr_dump.cpp +++ b/src/fdr/fdr_dump.cpp @@ -45,7 +45,7 @@ #error No dump support! #endif -using std::unique_ptr; +using namespace std; namespace ue2 { @@ -60,6 +60,17 @@ bool fdrIsTeddy(const FDR *fdr) { return !getFdrDescription(engine); } +static +void dumpLitIndex(const FDRConfirm *fdrc, FILE *f) { + const u32 *lit_index = getConfirmLitIndex(fdrc); + u32 num_lits = 1U << fdrc->nBits; + u32 lits_used = count_if(lit_index, lit_index + num_lits, + [](u32 idx) { return idx != 0; }); + + fprintf(f, " load %u/%u (%0.2f%%)\n", lits_used, num_lits, + (double)lits_used / (double)(num_lits)*100); +} + static void dumpConfirms(const void *fdr_base, u32 conf_offset, u32 num_confirms, FILE *f) { @@ -71,6 +82,7 @@ void dumpConfirms(const void *fdr_base, u32 conf_offset, u32 num_confirms, fprintf(f, " mult 0x%016llx\n", fdrc->mult); fprintf(f, " nbits %u\n", fdrc->nBits); fprintf(f, " groups 0x%016llx\n", fdrc->groups); + dumpLitIndex(fdrc, f); } } From bf3ced92f4a1a3dc230c604ca99564debbe3fffa Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 26 Jun 2017 14:39:46 +1000 Subject: [PATCH 045/190] hwlm_dump: 
take base filename, like NFA dump API --- src/hwlm/hwlm_dump.cpp | 11 +++++++---- src/hwlm/hwlm_dump.h | 10 +++++----- src/rose/rose_build_dump.cpp | 21 ++++++++------------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/hwlm/hwlm_dump.cpp b/src/hwlm/hwlm_dump.cpp index 58411ab2..59353eee 100644 --- a/src/hwlm/hwlm_dump.cpp +++ b/src/hwlm/hwlm_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,16 +38,19 @@ #include "ue2common.h" #include "fdr/fdr_dump.h" #include "nfa/accel_dump.h" - -#include +#include "util/dump_util.h" #ifndef DUMP_SUPPORT #error No dump support! #endif +using namespace std; + namespace ue2 { -void hwlmPrintStats(const HWLM *h, FILE *f) { +void hwlmGenerateDumpFiles(const HWLM *h, const string &base) { + StdioFile f(base + ".txt", "w"); + switch (h->type) { case HWLM_ENGINE_NOOD: noodPrintStats((const noodTable *)HWLM_C_DATA(h), f); diff --git a/src/hwlm/hwlm_dump.h b/src/hwlm/hwlm_dump.h index e7e38353..12f61c86 100644 --- a/src/hwlm/hwlm_dump.h +++ b/src/hwlm/hwlm_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,16 +35,16 @@ #ifdef DUMP_SUPPORT -#include +#include struct HWLM; namespace ue2 { /** \brief Dump some information about the give HWLM structure. 
*/ -void hwlmPrintStats(const HWLM *h, FILE *f); +void hwlmGenerateDumpFiles(const HWLM *h, const std::string &base); } // namespace ue2 -#endif -#endif +#endif // DUMP_SUPPORT +#endif // HWLM_DUMP_H diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 81acec12..5e9f95f2 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -2196,26 +2196,21 @@ void roseDumpPrograms(const vector &fragments, const RoseEngine *t, static void roseDumpLiteralMatchers(const RoseEngine *t, const string &base) { - if (const HWLM *ftable = getFloatingMatcher(t)) { - StdioFile f(base + "/lit_table_floating.txt", "w"); - hwlmPrintStats(ftable, f); + if (const HWLM *hwlm = getFloatingMatcher(t)) { + hwlmGenerateDumpFiles(hwlm, base + "/lit_table_floating"); } - if (const HWLM *drtable = getDelayRebuildMatcher(t)) { - StdioFile f(base + "/lit_table_delay_rebuild.txt", "w"); - hwlmPrintStats(drtable, f); + if (const HWLM *hwlm = getDelayRebuildMatcher(t)) { + hwlmGenerateDumpFiles(hwlm, base + "/lit_table_delay_rebuild"); } - if (const HWLM *etable = getEodMatcher(t)) { - StdioFile f(base + "/lit_table_eod.txt", "w"); - hwlmPrintStats(etable, f); + if (const HWLM *hwlm = getEodMatcher(t)) { + hwlmGenerateDumpFiles(hwlm, base + "/lit_table_eod"); } - if (const HWLM *sbtable = getSmallBlockMatcher(t)) { - StdioFile f(base + "/lit_table_small_block.txt", "w"); - hwlmPrintStats(sbtable, f); + if (const HWLM *hwlm = getSmallBlockMatcher(t)) { + hwlmGenerateDumpFiles(hwlm, base + "/lit_table_small_block"); } - } void dumpRose(const RoseBuildImpl &build, const vector &fragments, From 4be7d6fecc20ed51b0821ff764d0352a02699a35 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 30 May 2017 16:12:41 +1000 Subject: [PATCH 046/190] noodle: Use a sane temp buf for streaming --- src/hwlm/hwlm.c | 3 +-- src/hwlm/noodle_engine.c | 7 ++++--- src/hwlm/noodle_engine.h | 5 ++--- src/scratch.h | 2 -- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git 
a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 6eaa7ed1..37e56ae0 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -228,8 +228,7 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, cb, ctxt); } else { return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb, - ctxt, scratch->fdr_temp_buf, - FDR_TEMP_BUF_SIZE); + ctxt); } } else { // t->type == HWLM_ENGINE_FDR diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index 9758f42b..0370ef31 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -370,8 +370,7 @@ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, /** \brief Streaming-mode scanner. */ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, - HWLMCallback cb, void *ctxt, u8 *temp_buf, - UNUSED size_t temp_buffer_size) { + HWLMCallback cb, void *ctxt) { assert(n); struct cb_info cbi = {cb, n->id, ctxt, 0}; @@ -380,10 +379,12 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, if (hlen) { assert(hbuf); + u8 ALIGN_DIRECTIVE temp_buf[16]; // HWLM_LITERAL_MAX_LEN * 2 + size_t tl1 = MIN(n->len - 1, hlen); size_t tl2 = MIN(n->len - 1, len); size_t temp_len = tl1 + tl2; - assert(temp_len < temp_buffer_size); + assert(temp_len < sizeof(temp_buf)); memcpy(temp_buf, hbuf + hlen - tl1, tl1); memcpy(temp_buf + tl1, buf, tl2); diff --git a/src/hwlm/noodle_engine.h b/src/hwlm/noodle_engine.h index e044a863..597a7bbc 100644 --- a/src/hwlm/noodle_engine.h +++ b/src/hwlm/noodle_engine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,8 +49,7 @@ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, /** \brief Streaming-mode scanner. 
*/ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, - HWLMCallback cb, void *ctxt, u8 *temp_buf, - size_t temp_buffer_size); + HWLMCallback cb, void *ctxt); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/scratch.h b/src/scratch.h index 47f8afa8..1d4b849e 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -45,7 +45,6 @@ extern "C" #endif UNUSED static const u32 SCRATCH_MAGIC = 0x544F4259; -#define FDR_TEMP_BUF_SIZE 222 struct fatbit; struct hs_scratch; @@ -201,7 +200,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 delay_fatbit_size; /**< size of each delay fatbit in bytes */ u32 scratchSize; char *scratch_alloc; /* user allocated scratch object */ - u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE]; }; /* array of fatbit ptr; TODO: why not an array of fatbits? */ From 293f9fcc495a9ba139c58e852f0d7e7bf2647f3c Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 30 May 2017 16:26:13 +1000 Subject: [PATCH 047/190] noodle: we don't need memcpy --- src/hwlm/noodle_engine.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index 0370ef31..4897eb09 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -39,6 +39,7 @@ #include "util/intrinsics.h" #include "util/join.h" #include "util/masked_move.h" +#include "util/partial_store.h" #include "util/simd_utils.h" #include @@ -385,8 +386,11 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, size_t tl2 = MIN(n->len - 1, len); size_t temp_len = tl1 + tl2; assert(temp_len < sizeof(temp_buf)); - memcpy(temp_buf, hbuf + hlen - tl1, tl1); - memcpy(temp_buf + tl1, buf, tl2); + assert(tl1 <= sizeof(u64a)); + assert(tl2 <= sizeof(u64a)); + unaligned_store_u64a(temp_buf, + partial_load_u64a(hbuf + hlen - tl1, tl1)); + unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2)); cbi.offsetAdj = -tl1; rv = scan(temp_buf, temp_len, n->str, 
n->len, n->key_offset, n->nocase, From 9c538a7522da22d0b39eb96026d7862c23256f01 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 26 Jun 2017 10:15:49 +1000 Subject: [PATCH 048/190] Move hwlm literal len define --- src/hwlm/hwlm.h | 5 ++++- src/hwlm/hwlm_literal.h | 3 --- src/hwlm/noodle_engine.c | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/hwlm/hwlm.h b/src/hwlm/hwlm.h index a17575df..00561346 100644 --- a/src/hwlm/hwlm.h +++ b/src/hwlm/hwlm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,6 +71,9 @@ typedef hwlm_group_t hwlmcb_rv_t; * designed for a different architecture). */ #define HWLM_ERROR_UNKNOWN 2 +/** \brief Max length of the literal passed to HWLM. */ +#define HWLM_LITERAL_MAX_LEN 8 + struct hs_scratch; struct HWLM; diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index 0e2a1ea5..9ae7744d 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -42,9 +42,6 @@ namespace ue2 { -/** \brief Max length of the literal passed to HWLM. */ -#define HWLM_LITERAL_MAX_LEN 8 - /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. 
*/ #define HWLM_MASKLEN 8 diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index 4897eb09..cdc07dfc 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -380,7 +380,8 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, if (hlen) { assert(hbuf); - u8 ALIGN_DIRECTIVE temp_buf[16]; // HWLM_LITERAL_MAX_LEN * 2 + u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2]; + memset(temp_buf, 0, sizeof(temp_buf)); size_t tl1 = MIN(n->len - 1, hlen); size_t tl2 = MIN(n->len - 1, len); From 31a445a0e8d7c9e6b763783bfe2f13d89a4dd4e3 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 30 May 2017 15:54:51 +1000 Subject: [PATCH 049/190] noodle: behave like our other literal matchers Noodle now supports supplementary masks. --- src/hwlm/hwlm.c | 50 +++---- src/hwlm/hwlm_build.cpp | 5 - src/hwlm/noodle_build.cpp | 87 ++++++++--- src/hwlm/noodle_engine.c | 246 ++++++++++++++++++-------------- src/hwlm/noodle_engine_avx2.c | 43 +++--- src/hwlm/noodle_engine_avx512.c | 42 +++--- src/hwlm/noodle_engine_sse.c | 45 +++--- src/hwlm/noodle_internal.h | 21 +-- 8 files changed, 307 insertions(+), 232 deletions(-) diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 37e56ae0..4af987c5 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -184,20 +184,18 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, if (t->type == HWLM_ENGINE_NOOD) { DEBUG_PRINTF("calling noodExec\n"); - return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, cb, - ctxt); - } else { - assert(t->type == HWLM_ENGINE_FDR); - const union AccelAux *aa = &t->accel0; - if ((groups & ~t->accel1_groups) == 0) { - DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); - aa = &t->accel1; - } - do_accel_block(aa, buf, len, &start); - DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, - start); - return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups); + return noodExec(HWLM_C_DATA(t), buf, len, start, cb, 
ctxt); } + + assert(t->type == HWLM_ENGINE_FDR); + const union AccelAux *aa = &t->accel0; + if ((groups & ~t->accel1_groups) == 0) { + DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); + aa = &t->accel1; + } + do_accel_block(aa, buf, len, &start); + DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); + return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups); } hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, @@ -224,23 +222,21 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, // If we've been handed a start offset, we can use a block mode scan at // that offset. if (start) { - return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, - cb, ctxt); + return noodExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt); } else { return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb, ctxt); } - } else { - // t->type == HWLM_ENGINE_FDR - const union AccelAux *aa = &t->accel0; - if ((groups & ~t->accel1_groups) == 0) { - DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); - aa = &t->accel1; - } - do_accel_streaming(aa, hbuf, hlen, buf, len, &start); - DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, - start); - return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, - start, cb, ctxt, groups); } + + assert(t->type == HWLM_ENGINE_FDR); + const union AccelAux *aa = &t->accel0; + if ((groups & ~t->accel1_groups) == 0) { + DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); + aa = &t->accel1; + } + do_accel_streaming(aa, hbuf, hlen, buf, len, &start); + DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); + return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, start, cb, + ctxt, groups); } diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 2f61ea6d..c2db5480 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -89,11 +89,6 @@ bool 
isNoodleable(const vector &lits, return false; } - if (!lits.front().msk.empty()) { - DEBUG_PRINTF("noodle can't handle supplementary masks\n"); - return false; - } - return true; } diff --git a/src/hwlm/noodle_build.cpp b/src/hwlm/noodle_build.cpp index 63fdf072..4a6ac8d7 100644 --- a/src/hwlm/noodle_build.cpp +++ b/src/hwlm/noodle_build.cpp @@ -35,14 +35,33 @@ #include "hwlm_literal.h" #include "noodle_internal.h" +#include "util/bitutils.h" #include "util/compare.h" #include "util/verify_types.h" #include "ue2common.h" #include // for memcpy +#include + +using std::vector; namespace ue2 { +static +u64a make_u64a_mask(const vector &v) { + assert(v.size() <= sizeof(u64a)); + if (v.size() > sizeof(u64a)) { + throw std::exception(); + } + + u64a mask = 0; + size_t len = v.size(); + unsigned char *m = (unsigned char *)&mask; + DEBUG_PRINTF("making mask len %zu\n", len); + memcpy(m, &v[0], len); + return mask; +} + static size_t findNoodFragOffset(const hwlmLiteral &lit) { const auto &s = lit.s; @@ -67,30 +86,60 @@ size_t findNoodFragOffset(const hwlmLiteral &lit) { } bytecode_ptr noodBuildTable(const hwlmLiteral &lit) { - if (!lit.msk.empty()) { - DEBUG_PRINTF("noodle can't handle supplementary masks\n"); - return nullptr; + const auto &s = lit.s; + + size_t mask_len = std::max(s.length(), lit.msk.size()); + DEBUG_PRINTF("mask is %zu bytes\n", lit.msk.size()); + assert(mask_len <= 8); + assert(lit.msk.size() == lit.cmp.size()); + + vector n_msk(mask_len); + vector n_cmp(mask_len); + + for (unsigned i = mask_len - lit.msk.size(), j = 0; i < mask_len; + i++, j++) { + DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx\n", i, lit.msk[j], i, lit.cmp[j]); + n_msk[i] = lit.msk[j]; + n_cmp[i] = lit.cmp[j]; } - const auto &s = lit.s; - size_t noodle_len = sizeof(noodTable) + s.length(); - auto n = make_zeroed_bytecode_ptr(noodle_len); + size_t s_off = mask_len - s.length(); + for (unsigned i = s_off; i < mask_len; i++) { + u8 c = s[i - s_off]; + u8 si_msk = lit.nocase && ourisalpha(c) ? 
(u8)CASE_CLEAR : (u8)0xff; + n_msk[i] |= si_msk; + n_cmp[i] |= c & si_msk; + assert((n_cmp[i] & si_msk) == c); + DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx '%c'\n", i, n_msk[i], i, n_cmp[i], + ourisprint(c) ? (char)c : '.'); + } + + auto n = make_zeroed_bytecode_ptr(sizeof(noodTable)); assert(n); + DEBUG_PRINTF("size of nood %zu\n", sizeof(noodTable)); size_t key_offset = findNoodFragOffset(lit); n->id = lit.id; - n->len = verify_u32(s.length()); - n->key_offset = verify_u32(key_offset); + n->lit_len = s.length(); + n->single = s.length() == 1 ? 1 : 0; + n->key_offset = verify_u8(n->lit_len - key_offset); n->nocase = lit.nocase ? 1 : 0; - memcpy(n->str, s.c_str(), s.length()); + n->key0 = s[key_offset]; + if (n->single) { + n->key1 = 0; + } else { + n->key1 = s[key_offset + 1]; + } + n->msk = make_u64a_mask(n_msk); + n->cmp = make_u64a_mask(n_cmp); + n->msk_len = mask_len; return n; } -size_t noodSize(const noodTable *n) { - assert(n); // shouldn't call with null - return sizeof(*n) + n->len; +size_t noodSize(const noodTable *) { + return sizeof(noodTable); } } // namespace ue2 @@ -102,13 +151,17 @@ namespace ue2 { void noodPrintStats(const noodTable *n, FILE *f) { fprintf(f, "Noodle table\n"); - fprintf(f, "Len: %u Key Offset: %u\n", n->len, n->key_offset); + fprintf(f, "Len: %u Key Offset: %u\n", n->lit_len, n->key_offset); + fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n", + n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len), + n->msk_len); fprintf(f, "String: "); - for (u32 i = 0; i < n->len; i++) { - if (isgraph(n->str[i]) && n->str[i] != '\\') { - fprintf(f, "%c", n->str[i]); + for (u32 i = n->msk_len - n->lit_len; i < n->msk_len; i++) { + const u8 *m = (const u8 *)&n->cmp; + if (isgraph(m[i]) && m[i] != '\\') { + fprintf(f, "%c", m[i]); } else { - fprintf(f, "\\x%02hhx", n->str[i]); + fprintf(f, "\\x%02hhx", m[i]); } } fprintf(f, "\n"); diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index cdc07dfc..ba8d6913 100644 --- 
a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -84,9 +84,8 @@ struct cb_info { while (unlikely(z)) { \ Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos; \ - DEBUG_PRINTF("match pos %zu\n", matchPos); \ - hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \ - matchPos); \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ + hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); \ RETURN_IF_TERMINATED(rv); \ } \ } while (0) @@ -96,9 +95,8 @@ struct cb_info { while (unlikely(z)) { \ Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos - 1; \ - DEBUG_PRINTF("match pos %zu\n", matchPos); \ - hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \ - noCase, cbi, matchPos); \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ + hwlmcb_rv_t rv = final(n, buf, len, 0, cbi, matchPos); \ RETURN_IF_TERMINATED(rv); \ } \ } while (0) @@ -112,21 +110,28 @@ u8 caseClear8(u8 x, bool noCase) { // is used only for single chars with case insensitivity used correctly, // so it can go straight to the callback if we get this far. 
static really_inline -hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen, - size_t keyOffset, bool is_double, bool noCase, - const struct cb_info *cbi, size_t pos) { - pos -= keyOffset; - if (is_double) { - if (pos + keyLen > len) { - return HWLM_SUCCESS; - } - if (cmpForward(buf + pos, key, keyLen, noCase)) { // ret 1 on mismatch - return HWLM_SUCCESS; +hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, + char single, const struct cb_info *cbi, size_t pos) { + if (single) { + if (n->msk_len == 1) { + goto match; } } - pos += cbi->offsetAdj; - DEBUG_PRINTF("match @ %zu->%zu\n", pos, (pos + keyLen - 1)); - hwlmcb_rv_t rv = cbi->cb(pos, (pos + keyLen - 1), cbi->id, cbi->ctx); + assert(len >= n->msk_len); + u64a v = + partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len); + DEBUG_PRINTF("v %016llx msk %016llx cmp %016llx\n", v, n->msk, n->cmp); + if ((v & n->msk) != n->cmp) { + /* mask didn't match */ + return HWLM_SUCCESS; + } + +match: + pos -= cbi->offsetAdj; + DEBUG_PRINTF("match @ %zu->%zu\n", pos + n->key_offset - n->lit_len, + pos + n->key_offset); + hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - n->lit_len, + pos + n->key_offset - 1, cbi->id, cbi->ctx); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATED; } @@ -148,38 +153,43 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen, #endif static really_inline -hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key, - bool noCase, const struct cb_info *cbi) { +hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, bool noCase, + const struct cb_info *cbi) { - const MASK_TYPE mask1 = getMask(key[0], noCase); + const MASK_TYPE mask1 = getMask(n->key0, noCase); const MASK_TYPE caseMask = getCaseMask(); + size_t offset = start + n->msk_len - 1; + size_t end = len; + assert(offset < end); + #if !defined(HAVE_AVX512) hwlm_error_t rv; - size_t end = len; - if (len 
< CHUNKSIZE) { - rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len); + if (end - offset < CHUNKSIZE) { + rv = scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, offset, + end); return rv; } - if (len == CHUNKSIZE) { - rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi, - 0, len); + if (end - offset == CHUNKSIZE) { + rv = scanSingleUnaligned(n, buf, len, 0, noCase, caseMask, mask1, cbi, + offset, end); return rv; } uintptr_t data = (uintptr_t)buf; - uintptr_t s2Start = ROUNDUP_N(data, CHUNKSIZE) - data; + uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; uintptr_t last = data + end; uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data; - uintptr_t s3Start = len - CHUNKSIZE; + uintptr_t s3Start = end - CHUNKSIZE; - if (s2Start) { + if (offset != s2Start) { // first scan out to the fast scan starting point DEBUG_PRINTF("stage 1: -> %zu\n", s2Start); - rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi, - 0, s2Start); + rv = scanSingleUnaligned(n, buf, len, 0, noCase, caseMask, mask1, cbi, + offset, s2Start); RETURN_IF_TERMINATED(rv); } @@ -187,68 +197,70 @@ hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key, // scan as far as we can, bounded by the last point this key can // possibly match DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End); - rv = scanSingleFast(buf, len, key, noCase, caseMask, mask1, cbi, - s2Start, s2End); + rv = scanSingleFast(n, buf, len, noCase, caseMask, mask1, cbi, s2Start, + s2End); RETURN_IF_TERMINATED(rv); } // if we are done bail out - if (s2End == end) { + if (s2End == len) { return HWLM_SUCCESS; } - DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, end); - rv = scanSingleUnaligned(buf, len, s3Start, key, noCase, caseMask, mask1, - cbi, s2End, end); + DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, len); + rv = scanSingleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, cbi, + s2End, len); return rv; #else // HAVE_AVX512 - return 
scanSingle512(buf, len, key, noCase, caseMask, mask1, cbi); + return scanSingle512(n, buf, len, noCase, caseMask, mask1, cbi, offset, + end); #endif } static really_inline -hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, +hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, bool noCase, const struct cb_info *cbi) { // we stop scanning for the key-fragment when the rest of the key can't // possibly fit in the remaining buffer - size_t end = len - keyLen + keyOffset + 2; + size_t end = len - n->key_offset + 2; + + // the first place the key can match + size_t offset = start + n->msk_len - n->key_offset; const MASK_TYPE caseMask = getCaseMask(); - const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase); - const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase); + const MASK_TYPE mask1 = getMask(n->key0, noCase); + const MASK_TYPE mask2 = getMask(n->key1, noCase); #if !defined(HAVE_AVX512) hwlm_error_t rv; - if (end - keyOffset < CHUNKSIZE) { - rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, keyOffset, end); + if (end - offset < CHUNKSIZE) { + rv = scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + offset, end); return rv; } - if (end - keyOffset == CHUNKSIZE) { - rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset, - noCase, caseMask, mask1, mask2, cbi, keyOffset, - end); + if (end - offset == CHUNKSIZE) { + rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + mask2, cbi, offset, end); return rv; } uintptr_t data = (uintptr_t)buf; - uintptr_t s2Start = ROUNDUP_N(data + keyOffset, CHUNKSIZE) - data; + uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; uintptr_t s1End = s2Start + 1; uintptr_t last = data + end; uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data; uintptr_t s3Start = end - CHUNKSIZE; - uintptr_t off = keyOffset; + 
uintptr_t off = offset; - if (s2Start != keyOffset) { + if (s2Start != off) { // first scan out to the fast scan starting point plus one char past to // catch the key on the overlap - DEBUG_PRINTF("stage 1: -> %zu\n", s2Start); - rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset, - noCase, caseMask, mask1, mask2, cbi, off, - s1End); + DEBUG_PRINTF("stage 1: %zu -> %zu\n", off, s2Start); + rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + mask2, cbi, off, s1End); RETURN_IF_TERMINATED(rv); } off = s1End; @@ -262,8 +274,8 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, // scan as far as we can, bounded by the last point this key can // possibly match DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start); - rv = scanDoubleFast(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, s2Start, s2End); + rv = scanDoubleFast(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + s2Start, s2End); RETURN_IF_TERMINATED(rv); off = s2End; } @@ -274,98 +286,101 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, } DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end); - rv = scanDoubleUnaligned(buf, len, s3Start, key, keyLen, keyOffset, noCase, - caseMask, mask1, mask2, cbi, off, end); + rv = scanDoubleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, + mask2, cbi, off, end); return rv; #else // AVX512 - return scanDouble512(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, keyOffset, end); + return scanDouble512(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + offset, end); #endif // AVX512 } static really_inline -hwlm_error_t scanSingleNoCase(const u8 *buf, size_t len, const u8 *key, +hwlm_error_t scanSingleNoCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanSingleMain(buf, len, key, 1, cbi); + return scanSingleMain(n, buf, len, start, 1, cbi); } static really_inline -hwlm_error_t 
scanSingleCase(const u8 *buf, size_t len, const u8 *key, +hwlm_error_t scanSingleCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanSingleMain(buf, len, key, 0, cbi); + return scanSingleMain(n, buf, len, start, 0, cbi); } // Single-character specialisation, used when keyLen = 1 static really_inline -hwlm_error_t scanSingle(const u8 *buf, size_t len, const u8 *key, bool noCase, - const struct cb_info *cbi) { - if (!ourisalpha(key[0])) { +hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len, + size_t start, bool noCase, const struct cb_info *cbi) { + if (!ourisalpha(n->key0)) { noCase = 0; // force noCase off if we don't have an alphabetic char } // kinda ugly, but this forces constant propagation if (noCase) { - return scanSingleNoCase(buf, len, key, cbi); + return scanSingleNoCase(n, buf, len, start, cbi); } else { - return scanSingleCase(buf, len, key, cbi); + return scanSingleCase(n, buf, len, start, cbi); } } static really_inline -hwlm_error_t scanDoubleNoCase(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, +hwlm_error_t scanDoubleNoCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanDoubleMain(buf, len, key, keyLen, keyOffset, 1, cbi); + return scanDoubleMain(n, buf, len, start, 1, cbi); } static really_inline -hwlm_error_t scanDoubleCase(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, +hwlm_error_t scanDoubleCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanDoubleMain(buf, len, key, keyLen, keyOffset, 0, cbi); + return scanDoubleMain(n, buf, len, start, 0, cbi); } static really_inline -hwlm_error_t scanDouble(const u8 *buf, size_t len, const u8 *key, size_t keyLen, - size_t keyOffset, bool noCase, - const struct cb_info *cbi) { +hwlm_error_t scanDouble(const struct noodTable *n, const 
u8 *buf, size_t len, + size_t start, bool noCase, const struct cb_info *cbi) { // kinda ugly, but this forces constant propagation if (noCase) { - return scanDoubleNoCase(buf, len, key, keyLen, keyOffset, cbi); + return scanDoubleNoCase(n, buf, len, start, cbi); } else { - return scanDoubleCase(buf, len, key, keyLen, keyOffset, cbi); + return scanDoubleCase(n, buf, len, start, cbi); } } // main entry point for the scan code static really_inline -hwlm_error_t scan(const u8 *buf, size_t len, const u8 *key, size_t keyLen, - size_t keyOffset, bool noCase, const struct cb_info *cbi) { - if (len < keyLen) { +hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len, + size_t start, char single, bool noCase, + const struct cb_info *cbi) { + if (len - start < n->msk_len) { // can't find string of length keyLen in a shorter buffer return HWLM_SUCCESS; } - if (keyLen == 1) { - assert(keyOffset == 0); - return scanSingle(buf, len, key, noCase, cbi); + if (single) { + return scanSingle(n, buf, len, start, noCase, cbi); } else { - return scanDouble(buf, len, key, keyLen, keyOffset, noCase, cbi); + return scanDouble(n, buf, len, start, noCase, cbi); } } /** \brief Block-mode scanner. */ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, - size_t offset_adj, HWLMCallback cb, void *ctxt) { + size_t start, HWLMCallback cb, void *ctxt) { assert(n && buf); - struct cb_info cbi = { cb, n->id, ctxt, offset_adj }; - DEBUG_PRINTF("nood scan of %zu bytes for %*s\n", len, n->len, n->str); - return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi); + struct cb_info cbi = {cb, n->id, ctxt, 0}; + DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->lit_len, + (const char *)&n->cmp + n->msk_len - n->lit_len, buf); + + return scan(n, buf, len, start, n->single, n->nocase, &cbi); } /** \brief Streaming-mode scanner. 
*/ @@ -375,34 +390,49 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, assert(n); struct cb_info cbi = {cb, n->id, ctxt, 0}; - hwlm_error_t rv; + DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen, + n->lit_len, (const char *)&n->cmp + n->msk_len - n->lit_len, + buf); if (hlen) { + /* + * we have history, so build up a buffer from enough of the history + * buffer plus what we've been given to scan. Since this is relatively + * short, just check against msk+cmp per byte offset for matches. + */ assert(hbuf); - u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2]; memset(temp_buf, 0, sizeof(temp_buf)); - size_t tl1 = MIN(n->len - 1, hlen); - size_t tl2 = MIN(n->len - 1, len); - size_t temp_len = tl1 + tl2; - assert(temp_len < sizeof(temp_buf)); + assert(n->msk_len); + size_t tl1 = MIN((size_t)n->msk_len - 1, hlen); + size_t tl2 = MIN((size_t)n->msk_len - 1, len); + + assert(tl1 + tl2 <= sizeof(temp_buf)); assert(tl1 <= sizeof(u64a)); assert(tl2 <= sizeof(u64a)); + DEBUG_PRINTF("using %zu bytes of hist and %zu bytes of buf\n", tl1, tl2); + unaligned_store_u64a(temp_buf, partial_load_u64a(hbuf + hlen - tl1, tl1)); unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2)); - cbi.offsetAdj = -tl1; - rv = scan(temp_buf, temp_len, n->str, n->len, n->key_offset, n->nocase, - &cbi); - if (rv == HWLM_TERMINATED) { - return HWLM_TERMINATED; + for (size_t i = 0; i < tl1; i++) { + u64a v = unaligned_load_u64a(temp_buf + i); + if ((v & n->msk) == n->cmp) { + size_t m_end = -tl1 + i + n->msk_len - 1; + size_t m_start = m_end - n->lit_len; + DEBUG_PRINTF("match @ %zu->%zu (i %zu)\n", m_start, m_end, i); + hwlmcb_rv_t rv = cb(m_start, m_end, n->id, ctxt); + if (rv == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATED; + } + } } } assert(buf); cbi.offsetAdj = 0; - return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi); + return scan(n, buf, len, 0, n->single, n->nocase, &cbi); } diff --git 
a/src/hwlm/noodle_engine_avx2.c b/src/hwlm/noodle_engine_avx2.c index a3f46047..f10e4a7b 100644 --- a/src/hwlm/noodle_engine_avx2.c +++ b/src/hwlm/noodle_engine_avx2.c @@ -38,10 +38,11 @@ static really_inline m256 getCaseMask(void) { } static really_inline -hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset, - const u8 *key, bool noCase, m256 caseMask, - m256 mask1, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m256 caseMask, m256 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); const size_t l = end - start; @@ -66,11 +67,11 @@ hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset, } static really_inline -hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset, - const u8 *key, size_t keyLen, size_t keyOffset, - bool noCase, m256 caseMask, m256 mask1, - m256 mask2, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m256 caseMask, m256 mask1, m256 mask2, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); size_t l = end - start; @@ -100,8 +101,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset, // alignment boundary if needed and to finish off data that the aligned scan // function can't handle (due to small/unaligned chunk at end) static really_inline -hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, - bool noCase, m256 caseMask, m256 mask1, +hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, const struct cb_info *cbi, 
size_t start, size_t end) { const u8 *d = buf + start; @@ -140,11 +141,10 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m256 caseMask, m256 mask1, m256 mask2, - const struct cb_info *cbi, size_t start, - size_t end) { +hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, + m256 mask2, const struct cb_info *cbi, + size_t start, size_t end) { const u8 *d = buf + start; size_t l = end - start; if (!l) { @@ -182,8 +182,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key, - bool noCase, m256 caseMask, m256 mask1, +hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; @@ -203,10 +203,9 @@ hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m256 caseMask, m256 mask1, m256 mask2, - const struct cb_info *cbi, size_t start, +hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, + m256 mask2, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; DEBUG_PRINTF("start %zu end %zu \n", start, end); diff --git a/src/hwlm/noodle_engine_avx512.c b/src/hwlm/noodle_engine_avx512.c index d4e6527f..8cac1b15 100644 --- a/src/hwlm/noodle_engine_avx512.c +++ b/src/hwlm/noodle_engine_avx512.c @@ -43,8 +43,8 @@ m512 getCaseMask(void) { // alignment boundary if needed and to finish off data that the 
aligned scan // function can't handle (due to small/unaligned chunk at end) static really_inline -hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, - bool noCase, m512 caseMask, m512 mask1, +hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m512 caseMask, m512 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start; @@ -73,11 +73,12 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key, +hwlm_error_t scanSingle512(const struct noodTable *n, const u8 *buf, size_t len, bool noCase, m512 caseMask, m512 mask1, - const struct cb_info *cbi) { - const u8 *d = buf; - const u8 *e = buf + len; + const struct cb_info *cbi, size_t start, + size_t end) { + const u8 *d = buf + start; + const u8 *e = buf + end; DEBUG_PRINTF("start %p end %p \n", d, e); assert(d < e); if (d + 64 >= e) { @@ -86,8 +87,8 @@ hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key, // peel off first part to cacheline boundary const u8 *d1 = ROUNDUP_PTR(d, 64); - if (scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, - d1 - d) == HWLM_TERMINATED) { + if (scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, start, + d1 - buf) == HWLM_TERMINATED) { return HWLM_TERMINATED; } d = d1; @@ -106,16 +107,15 @@ tail: DEBUG_PRINTF("d %p e %p \n", d, e); // finish off tail - return scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, d - buf, + return scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, d - buf, e - buf); } static really_inline -hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m512 caseMask, m512 mask1, m512 mask2, - const struct cb_info *cbi, u64a *lastz0, - size_t start, size_t end) { +hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, + size_t len, 
bool noCase, m512 caseMask, m512 mask1, + m512 mask2, const struct cb_info *cbi, + u64a *lastz0, size_t start, size_t end) { DEBUG_PRINTF("start %zu end %zu last 0x%016llx\n", start, end, *lastz0); const u8 *d = buf + start; ptrdiff_t scan_len = end - start; @@ -142,9 +142,8 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m512 caseMask, m512 mask1, m512 mask2, +hwlm_error_t scanDouble512(const struct noodTable *n, const u8 *buf, size_t len, + bool noCase, m512 caseMask, m512 mask1, m512 mask2, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start; @@ -158,9 +157,8 @@ hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key, // peel off first part to cacheline boundary const u8 *d1 = ROUNDUP_PTR(d, 64); - if (scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, &lastz0, start, - d1 - buf) == HWLM_TERMINATED) { + if (scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + &lastz0, start, d1 - buf) == HWLM_TERMINATED) { return HWLM_TERMINATED; } d = d1; @@ -188,6 +186,6 @@ tail: DEBUG_PRINTF("d %p e %p off %zu \n", d, e, d - buf); // finish off tail - return scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, - mask1, mask2, cbi, &lastz0, d - buf, end); + return scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + &lastz0, d - buf, end); } diff --git a/src/hwlm/noodle_engine_sse.c b/src/hwlm/noodle_engine_sse.c index 40575409..7cd53d7c 100644 --- a/src/hwlm/noodle_engine_sse.c +++ b/src/hwlm/noodle_engine_sse.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,8 +38,8 @@ 
static really_inline m128 getCaseMask(void) { } static really_inline -hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, - bool noCase, m128 caseMask, m128 mask1, +hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start; @@ -67,10 +67,11 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset, - const u8 *key, bool noCase, m128 caseMask, - m128 mask1, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m128 caseMask, m128 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); const size_t l = end - start; @@ -96,11 +97,10 @@ hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset, } static really_inline -hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m128 caseMask, m128 mask1, m128 mask2, - const struct cb_info *cbi, size_t start, - size_t end) { +hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, + m128 mask2, const struct cb_info *cbi, + size_t start, size_t end) { const u8 *d = buf + start; size_t l = end - start; if (!l) { @@ -128,11 +128,11 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset, - const u8 *key, size_t keyLen, size_t keyOffset, - bool noCase, m128 caseMask, m128 mask1, - m128 mask2, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t 
scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m128 caseMask, m128 mask1, m128 mask2, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); size_t l = end - start; @@ -158,8 +158,8 @@ hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset, } static really_inline -hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key, - bool noCase, m128 caseMask, m128 mask1, +hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; @@ -179,10 +179,9 @@ hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key, } static really_inline -hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key, - size_t keyLen, size_t keyOffset, bool noCase, - m128 caseMask, m128 mask1, m128 mask2, - const struct cb_info *cbi, size_t start, +hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, + m128 mask2, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; assert(d < e); diff --git a/src/hwlm/noodle_internal.h b/src/hwlm/noodle_internal.h index cc287816..bfb1a9e2 100644 --- a/src/hwlm/noodle_internal.h +++ b/src/hwlm/noodle_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,18 +30,23 @@ * \brief Data structures for Noodle literal matcher engine. 
*/ -#ifndef NOODLE_INTERNAL_H_25D751C42E34A6 -#define NOODLE_INTERNAL_H_25D751C42E34A6 +#ifndef NOODLE_INTERNAL_H +#define NOODLE_INTERNAL_H #include "ue2common.h" struct noodTable { u32 id; - u32 len; - u32 key_offset; - u8 nocase; - u8 str[]; + u64a msk; + u64a cmp; + u8 lit_len; + u8 msk_len; + u8 key_offset; + u8 nocase; + u8 single; + u8 key0; + u8 key1; }; -#endif /* NOODLE_INTERNAL_H_25D751C42E34A6 */ +#endif /* NOODLE_INTERNAL_H */ From a18fbfe873397f4b0a868366f4532927cc74f9d5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 12:46:56 +1000 Subject: [PATCH 050/190] castle_dump: use StdioFile --- src/nfa/castle_dump.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/nfa/castle_dump.cpp b/src/nfa/castle_dump.cpp index 1514ca8c..595b98ec 100644 --- a/src/nfa/castle_dump.cpp +++ b/src/nfa/castle_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,7 +71,7 @@ void dumpTextSubCastle(const SubCastle &sub, FILE *f) { void nfaExecCastle_dump(const struct NFA *nfa, const string &base) { const Castle *c = (const Castle *)getImplNfa(nfa); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + StdioFile f(base + ".txt", "w"); fprintf(f, "Castle multi-tenant repeat engine\n"); fprintf(f, "\n"); @@ -117,7 +117,6 @@ void nfaExecCastle_dump(const struct NFA *nfa, const string &base) { fprintf(f, "Sub %u:\n", i); dumpTextSubCastle(sub[i], f); } - fclose(f); } } // namespace ue2 From 11408d0ce36f074da7186906f97a5d32c8f4333c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 12:52:14 +1000 Subject: [PATCH 051/190] goughcompile_dump: use StdioFile --- src/nfa/goughcompile_dump.cpp | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git 
a/src/nfa/goughcompile_dump.cpp b/src/nfa/goughcompile_dump.cpp index cb361cdb..96ab196e 100644 --- a/src/nfa/goughcompile_dump.cpp +++ b/src/nfa/goughcompile_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,8 +32,10 @@ #include "goughcompile_internal.h" #include "grey.h" #include "util/container.h" +#include "util/dump_util.h" #include "util/graph_range.h" +#include #include #ifndef DUMP_SUPPORT @@ -66,10 +68,7 @@ string dump_name(const gough_edge_id &e) { static void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) { - stringstream ss; - ss << grey.dumpPath << "gough_" << base << ".dot"; - - FILE *f = fopen(ss.str().c_str(), "w"); + StdioFile f(grey.dumpPath + "gough_" + base + ".dot", "w"); fprintf(f, "digraph NFA {\n"); fprintf(f, "rankdir=LR;\n"); @@ -94,8 +93,6 @@ void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) { dump_name(g[s]).c_str(), dump_name(g[t]).c_str()); } fprintf(f, "}\n"); - - fclose(f); } static @@ -133,9 +130,7 @@ set uses(const GoughEdgeProps &ep) { static void dump_var_mapping(const GoughGraph &g, const string &base, const Grey &grey) { - stringstream ss; - ss << grey.dumpPath << "gough_" << base << "_vars.txt"; - FILE *f = fopen(ss.str().c_str(), "w"); + StdioFile f(grey.dumpPath + "gough_" + base + "_vars.txt", "w"); for (auto v : vertices_range(g)) { set used = uses(g[v]); if (g[v].vars.empty() && used.empty()) { @@ -180,7 +175,6 @@ void dump_var_mapping(const GoughGraph &g, const string &base, fprintf(f, "\n"); } } - fclose(f); } static @@ -220,12 +214,7 @@ void gather_vars(const GoughGraph &g, vector *vars, static void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) { - FILE *f; - { - stringstream ss; - ss << grey.dumpPath << "gough_" << base << 
"_vars.dot"; - f = fopen(ss.str().c_str(), "w"); - } + StdioFile f(grey.dumpPath + "gough_" + base + "_vars.dot", "w"); fprintf(f, "digraph NFA {\n"); fprintf(f, "rankdir=LR;\n"); fprintf(f, "size=\"11.5,8\"\n"); @@ -271,7 +260,6 @@ void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) { } fprintf(f, "}\n"); - fclose(f); } void dump(const GoughGraph &g, const string &base, const Grey &grey) { @@ -317,18 +305,11 @@ void dump_blocks(const map> &blocks, return; } - FILE *f; - { - stringstream ss; - ss << grey.dumpPath << "gough_" << base << "_programs.txt"; - f = fopen(ss.str().c_str(), "w"); - } + StdioFile f(grey.dumpPath + "gough_" + base + "_programs.txt", "w"); for (const auto &m : blocks) { dump_block(f, m.first, m.second); } - - fclose(f); } } // namespace ue2 From a16a6f48b02260dbdafd52228a3ceb5daff72c67 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 12:54:26 +1000 Subject: [PATCH 052/190] goughdump: use StdioFile --- src/nfa/goughdump.cpp | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/nfa/goughdump.cpp b/src/nfa/goughdump.cpp index 1b37a0b1..5f710612 100644 --- a/src/nfa/goughdump.cpp +++ b/src/nfa/goughdump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -353,22 +353,14 @@ void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) { void nfaExecGough16_dump(const NFA *nfa, const string &base) { assert(nfa->type == GOUGH_NFA_16); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); - nfaExecGough16_dumpText(nfa, f); - fclose(f); - f = fopen_or_throw((base + ".dot").c_str(), "w"); - nfaExecGough16_dumpDot(nfa, f); - fclose(f); + nfaExecGough16_dumpText(nfa, StdioFile(base + ".txt", "w")); + nfaExecGough16_dumpDot(nfa, StdioFile(base + ".dot", "w")); } 
void nfaExecGough8_dump(const NFA *nfa, const string &base) { assert(nfa->type == GOUGH_NFA_8); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); - nfaExecGough8_dumpText(nfa, f); - fclose(f); - f = fopen_or_throw((base + ".dot").c_str(), "w"); - nfaExecGough8_dumpDot(nfa, f); - fclose(f); + nfaExecGough8_dumpText(nfa, StdioFile(base + ".txt", "w")); + nfaExecGough8_dumpDot(nfa, StdioFile(base + ".dot", "w")); } } // namespace ue2 From 80cf4bd9a2954600a1ae35df0833393ddd47b26c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 12:56:55 +1000 Subject: [PATCH 053/190] lbr_dump: use StdioFile --- src/nfa/lbr_dump.cpp | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/nfa/lbr_dump.cpp b/src/nfa/lbr_dump.cpp index 0948e122..89da6871 100644 --- a/src/nfa/lbr_dump.cpp +++ b/src/nfa/lbr_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,47 +71,40 @@ void nfaExecLbrDot_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_DOT); const lbr_dot *ld = (const lbr_dot *)getImplNfa(nfa); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + StdioFile f(base + ".txt", "w"); lbrDumpCommon(&ld->common, f); fprintf(f, "DOT model\n"); fprintf(f, "\n"); dumpTextReverse(nfa, f); - fclose(f); } void nfaExecLbrVerm_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_VERM); const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa); - - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); - + StdioFile f(base + ".txt", "w"); lbrDumpCommon(&lv->common, f); fprintf(f, "VERM model, scanning for 0x%02x\n", lv->c); fprintf(f, "\n"); dumpTextReverse(nfa, f); - fclose(f); } void nfaExecLbrNVerm_dump(const NFA *nfa, const string &base) { 
assert(nfa); assert(nfa->type == LBR_NFA_NVERM); const lbr_verm *lv = (const lbr_verm *)getImplNfa(nfa); - - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); - + StdioFile f(base + ".txt", "w"); lbrDumpCommon(&lv->common, f); fprintf(f, "NEGATED VERM model, scanning for 0x%02x\n", lv->c); fprintf(f, "\n"); dumpTextReverse(nfa, f); - fclose(f); } void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_SHUF); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + StdioFile f(base + ".txt", "w"); const lbr_shuf *ls = (const lbr_shuf *)getImplNfa(nfa); lbrDumpCommon(&ls->common, f); @@ -122,14 +115,13 @@ void nfaExecLbrShuf_dump(const NFA *nfa, const string &base) { describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count()); fprintf(f, "\n"); dumpTextReverse(nfa, f); - fclose(f); } void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) { assert(nfa); assert(nfa->type == LBR_NFA_TRUF); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + StdioFile f(base + ".txt", "w"); const lbr_truf *lt = (const lbr_truf *)getImplNfa(nfa); lbrDumpCommon(<->common, f); @@ -140,7 +132,6 @@ void nfaExecLbrTruf_dump(const NFA *nfa, const string &base) { describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count()); fprintf(f, "\n"); dumpTextReverse(nfa, f); - fclose(f); } } // namespace ue2 From 0b40e96385b9b20306fd68a5197e6822697c8410 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:03:52 +1000 Subject: [PATCH 054/190] limex_dump: use StdioFile, tidy --- src/nfa/limex_dump.cpp | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index 797e87ba..9256c841 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -487,25 +487,24 @@ void dumpLimDotInfo(const limex_type *limex, u32 state, FILE *f) { } } +template +static +void dumpLimexDot(const NFA *nfa, const limex_type *limex, FILE *f) { + 
dumpDotPreamble(f); + u32 state_count = nfa->nPositions; + dumpVertexDotInfo(limex, state_count, f, limex_labeller(limex)); + for (u32 i = 0; i < state_count; i++) { + dumpLimDotInfo(limex, i, f); + dumpExDotInfo(limex, i, f); + } + dumpDotTrailer(f); +} + #define LIMEX_DUMP_FN(size) \ void nfaExecLimEx##size##_dump(const NFA *nfa, const string &base) { \ auto limex = (const LimExNFA##size *)getImplNfa(nfa); \ - \ - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); \ - dumpLimexText(limex, f); \ - fclose(f); \ - \ - f = fopen_or_throw((base + ".dot").c_str(), "w"); \ - dumpDotPreamble(f); \ - u32 state_count = nfa->nPositions; \ - dumpVertexDotInfo(limex, state_count, f, \ - limex_labeller(limex)); \ - for (u32 i = 0; i < state_count; i++) { \ - dumpLimDotInfo(limex, i, f); \ - dumpExDotInfo(limex, i, f); \ - } \ - dumpDotTrailer(f); \ - fclose(f); \ + dumpLimexText(limex, StdioFile(base + ".txt", "w")); \ + dumpLimexDot(nfa, limex, StdioFile(base + ".dot", "w")); \ } LIMEX_DUMP_FN(32) From 74f6e41296d430790612937a0aadf2faf405a1cd Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:06:26 +1000 Subject: [PATCH 055/190] mcclellandump: use StdioFile --- src/nfa/mcclellandump.cpp | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/nfa/mcclellandump.cpp b/src/nfa/mcclellandump.cpp index 9e04ad63..a13795fd 100644 --- a/src/nfa/mcclellandump.cpp +++ b/src/nfa/mcclellandump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -442,22 +442,14 @@ void nfaExecMcClellan8_dumpText(const NFA *nfa, FILE *f) { void nfaExecMcClellan16_dump(const NFA *nfa, const string &base) { assert(nfa->type == MCCLELLAN_NFA_16); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); - nfaExecMcClellan16_dumpText(nfa, 
f); - fclose(f); - f = fopen_or_throw((base + ".dot").c_str(), "w"); - nfaExecMcClellan16_dumpDot(nfa, f); - fclose(f); + nfaExecMcClellan16_dumpText(nfa, StdioFile(base + ".txt", "w")); + nfaExecMcClellan16_dumpDot(nfa, StdioFile(base + ".dot", "w")); } void nfaExecMcClellan8_dump(const NFA *nfa, const string &base) { assert(nfa->type == MCCLELLAN_NFA_8); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); - nfaExecMcClellan8_dumpText(nfa, f); - fclose(f); - f = fopen_or_throw((base + ".dot").c_str(), "w"); - nfaExecMcClellan8_dumpDot(nfa, f); - fclose(f); + nfaExecMcClellan8_dumpText(nfa, StdioFile(base + ".txt", "w")); + nfaExecMcClellan8_dumpDot(nfa, StdioFile(base + ".dot", "w")); } } // namespace ue2 From 6c647c2fb86b82836f023580439f3126b384a332 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:07:39 +1000 Subject: [PATCH 056/190] mcsheng_dump: use StdioFile --- src/nfa/mcsheng_dump.cpp | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/nfa/mcsheng_dump.cpp b/src/nfa/mcsheng_dump.cpp index f5c058af..2b563079 100644 --- a/src/nfa/mcsheng_dump.cpp +++ b/src/nfa/mcsheng_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -394,22 +394,14 @@ void dump_text_8(const NFA *nfa, FILE *f) { void nfaExecMcSheng16_dump(const NFA *nfa, const string &base) { assert(nfa->type == MCSHENG_NFA_16); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); - dump_text_16(nfa, f); - fclose(f); - f = fopen_or_throw((base + ".dot").c_str(), "w"); - dump_dot_16(nfa, f); - fclose(f); + dump_text_16(nfa, StdioFile(base + ".txt", "w")); + dump_dot_16(nfa, StdioFile(base + ".dot", "w")); } void nfaExecMcSheng8_dump(const NFA *nfa, const string &base) { assert(nfa->type == MCSHENG_NFA_8); - FILE *f = 
fopen_or_throw((base + ".txt").c_str(), "w"); - dump_text_8(nfa, f); - fclose(f); - f = fopen_or_throw((base + ".dot").c_str(), "w"); - dump_dot_8(nfa, f); - fclose(f); + dump_text_8(nfa, StdioFile(base + ".txt", "w")); + dump_dot_8(nfa, StdioFile(base + ".dot", "w")); } } // namespace ue2 From 08e094748c9964d35331b99aa5a0c3204b232202 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:09:16 +1000 Subject: [PATCH 057/190] mpv_dump: use StdioFile --- src/nfa/mpv_dump.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/nfa/mpv_dump.cpp b/src/nfa/mpv_dump.cpp index 9a8a4067..4979395d 100644 --- a/src/nfa/mpv_dump.cpp +++ b/src/nfa/mpv_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -132,7 +132,7 @@ void dumpCounter(FILE *f, const mpv_counter_info *c) { void nfaExecMpv_dump(const NFA *nfa, const string &base) { const mpv *m = (const mpv *)getImplNfa(nfa); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + StdioFile f(base + ".txt", "w"); fprintf(f, "Puff the Magic Engines\n"); fprintf(f, "\n"); @@ -154,7 +154,6 @@ void nfaExecMpv_dump(const NFA *nfa, const string &base) { } dumpTextReverse(nfa, f); - fclose(f); } } // namespace ue2 From 63f3718c79428f14d76e32db20e933a22aa60367 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:10:40 +1000 Subject: [PATCH 058/190] shengdump: use StdioFile --- src/nfa/shengdump.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/nfa/shengdump.cpp b/src/nfa/shengdump.cpp index ce87beaf..99fda76f 100644 --- a/src/nfa/shengdump.cpp +++ b/src/nfa/shengdump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary 
forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,7 +41,6 @@ #include "util/dump_util.h" #include "util/simd_types.h" - #ifndef DUMP_SUPPORT #error No dump support! #endif @@ -267,12 +266,8 @@ void nfaExecSheng_dumpDot(const NFA *nfa, FILE *f) { void nfaExecSheng_dump(const NFA *nfa, const string &base) { assert(nfa->type == SHENG_NFA); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); - nfaExecSheng_dumpText(nfa, f); - fclose(f); - f = fopen_or_throw((base + ".dot").c_str(), "w"); - nfaExecSheng_dumpDot(nfa, f); - fclose(f); + nfaExecSheng_dumpText(nfa, StdioFile(base + ".txt", "w")); + nfaExecSheng_dumpDot(nfa, StdioFile(base + ".dot", "w")); } } // namespace ue2 From 06fa790b5d1ac239b2b783d191fb35d5e960540f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:11:54 +1000 Subject: [PATCH 059/190] tamarama_dump: use StdioFile --- src/nfa/tamarama_dump.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/nfa/tamarama_dump.cpp b/src/nfa/tamarama_dump.cpp index 88cb33cc..87c2b84c 100644 --- a/src/nfa/tamarama_dump.cpp +++ b/src/nfa/tamarama_dump.cpp @@ -54,7 +54,7 @@ namespace ue2 { void nfaExecTamarama_dump(const struct NFA *nfa, const string &base) { const Tamarama *t = (const Tamarama *)getImplNfa(nfa); - FILE *f = fopen_or_throw((base + ".txt").c_str(), "w"); + StdioFile f(base + ".txt", "w"); fprintf(f, "Tamarama container engine\n"); fprintf(f, "\n"); @@ -63,7 +63,6 @@ void nfaExecTamarama_dump(const struct NFA *nfa, const string &base) { fprintf(f, "\n"); dumpTextReverse(nfa, f); fprintf(f, "\n"); - fclose(f); const u32 *subOffset = (const u32 *)((const char *)t + sizeof(struct Tamarama) + From e9f4adba0d559a32444de1796967570e73daee28 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:15:07 +1000 Subject: [PATCH 060/190] ng_dump: use StdioFile --- src/nfagraph/ng_dump.cpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 
deletions(-) diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp index 094d2401..9624f762 100644 --- a/src/nfagraph/ng_dump.cpp +++ b/src/nfagraph/ng_dump.cpp @@ -51,6 +51,7 @@ #include "smallwrite/smallwrite_dump.h" #include "util/bitutils.h" #include "util/dump_charclass.h" +#include "util/dump_util.h" #include "util/report.h" #include "util/report_manager.h" #include "util/ue2string.h" @@ -348,14 +349,7 @@ void dumpSmallWrite(const RoseEngine *rose, const Grey &grey) { } const struct SmallWriteEngine *smwr = getSmallWrite(rose); - - stringstream ss; - ss << grey.dumpPath << "smallwrite.txt"; - - FILE *f = fopen(ss.str().c_str(), "w"); - smwrDumpText(smwr, f); - fclose(f); - + smwrDumpText(smwr, StdioFile(grey.dumpPath + "smallwrite.txt", "w")); smwrDumpNFA(smwr, false, grey.dumpPath); } @@ -420,9 +414,7 @@ void dumpReportManager(const ReportManager &rm, const Grey &grey) { return; } - stringstream ss; - ss << grey.dumpPath << "internal_reports.txt"; - FILE *f = fopen(ss.str().c_str(), "w"); + StdioFile f(grey.dumpPath + "internal_reports.txt", "w"); const vector &reports = rm.reports(); for (size_t i = 0; i < reports.size(); i++) { const Report &report = reports[i]; @@ -461,7 +453,6 @@ void dumpReportManager(const ReportManager &rm, const Grey &grey) { } fprintf(f, "\n"); } - fclose(f); } } // namespace ue2 From 9f0bc429b4599b15da95ddac7fcef47c87a7b564 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:17:04 +1000 Subject: [PATCH 061/190] rose_in_dump: use StdioFile --- src/rose/rose_in_dump.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rose/rose_in_dump.cpp b/src/rose/rose_in_dump.cpp index 172b58e8..5266e9d7 100644 --- a/src/rose/rose_in_dump.cpp +++ b/src/rose/rose_in_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted 
provided that the following conditions are met: @@ -35,6 +35,7 @@ #include "nfagraph/ng_dump.h" #include "nfagraph/ng_util.h" #include "util/container.h" +#include "util/dump_util.h" #include "util/graph_range.h" #include @@ -59,7 +60,7 @@ void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey, filename = "pre_rose.dot"; } DEBUG_PRINTF("dumping rose graphs\n"); - FILE *f = fopen((grey.dumpPath + filename).c_str(), "w"); + StdioFile f(grey.dumpPath + filename, "w"); fprintf(f, "digraph NFA {\n"); fprintf(f, "rankdir=LR;\n"); fprintf(f, "size=\"11.5,8\"\n"); @@ -127,7 +128,6 @@ void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey, } fprintf(f, "}\n"); - fclose(f); } } From 82606b3ffea660ebe29f844bdd067a766f2e5f2a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:18:51 +1000 Subject: [PATCH 062/190] smallwrite_dump: use StdioFile --- src/smallwrite/smallwrite_dump.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/smallwrite/smallwrite_dump.cpp b/src/smallwrite/smallwrite_dump.cpp index bdf55c30..b2c33ecf 100644 --- a/src/smallwrite/smallwrite_dump.cpp +++ b/src/smallwrite/smallwrite_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "nfa/nfa_build_util.h" #include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" +#include "util/dump_util.h" #include #include @@ -74,9 +75,8 @@ void smwrDumpNFA(const SmallWriteEngine *smwr, bool dump_raw, nfaGenerateDumpFiles(n, base + "smallwrite_nfa"); if (dump_raw) { - FILE *f = fopen((base + "smallwrite_nfa.raw").c_str(), "w"); + StdioFile f(base + "smallwrite_nfa.raw", "w"); fwrite(n, 1, n->length, f); - fclose(f); } } From a659b9b686876810c94c7556e26719b25f4e8005 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 
Jun 2017 13:20:31 +1000 Subject: [PATCH 063/190] slot_manager_dump: use StdioFile --- src/som/slot_manager_dump.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/som/slot_manager_dump.cpp b/src/som/slot_manager_dump.cpp index 484d6c14..4ed5cef0 100644 --- a/src/som/slot_manager_dump.cpp +++ b/src/som/slot_manager_dump.cpp @@ -36,10 +36,11 @@ #include "nfagraph/ng_dump.h" #include "nfagraph/ng_is_equal.h" #include "util/container.h" +#include "util/dump_util.h" #include "ue2common.h" -#include #include +#include #include #ifndef DUMP_SUPPORT @@ -55,7 +56,6 @@ void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey) { return; } - string filename = grey.dumpPath + "/ssm.txt"; map by_slot; map by_slot_ir; @@ -67,7 +67,7 @@ void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey) { by_slot_ir[e.slot] = &e; } - FILE *f = fopen(filename.c_str(), "w"); + StdioFile f(grey.dumpPath + "/ssm.txt", "w"); fprintf(f, "slot width %u bytes\n\n", ssm.precision); @@ -94,8 +94,6 @@ void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey) { } } - fclose(f); - for (const auto &h : ssm.cache->initial_prefixes) { dumpHolder(*h, hash_holder(*h), "ssm_prefix", grey); } From 4105dd48050f1a6d1468be3208c1e9ba8c117bb5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 13:12:23 +1000 Subject: [PATCH 064/190] tamarama_dump: fix typo in comment --- src/nfa/tamarama_dump.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfa/tamarama_dump.cpp b/src/nfa/tamarama_dump.cpp index 87c2b84c..e6d34f7c 100644 --- a/src/nfa/tamarama_dump.cpp +++ b/src/nfa/tamarama_dump.cpp @@ -27,7 +27,7 @@ */ /** \file - * \brief Tamarama: container engine for exclusve engines, dump code. + * \brief Tamarama: container engine for exclusive engines, dump code. 
*/ #include "config.h" From 1d041b12b7ea5d197646f0ad5a8ad732028a8b06 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 25 May 2017 12:46:01 +1000 Subject: [PATCH 065/190] shufti/truffle tests: silence ubsan warning The ubsan support in clang warned about us accessing idx-1 of an array here. --- unit/internal/shufti.cpp | 17 ++++++++++------- unit/internal/truffle.cpp | 17 ++++++++++------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp index 06407c41..0c9d2607 100644 --- a/unit/internal/shufti.cpp +++ b/unit/internal/shufti.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -910,12 +910,13 @@ TEST(ReverseShufti, ExecNoMatch1) { int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); - char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + char t[] = " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + char *t1 = t + 1; size_t len = strlen(t1); for (size_t i = 0; i < 16; i++) { const u8 *rv = rshuftiExec(lo, hi, (u8 *)t1, (u8 *)t1 + len - i); - ASSERT_EQ((const u8 *)(t1 - 1), rv); + ASSERT_EQ((const u8 *)t, rv); } } @@ -929,12 +930,13 @@ TEST(ReverseShufti, ExecNoMatch2) { int ret = shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); ASSERT_NE(-1, ret); - char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + char t[] = " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + char *t1 = t + 1; size_t len = strlen(t1); for (size_t i = 0; i < 16; i++) { const u8 *rv = rshuftiExec(lo, hi, (u8 *)t1, (u8 *)t1 + len - i); - ASSERT_EQ((const u8 *)(t1 - 1), rv); + ASSERT_EQ((const u8 *)t, rv); } } @@ -947,12 +949,13 @@ TEST(ReverseShufti, ExecNoMatch3) { int ret = shuftiBuildMasks(chars, (u8 *)&lo, 
(u8 *)&hi); ASSERT_NE(-1, ret); - char t1[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; + char t[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; + char *t1 = t + 1; size_t len = strlen(t1); for (size_t i = 0; i < 16; i++) { const u8 *rv = rshuftiExec(lo, hi, (u8 *)t1, (u8 *)t1 + len - i); - ASSERT_EQ((const u8 *)(t1 - 1), rv); + ASSERT_EQ((const u8 *)t, rv); } } diff --git a/unit/internal/truffle.cpp b/unit/internal/truffle.cpp index e9e4f19c..988eb13c 100644 --- a/unit/internal/truffle.cpp +++ b/unit/internal/truffle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -391,12 +391,13 @@ TEST(ReverseTruffle, ExecNoMatch1) { truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); - char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + char t[] = " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + char *t1 = t + 1; size_t len = strlen(t1); for (size_t i = 0; i < 16; i++) { const u8 *rv = rtruffleExec(mask1, mask2, (u8 *)t1, (u8 *)t1 + len - i); - ASSERT_EQ((const u8 *)(t1 - 1), rv); + ASSERT_EQ((const u8 *)t, rv); } } @@ -410,12 +411,13 @@ TEST(ReverseTruffle, ExecNoMatch2) { truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); - char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + char t[] = " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + char *t1 = t + 1; size_t len = strlen(t1); for (size_t i = 0; i < 16; i++) { const u8 *rv = rtruffleExec(mask1, mask2, (u8 *)t1, (u8 *)t1 + len - i); - ASSERT_EQ((const u8 *)(t1 - 1), rv); + ASSERT_EQ((const u8 *)t, rv); } } @@ -427,12 +429,13 @@ TEST(ReverseTruffle, ExecNoMatch3) { truffleBuildMasks(chars, (u8 *)&mask1, (u8 *)&mask2); - char t1[] = 
"eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; + char t[] = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"; + char *t1 = t + 1; size_t len = strlen(t1); for (size_t i = 0; i < 16; i++) { const u8 *rv = rtruffleExec(mask1, mask2, (u8 *)t1, (u8 *)t1 + len - i); - ASSERT_EQ((const u8 *)(t1 - 1), rv); + ASSERT_EQ((const u8 *)t, rv); } } From 166f5d8ba5a432bc64163057ba36f319bfc3695e Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 29 Jun 2017 16:26:56 +1000 Subject: [PATCH 066/190] noodle: scan using the correct offsets --- src/hwlm/noodle_engine.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index ba8d6913..cd1eb2d1 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -174,8 +174,8 @@ hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf, } if (end - offset == CHUNKSIZE) { - rv = scanSingleUnaligned(n, buf, len, 0, noCase, caseMask, mask1, cbi, - offset, end); + rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + cbi, offset, end); return rv; } @@ -188,8 +188,8 @@ hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf, if (offset != s2Start) { // first scan out to the fast scan starting point DEBUG_PRINTF("stage 1: -> %zu\n", s2Start); - rv = scanSingleUnaligned(n, buf, len, 0, noCase, caseMask, mask1, cbi, - offset, s2Start); + rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + cbi, offset, s2Start); RETURN_IF_TERMINATED(rv); } From f2b97a51d86f7ae7b1ed50c56e21a4fe131f2092 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 30 Jun 2017 09:10:48 +1000 Subject: [PATCH 067/190] noodle: param name --- src/hwlm/noodle_engine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hwlm/noodle_engine.h b/src/hwlm/noodle_engine.h index 597a7bbc..18847e5a 100644 --- a/src/hwlm/noodle_engine.h +++ b/src/hwlm/noodle_engine.h @@ -44,7 +44,7 @@ struct 
noodTable; /** \brief Block-mode scanner. */ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, - size_t offset_adj, HWLMCallback cb, void *ctxt); + size_t start, HWLMCallback cb, void *ctxt); /** \brief Streaming-mode scanner. */ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, From 35d396d0611337dd8abeef1a861c4325658a5042 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 30 Jun 2017 11:42:32 +1000 Subject: [PATCH 068/190] noodle: correct streaming bounds --- src/hwlm/noodle_engine.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index cd1eb2d1..f0d711b2 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -389,12 +389,17 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, HWLMCallback cb, void *ctxt) { assert(n); + if (len + hlen < n->msk_len) { + DEBUG_PRINTF("not enough bytes for a match\n"); + return HWLM_SUCCESS; + } + struct cb_info cbi = {cb, n->id, ctxt, 0}; DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen, n->lit_len, (const char *)&n->cmp + n->msk_len - n->lit_len, buf); - if (hlen) { + if (hlen && n->msk_len > 1) { /* * we have history, so build up a buffer from enough of the history * buffer plus what we've been given to scan. 
Since this is relatively @@ -409,6 +414,7 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, size_t tl2 = MIN((size_t)n->msk_len - 1, len); assert(tl1 + tl2 <= sizeof(temp_buf)); + assert(tl1 + tl2 >= n->msk_len); assert(tl1 <= sizeof(u64a)); assert(tl2 <= sizeof(u64a)); DEBUG_PRINTF("using %zu bytes of hist and %zu bytes of buf\n", tl1, tl2); @@ -417,7 +423,7 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, partial_load_u64a(hbuf + hlen - tl1, tl1)); unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2)); - for (size_t i = 0; i < tl1; i++) { + for (size_t i = 0; i <= tl1 + tl2 - n->msk_len; i++) { u64a v = unaligned_load_u64a(temp_buf + i); if ((v & n->msk) == n->cmp) { size_t m_end = -tl1 + i + n->msk_len - 1; From a1ff4d32937e4bbc07ac710de6d282437149917a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 28 Jun 2017 16:29:31 +1000 Subject: [PATCH 069/190] small_color_map: add efficient 2-bit color map --- CMakeLists.txt | 1 + src/nfagraph/ng_cyclic_redundancy.cpp | 13 +-- src/util/graph_small_color_map.h | 161 ++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 7 deletions(-) create mode 100644 src/util/graph_small_color_map.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c2e298a..1d62b3c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -997,6 +997,7 @@ SET (hs_SRCS src/util/fatbit_build.cpp src/util/fatbit_build.h src/util/graph.h + src/util/graph_small_color_map.h src/util/hash.h src/util/hash_dynamic_bitset.h src/util/math.h diff --git a/src/nfagraph/ng_cyclic_redundancy.cpp b/src/nfagraph/ng_cyclic_redundancy.cpp index e4138a4f..80980a66 100644 --- a/src/nfagraph/ng_cyclic_redundancy.cpp +++ b/src/nfagraph/ng_cyclic_redundancy.cpp @@ -63,6 +63,7 @@ #include "ng_util.h" #include "util/container.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include "util/ue2_containers.h" #include @@ -124,17 +125,15 @@ class SearchVisitor : public 
boost::default_dfs_visitor { } // namespace -template +template static bool searchForward(const Graph &g, const CharReach &reach, - vector &colours, + ColorMap &colours, const flat_set &s, typename Graph::vertex_descriptor w) { - fill(colours.begin(), colours.end(), boost::white_color); - auto colour_map = - make_iterator_property_map(colours.begin(), get(vertex_index, g)); + colours.fill(small_color::white); try { - depth_first_visit(g, w, SearchVisitor(reach), colour_map, + depth_first_visit(g, w, SearchVisitor(reach), colours, VertexInSet(s)); } catch (SearchFailed &) { return false; @@ -166,7 +165,7 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, typedef typename Graph::vertex_descriptor vertex_descriptor; // Colour map used for depth_first_visit(). - vector colours(num_vertices(g)); + auto colours = make_small_color_map(g); // precalc successors of v. flat_set succ_v; diff --git a/src/util/graph_small_color_map.h b/src/util/graph_small_color_map.h new file mode 100644 index 00000000..03e61cf4 --- /dev/null +++ b/src/util/graph_small_color_map.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \brief Small Color Map: implements a property map designed to represent + * colors using minimal memory (two bits per index). + * + * This is based on the Boost BGL two_bit_color_map, but provides some extra + * functionality (such as a fill operation). + */ + +#ifndef GRAPH_SMALL_COLOR_MAP_H +#define GRAPH_SMALL_COLOR_MAP_H + +#include "ue2common.h" + +#include +#include +#include + +namespace ue2 { + +enum class small_color : u8 { + white = 0, + gray = 1, + black = 2 + // Note: we have room for one more colour. +}; + +} // namespace ue2 + +namespace boost { + +/** \brief Specialisation of boost::color_traits for small_color. */ +template<> +struct color_traits { + static ue2::small_color white() { return ue2::small_color::white; } + static ue2::small_color gray() { return ue2::small_color::gray; } + static ue2::small_color black() { return ue2::small_color::black; } +}; + +} // namespace boost + +namespace ue2 { + +static constexpr u8 fill_lut[] = { + 0, // white + 0x55, // gray + 0xaa, // black +}; + +/** + * \brief Small Color Map: implements a property map designed to represent + * colors using minimal memory (two bits per index). 
+ * + * If your graph type provides an index map in get(vertex_index, g), you can + * use make_small_color_map() to construct this. + */ +template +class small_color_map { + size_t n; + IndexMap index_map; + + // This class is passed by value into (potentially recursive) BGL + // algorithms, so we use a shared_ptr to keep the copy lightweight and + // ensure that data is correctly destroyed. + std::shared_ptr> data; + + static constexpr size_t bit_size = 2; + static constexpr size_t entries_per_byte = (sizeof(u8) * 8) / bit_size; + static constexpr u8 bit_mask = (1U << bit_size) - 1; + +public: + using key_type = typename boost::property_traits::key_type; + using value_type = small_color; + using reference = small_color; + using category = boost::read_write_property_map_tag; + + small_color_map(size_t n_in, const IndexMap &index_map_in) + : n(n_in), index_map(index_map_in) { + size_t num_bytes = (n + entries_per_byte - 1) / entries_per_byte; + data = std::make_shared>(num_bytes); + fill(small_color::white); + } + + void fill(small_color color) { + assert(static_cast(color) < sizeof(fill_lut)); + u8 val = fill_lut[static_cast(color)]; + std::memset(data->data(), val, data->size()); + } + + small_color get_impl(key_type key) const { + auto i = get(index_map, key); + assert(i < n); + size_t byte = i / entries_per_byte; + assert(byte < data->size()); + size_t bit = (i % entries_per_byte) * bit_size; + u8 val = ((*data)[byte] >> bit) & bit_mask; + return static_cast(val); + } + + void put_impl(key_type key, small_color color) { + auto i = get(index_map, key); + assert(i < n); + size_t byte = i / entries_per_byte; + assert(byte < data->size()); + size_t bit = (i % entries_per_byte) * bit_size; + auto &block = (*data)[byte]; + u8 val = static_cast(color); + block = (block & ~(bit_mask << bit)) | (val << bit); + } +}; + +template +small_color get(const small_color_map &color_map, + typename boost::property_traits::key_type key) { + return color_map.get_impl(key); +} + 
+template +void put(small_color_map &color_map, + typename boost::property_traits::key_type key, + small_color val) { + color_map.put_impl(key, val); +} + +template +auto make_small_color_map(const Graph &g) + -> small_color_map { + return small_color_map( + num_vertices(g), get(vertex_index, g)); +} + +} // namespace ue2 + +#endif // GRAPH_SMALL_COLOR_MAP_H From 48f9a6d518f7d32832c7dc9f18970ce9d12f604d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 10:26:17 +1000 Subject: [PATCH 070/190] limex_compile: use small_color_map --- src/nfa/limex_compile.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 7183d4b7..5e18b800 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -55,6 +55,7 @@ #include "util/container.h" #include "util/graph.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include "util/order_check.h" #include "util/verify_types.h" #include "util/ue2_containers.h" @@ -544,11 +545,9 @@ void filterAccelStates(NGHolder &g, const map> &tops, ue2::unordered_map out; try { - vector colour(num_vertices(g)); boost::breadth_first_search(g, g.start, - visitor(fas_visitor(*accel_map, &out)) - .color_map(make_iterator_property_map(colour.begin(), - get(vertex_index, g)))); + visitor(fas_visitor(*accel_map, &out)) + .color_map(make_small_color_map(g))); } catch (fas_visitor *) { ; /* found max accel_states */ } @@ -1615,9 +1614,7 @@ bool cannotDie(const build_info &args, const set &tops) { // top, looking for a cyclic path consisting of vertices of dot reach. If // one exists, than the NFA cannot die after this top is triggered. 
- vector colours(num_vertices(h)); - auto colour_map = boost::make_iterator_property_map(colours.begin(), - get(vertex_index, h)); + auto colour_map = make_small_color_map(h); struct CycleFound {}; struct CannotDieVisitor : public boost::default_dfs_visitor { From 03c1af117348428ee5776f2f8067866fba519cc4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 10:38:46 +1000 Subject: [PATCH 071/190] ng_misc_opt: use small_color_map --- src/nfagraph/ng_misc_opt.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index c55a02e6..c8dfcbab 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -69,6 +69,7 @@ #include "util/charreach.h" #include "util/container.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include "util/ue2_containers.h" #include "ue2common.h" @@ -552,16 +553,17 @@ bool mergeCyclicDotStars(NGHolder &g) { struct PrunePathsInfo { explicit PrunePathsInfo(const NGHolder &g) - : color_map(num_vertices(g)), bad(num_vertices(g)) {} + : color_map(make_small_color_map(g)), bad(num_vertices(g)) {} void clear() { no_explore.clear(); - fill(color_map.begin(), color_map.end(), boost::white_color); + color_map.fill(small_color::white); bad.reset(); } flat_set no_explore; - vector color_map; + using color_map_type = decltype(make_small_color_map(NGHolder())); + color_map_type color_map; boost::dynamic_bitset<> bad; }; @@ -597,9 +599,6 @@ void findDependentVertices(const NGHolder &g, PrunePathsInfo &info, auto filtered_g = make_filtered_graph(g, make_bad_edge_filter(&info.no_explore)); - auto color = make_iterator_property_map(info.color_map.begin(), - get(vertex_index, g)); - // We use a bitset to track bad vertices, rather than filling a (potentially // very large) set structure. 
auto recorder = make_vertex_index_bitset_recorder(info.bad); @@ -608,7 +607,7 @@ void findDependentVertices(const NGHolder &g, PrunePathsInfo &info, if (b != g.start && g[b].char_reach.isSubsetOf(g[v].char_reach)) { continue; } - boost::depth_first_visit(filtered_g, b, recorder, color); + boost::depth_first_visit(filtered_g, b, recorder, info.color_map); } } From 1392be048a8c6a5f8040435e18b293ab2bdd4266 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 10:57:17 +1000 Subject: [PATCH 072/190] ng_width: use small_color_map --- src/nfagraph/ng_width.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/nfagraph/ng_width.cpp b/src/nfagraph/ng_width.cpp index c2e9eb1a..4c33220c 100644 --- a/src/nfagraph/ng_width.cpp +++ b/src/nfagraph/ng_width.cpp @@ -37,6 +37,7 @@ #include "ue2common.h" #include "util/depth.h" #include "util/graph.h" +#include "util/graph_small_color_map.h" #include #include @@ -143,7 +144,7 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, assert(hasCorrectlyNumberedVertices(h)); const size_t num = num_vertices(h); vector distance(num); - vector colors(num); + auto colors = make_small_color_map(h); auto index_map = get(&NFAGraphVertexProps::index, g); @@ -151,15 +152,15 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, dag_shortest_paths(g, src, distance_map(make_iterator_property_map(distance.begin(), index_map)) .weight_map(boost::make_constant_property(-1)) - .color_map(make_iterator_property_map(colors.begin(), index_map))); + .color_map(colors)); depth acceptDepth, acceptEodDepth; - if (colors.at(NODE_ACCEPT) == boost::white_color) { + if (get(colors, h.accept) == small_color::white) { acceptDepth = depth::unreachable(); } else { acceptDepth = depth(-1 * distance.at(NODE_ACCEPT)); } - if (colors.at(NODE_ACCEPT_EOD) == boost::white_color) { + if (get(colors, h.acceptEod) == small_color::white) { acceptEodDepth = depth::unreachable(); } else { 
acceptEodDepth = depth(-1 * distance.at(NODE_ACCEPT_EOD)); From 8982e7177c4b9f87f6c40eff43ec56e4530fe373 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 11:05:55 +1000 Subject: [PATCH 073/190] ng_region: use small_color_map --- src/nfagraph/ng_region.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 91904b46..6463a281 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -58,6 +58,7 @@ #include "util/container.h" #include "util/ue2_containers.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include #include @@ -407,19 +408,20 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector &topoOrder) { } } +using ColorMap = decltype(make_small_color_map(NGHolder())); + /** Build a reverse topo ordering (with only the specials that are in use). We * also want to ensure vertices which only lead to back edges are placed near * their parents. */ static vector buildTopoOrder(const NGHolder &w, const AcyclicGraph &acyclic_g, - vector &colours) { + ColorMap &colours) { vector topoOrder; topoOrder.reserve(num_vertices(w)); topological_sort(acyclic_g, back_inserter(topoOrder), - color_map(make_iterator_property_map(colours.begin(), - get(vertex_index, acyclic_g)))); + color_map(colours)); reorderSpecials(w, acyclic_g, topoOrder); @@ -443,15 +445,14 @@ unordered_map assignRegions(const NGHolder &g) { const u32 numVertices = num_vertices(g); DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices); - vector colours(numVertices); + auto colours = make_small_color_map(g); // Build an acyclic graph for this NGHolder. 
BackEdgeSet deadEdges; depth_first_search(g, visitor(BackEdges(deadEdges)) .root_vertex(g.start) - .color_map(make_iterator_property_map(colours.begin(), - get(vertex_index, g)))); + .color_map(colours)); auto af = make_bad_edge_filter(&deadEdges); AcyclicGraph acyclic_g(g, af); From 32270725c6bbf3a1a5d43faedf222e46d2ad8fa6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 11:08:54 +1000 Subject: [PATCH 074/190] ng_repeat: use small_color_map --- src/nfagraph/ng_repeat.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index 60ad2200..4487376a 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -47,6 +47,7 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include "util/report_manager.h" #include @@ -2084,14 +2085,14 @@ public: static void populateFixedTopInfo(const map &fixed_depth_tops, const NGHolder &g, - ue2::unordered_set *reached_by_fixed_tops) { + unordered_set *reached_by_fixed_tops) { if (fixed_depth_tops.empty()) { return; /* we will never find anything */ } assert(!proper_out_degree(g.startDs, g)); ue2::unordered_map top_depths; - vector colours(num_vertices(g)); + auto colours = make_small_color_map(g); for (const auto &e : out_edges_range(g.start, g)) { NFAVertex v = target(e, g); @@ -2121,9 +2122,7 @@ void populateFixedTopInfo(const map &fixed_depth_tops, /* for each vertex reachable from v update its map to reflect that it is * reachable from a top of depth td. 
*/ - depth_first_visit(g, v, pfti_visitor(top_depths, td), - make_iterator_property_map(colours.begin(), - get(vertex_index, g))); + depth_first_visit(g, v, pfti_visitor(top_depths, td), colours); } for (const auto &v_depth : top_depths) { From b97fa8c8082cc3f7a8a349c1ad06609bd83c1b2b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 11:14:46 +1000 Subject: [PATCH 075/190] ng_literal_analysis: use small_color_map --- src/nfagraph/ng_literal_analysis.cpp | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index 19660580..87c4e79e 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -40,6 +40,7 @@ #include "util/depth.h" #include "util/graph.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include "util/ue2_graph.h" #include "util/ue2string.h" @@ -462,17 +463,13 @@ next_literal: #ifdef DEBUG static UNUSED -const char *describeColor(boost::default_color_type c) { +const char *describeColor(small_color c) { switch (c) { - case boost::white_color: + case small_color::white: return "white"; - case boost::gray_color: + case small_color::gray: return "gray"; - case boost::green_color: - return "green"; - case boost::red_color: - return "red"; - case boost::black_color: + case small_color::black: return "black"; default: return "unknown"; @@ -527,7 +524,7 @@ void findMinCut(LitGraph &lg, vector &cutset) { const auto v_index_map = get(&LitGraphVertexProps::index, lg); const auto e_index_map = get(&LitGraphEdgeProps::index, lg); const size_t num_verts = num_vertices(lg); - vector colors(num_verts); + auto colors = make_small_color_map(lg); vector distances(num_verts); vector predecessors(num_verts); vector residuals(num_edges(lg)); @@ -537,7 +534,7 @@ void findMinCut(LitGraph &lg, vector &cutset) { make_iterator_property_map(residuals.begin(), e_index_map), 
make_iterator_property_map(rev_edges.begin(), e_index_map), make_iterator_property_map(predecessors.begin(), v_index_map), - make_iterator_property_map(colors.begin(), v_index_map), + colors, make_iterator_property_map(distances.begin(), v_index_map), v_index_map, lg.root, lg.sink); DEBUG_PRINTF("done, flow = %llu\n", flow); @@ -552,19 +549,19 @@ void findMinCut(LitGraph &lg, vector &cutset) { for (const auto &e : edges_range(lg)) { const LitVertex u = source(e, lg), v = target(e, lg); - const auto ucolor = colors[lg[u].index]; - const auto vcolor = colors[lg[v].index]; + const auto ucolor = get(colors, u); + const auto vcolor = get(colors, v); DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index, describeColor(ucolor), lg[v].index, describeColor(vcolor), lg[e].score); - if (ucolor != boost::white_color && vcolor == boost::white_color) { + if (ucolor != small_color::white && vcolor == small_color::white) { assert(v != lg.sink); white_cut.push_back(e); white_flow += lg[e].score; } - if (ucolor == boost::black_color && vcolor != boost::black_color) { + if (ucolor == small_color::black && vcolor != small_color::black) { assert(v != lg.sink); black_cut.push_back(e); black_flow += lg[e].score; From 33141e64b6a222d4825f11b422f8c893d64d3fee Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 11:20:17 +1000 Subject: [PATCH 076/190] ng_netflow: use small_color_map --- src/nfagraph/ng_netflow.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/nfagraph/ng_netflow.cpp b/src/nfagraph/ng_netflow.cpp index cff26358..780a319f 100644 --- a/src/nfagraph/ng_netflow.cpp +++ b/src/nfagraph/ng_netflow.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,6 +37,7 @@ #include "ue2common.h" #include 
"util/container.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include #include @@ -118,7 +119,7 @@ void removeEdgesFromIndex(NGHolder &g, vector &capacityMap, u32 idx) { * colour map (from which we can find the min cut). */ static u64a getMaxFlow(NGHolder &h, const vector &capacityMap_in, - vector &colorMap) { + decltype(make_small_color_map(NGHolder())) &colorMap) { vector capacityMap = capacityMap_in; NFAVertex src = h.start; NFAVertex sink = h.acceptEod; @@ -141,7 +142,6 @@ u64a getMaxFlow(NGHolder &h, const vector &capacityMap_in, vector edgeResiduals(numTotalEdges); vector predecessors(numVertices); vector distances(numVertices); - assert(colorMap.size() == numVertices); auto v_index_map = get(vertex_index, h); auto e_index_map = get(edge_index, h); @@ -151,7 +151,7 @@ u64a getMaxFlow(NGHolder &h, const vector &capacityMap_in, make_iterator_property_map(edgeResiduals.begin(), e_index_map), make_iterator_property_map(reverseEdges.begin(), e_index_map), make_iterator_property_map(predecessors.begin(), v_index_map), - make_iterator_property_map(colorMap.begin(), v_index_map), + colorMap, make_iterator_property_map(distances.begin(), v_index_map), v_index_map, src, sink); @@ -169,8 +169,8 @@ vector findMinCut(NGHolder &h, const vector &scores) { assert(hasCorrectlyNumberedEdges(h)); assert(hasCorrectlyNumberedVertices(h)); - vector colorMap(num_vertices(h)); - u64a flow = getMaxFlow(h, scores, colorMap); + auto colors = make_small_color_map(h); + u64a flow = getMaxFlow(h, scores, colors); vector picked_white; vector picked_black; @@ -185,17 +185,17 @@ vector findMinCut(NGHolder &h, const vector &scores) { continue; // skips, among other things, reverse edges } - default_color_type fromColor = colorMap[h[from].index]; - default_color_type toColor = colorMap[h[to].index]; + auto fromColor = get(colors, from); + auto toColor = get(colors, to); - if (fromColor != boost::white_color && toColor == boost::white_color) { + if (fromColor != 
small_color::white && toColor == small_color::white) { assert(ec <= INVALID_EDGE_CAP); DEBUG_PRINTF("found white cut edge %zu->%zu cap %llu\n", h[from].index, h[to].index, ec); observed_white_flow += ec; picked_white.push_back(e); } - if (fromColor == boost::black_color && toColor != boost::black_color) { + if (fromColor == small_color::black && toColor != small_color::black) { assert(ec <= INVALID_EDGE_CAP); DEBUG_PRINTF("found black cut edge %zu->%zu cap %llu\n", h[from].index, h[to].index, ec); @@ -206,7 +206,7 @@ vector findMinCut(NGHolder &h, const vector &scores) { DEBUG_PRINTF("min flow = %llu b flow = %llu w flow %llu\n", flow, observed_black_flow, observed_white_flow); - if (MIN(observed_white_flow, observed_black_flow) != flow) { + if (min(observed_white_flow, observed_black_flow) != flow) { DEBUG_PRINTF("bad cut\n"); } From f98ccedf275d2455598e7b08ca4363bfd6d2c453 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 11:25:13 +1000 Subject: [PATCH 077/190] ng_prune: use small_color_map --- src/nfagraph/ng_prune.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/nfagraph/ng_prune.cpp b/src/nfagraph/ng_prune.cpp index 88f1880f..72d017ae 100644 --- a/src/nfagraph/ng_prune.cpp +++ b/src/nfagraph/ng_prune.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +38,7 @@ #include "util/container.h" #include "util/graph.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include "util/report_manager.h" #include @@ -105,23 +106,18 @@ template static bool pruneForwardUseless(NGHolder &h, const nfag_t &g, typename nfag_t::vertex_descriptor s, - vector &vertexColor) { + decltype(make_small_color_map(NGHolder())) &colors) { // Begin with all vertices set to white, as 
DFV only marks visited // vertices. - fill(vertexColor.begin(), vertexColor.end(), boost::white_color); + colors.fill(small_color::white); - auto index_map = get(&NFAGraphVertexProps::index, g); - - depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), - make_iterator_property_map(vertexColor.begin(), - index_map)); + depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors); vector dead; // All non-special vertices that are still white can be removed. for (auto v : vertices_range(g)) { - u32 idx = g[v].index; - if (!is_special(v, g) && vertexColor[idx] == boost::white_color) { + if (!is_special(v, g) && get(colors, v) == small_color::white) { DEBUG_PRINTF("vertex %zu is unreachable from %zu\n", g[v].index, g[s].index); dead.push_back(NFAVertex(v)); @@ -143,11 +139,11 @@ bool pruneForwardUseless(NGHolder &h, const nfag_t &g, void pruneUseless(NGHolder &g, bool renumber) { DEBUG_PRINTF("pruning useless vertices\n"); assert(hasCorrectlyNumberedVertices(g)); - vector vertexColor(num_vertices(g)); + auto colors = make_small_color_map(g); - bool work_done = pruneForwardUseless(g, g, g.start, vertexColor); + bool work_done = pruneForwardUseless(g, g, g.start, colors); work_done |= pruneForwardUseless(g, reverse_graph(g), - g.acceptEod, vertexColor); + g.acceptEod, colors); if (!work_done) { return; From 8d178d52ef3bd82663882d2002bee185110c3f18 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 11:33:03 +1000 Subject: [PATCH 078/190] ng_util: use small_color_map --- src/nfagraph/ng_util.cpp | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index c0ad6199..197ac66d 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -39,6 +39,7 @@ #include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS. 
#include "parser/position.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include "util/make_unique.h" #include "util/order_check.h" #include "util/ue2string.h" @@ -52,7 +53,6 @@ #include using namespace std; -using boost::default_color_type; using boost::make_filtered_graph; using boost::make_assoc_property_map; @@ -226,15 +226,12 @@ bool isAcyclic(const NGHolder &g) { /** True if the graph has a cycle reachable from the given source vertex. */ bool hasReachableCycle(const NGHolder &g, NFAVertex src) { assert(hasCorrectlyNumberedVertices(g)); - vector colors(num_vertices(g)); try { // Use depth_first_visit, rather than depth_first_search, so that we // only search from src. - auto index_map = get(vertex_index, g); boost::depth_first_visit(g, src, DetectCycles(g), - make_iterator_property_map(colors.begin(), - index_map)); + make_small_color_map(g)); } catch (const CycleFound &) { return true; } @@ -353,24 +350,19 @@ vector getTopoOrdering(const NGHolder &g) { // Use the same colour map for both DFS and topological_sort below: avoids // having to reallocate it, etc. 
- const size_t num_verts = num_vertices(g); - vector colour(num_verts); + auto colors = make_small_color_map(g); using EdgeSet = ue2::unordered_set; EdgeSet backEdges; BackEdges be(backEdges); - auto index_map = get(vertex_index, g); - depth_first_search(g, visitor(be).root_vertex(g.start) - .color_map(make_iterator_property_map( - colour.begin(), index_map))); + depth_first_search(g, visitor(be).root_vertex(g.start).color_map(colors)); auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&backEdges)); vector ordering; - ordering.reserve(num_verts); - topological_sort(acyclic_g, back_inserter(ordering), - color_map(make_iterator_property_map(colour.begin(), index_map))); + ordering.reserve(num_vertices(g)); + topological_sort(acyclic_g, back_inserter(ordering), color_map(colors)); reorderSpecials(g, ordering); @@ -379,7 +371,7 @@ vector getTopoOrdering(const NGHolder &g) { static void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, - vector &vertexColor) { + decltype(make_small_color_map(NGHolder())) &colors) { set s; insert(&s, adjacent_vertices(u, g)); @@ -396,10 +388,8 @@ void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead)); - depth_first_visit( - prefix, g.start, make_dfs_visitor(boost::null_visitor()), - make_iterator_property_map(vertexColor.begin(), - get(vertex_index, g))); + depth_first_visit(prefix, g.start, make_dfs_visitor(boost::null_visitor()), + colors); } bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, @@ -412,14 +402,14 @@ bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, return cache.cache[key]; } - vector vertexColor(num_vertices(g)); - mustBeSetBefore_int(u, g, vertexColor); + auto colors = make_small_color_map(g); + mustBeSetBefore_int(u, g, colors); for (auto vi : vertices_range(g)) { auto key2 = make_pair(g[u].index, g[vi].index); DEBUG_PRINTF("adding %zu %zu\n", key2.first, key2.second); assert(!contains(cache.cache, key2)); - 
bool value = vertexColor[g[vi].index] == boost::white_color; + bool value = get(colors, vi) == small_color::white; cache.cache[key2] = value; assert(contains(cache.cache, key2)); } From 9c046db360f81606582e9c43920d2e1258fb08a6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 11:38:13 +1000 Subject: [PATCH 079/190] ng_util: make more use of small_color_map in DFS --- src/nfagraph/ng_util.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 197ac66d..14082407 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -214,8 +214,8 @@ bool isFloating(const NGHolder &g) { bool isAcyclic(const NGHolder &g) { try { - boost::depth_first_search(g, visitor(DetectCycles(g)) - .root_vertex(g.start)); + boost::depth_first_search(g, DetectCycles(g), make_small_color_map(g), + g.start); } catch (const CycleFound &) { return false; } @@ -243,7 +243,8 @@ bool hasBigCycles(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); set dead; BackEdges> backEdgeVisitor(dead); - boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); + boost::depth_first_search(g, backEdgeVisitor, make_small_color_map(g), + g.start); for (const auto &e : dead) { if (source(e, g) != target(e, g)) { From 90faea4ce98b9bdc2d3fdca4cd3b82e9a17832fd Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 29 Jun 2017 11:43:45 +1000 Subject: [PATCH 080/190] ng_depth: use small_color_map in findLoopReachable --- src/nfagraph/ng_depth.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index 67a6b27b..aaa5166f 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -34,17 +34,18 @@ #include "ng_util.h" #include "ue2common.h" #include "util/graph_range.h" +#include "util/graph_small_color_map.h" #include #include +#include #include #include -#include #include +#include #include 
#include -#include #include using namespace std; @@ -137,13 +138,15 @@ vector findLoopReachable(const Graph &g, EdgeSet deadEdges; BackEdges be(deadEdges); - depth_first_search(g, visitor(be).root_vertex(src)); + auto colors = make_small_color_map(g); + + depth_first_search(g, be, colors, src); auto af = make_bad_edge_filter(&deadEdges); auto acyclic_g = make_filtered_graph(g, af); vector topoOrder; /* actually reverse topological order */ topoOrder.reserve(deadNodes.size()); - topological_sort(acyclic_g, back_inserter(topoOrder)); + topological_sort(acyclic_g, back_inserter(topoOrder), color_map(colors)); for (const auto &e : deadEdges) { size_t srcIdx = g[source(e, g)].index; From c0320b8cdc703ffcaadc298fe0d1e5431f636b09 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 30 Jun 2017 09:43:31 +1000 Subject: [PATCH 081/190] ng_depth: more use of small_color_map --- src/nfagraph/ng_depth.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index aaa5166f..6c90326c 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -207,14 +207,16 @@ void calcDepthFromSource(const GraphT &g, visitor(make_bfs_visitor(record_distances( make_iterator_property_map(dMin.begin(), min_index_map), - boost::on_tree_edge())))); + boost::on_tree_edge()))) + .color_map(make_small_color_map(mindist_g))); auto max_index_map = get(vertex_index, maxdist_g); dag_shortest_paths(maxdist_g, srcVertex, distance_map(make_iterator_property_map(dMax.begin(), max_index_map)) - .weight_map(make_constant_property(-1))); + .weight_map(make_constant_property(-1)) + .color_map(make_small_color_map(maxdist_g))); for (size_t i = 0; i < numVerts; i++) { if (dMin[i] > DIST_UNREACHABLE) { From 0d1e441629b0ecd3d01c5110b77a6cf77d147abe Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 30 Jun 2017 11:15:39 +1000 Subject: [PATCH 082/190] cmake: add graph_range.h --- CMakeLists.txt | 1 + 1 file changed, 1 
insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d62b3c6..fc05f0f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -997,6 +997,7 @@ SET (hs_SRCS src/util/fatbit_build.cpp src/util/fatbit_build.h src/util/graph.h + src/util/graph_range.h src/util/graph_small_color_map.h src/util/hash.h src/util/hash_dynamic_bitset.h From 35a42061f6d694db5cd1c97105d9267005abafd8 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 29 Jun 2017 17:13:27 -0700 Subject: [PATCH 083/190] patch for invalid reading 1 byte in Reinforced Teddy, abandon fetching the first reinforced byte. --- src/fdr/teddy.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index da5096a0..fd149016 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -479,7 +479,6 @@ do { \ a->buf, buf_end, \ a->buf_history, a->len_history, n_msk); \ m256 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk); \ - c_0 = *(ptr + 31); \ r_0 = or256(r_0, p_mask); \ CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ ptr += 32; \ From 21a4c8d4e2dbe815db971524c9b7d8c2bebdf72d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 3 Jul 2017 11:29:35 +1000 Subject: [PATCH 084/190] rdfa: move raw_dfa member functions to rdfa.cpp --- CMakeLists.txt | 1 + src/nfa/dfa_build_strat.cpp | 6 ++-- src/nfa/mcclellancompile.cpp | 27 ----------------- src/nfa/rdfa.cpp | 59 ++++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 31 deletions(-) create mode 100644 src/nfa/rdfa.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fc05f0f3..c51d6133 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -735,6 +735,7 @@ SET (hs_SRCS src/nfa/nfa_build_util.h src/nfa/nfa_internal.h src/nfa/nfa_kind.h + src/nfa/rdfa.cpp src/nfa/rdfa.h src/nfa/rdfa_graph.cpp src/nfa/rdfa_graph.h diff --git a/src/nfa/dfa_build_strat.cpp b/src/nfa/dfa_build_strat.cpp index d4d418aa..b6b7a7fb 100644 --- a/src/nfa/dfa_build_strat.cpp +++ b/src/nfa/dfa_build_strat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,11 +30,9 @@ namespace ue2 { -// prevent weak vtables for raw_report_info, dfa_build_strat and raw_dfa +// prevent weak vtables for raw_report_info, dfa_build_strat raw_report_info::~raw_report_info() {} dfa_build_strat::~dfa_build_strat() {} -raw_dfa::~raw_dfa() {} - } // namespace ue2 diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 43b555af..93746777 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -914,33 +914,6 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, info.extra[curr_id].shermanState = true; } -/* - * Calls accessible outside this module. - */ - -u16 raw_dfa::getImplAlphaSize() const { - return alpha_size - N_SPECIAL_SYMBOL; -} - -void raw_dfa::stripExtraEodReports(void) { - /* if a state generates a given report as a normal accept - then it does - * not also need to generate an eod report for it */ - for (dstate &ds : states) { - for (const ReportID &report : ds.reports) { - ds.reports_eod.erase(report); - } - } -} - -bool raw_dfa::hasEodReports(void) const { - for (const dstate &ds : states) { - if (!ds.reports_eod.empty()) { - return true; - } - } - return false; -} - static bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { symbol_t alphasize = raw.getImplAlphaSize(); diff --git a/src/nfa/rdfa.cpp b/src/nfa/rdfa.cpp new file mode 100644 index 00000000..077ff9ed --- /dev/null +++ b/src/nfa/rdfa.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and 
the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "rdfa.h" + +namespace ue2 { + +// prevent weak vtables +raw_dfa::~raw_dfa() {} + +u16 raw_dfa::getImplAlphaSize() const { + return alpha_size - N_SPECIAL_SYMBOL; +} + +void raw_dfa::stripExtraEodReports(void) { + /* if a state generates a given report as a normal accept - then it does + * not also need to generate an eod report for it */ + for (dstate &ds : states) { + for (const ReportID &report : ds.reports) { + ds.reports_eod.erase(report); + } + } +} + +bool raw_dfa::hasEodReports(void) const { + for (const dstate &ds : states) { + if (!ds.reports_eod.empty()) { + return true; + } + } + return false; +} + +} // namespace ue2 From cdb281df423fb031c3e496e9cb9c0ca193f95393 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 3 Jul 2017 11:08:25 +1000 Subject: [PATCH 085/190] rose_build_convert: replace bind2nd with lambda std::bind2nd was deprecated in C++11. --- src/rose/rose_build_convert.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index 0c1f4338..f80e25cb 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -84,7 +84,7 @@ size_t suffixFloodLen(const ue2_literal &s) { const ue2_literal::elem &c = s.back(); auto it = find_if(s.rbegin(), s.rend(), - bind2nd(not_equal_to(), c)); + [&c](const ue2_literal::elem &e) { return e != c; }); return distance(s.rbegin(), it); } From e8f09aa8c6b4c507e1970bfc63bd052fb07864b3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 26 Jun 2017 16:17:25 +1000 Subject: [PATCH 086/190] ng_violet: fail on added_count limit quicker - also fixes typo "splitForImplementability" - adds more detail in debug output --- src/nfagraph/ng_violet.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 2e1171ab..d3303985 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ 
-1198,7 +1198,8 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, const vector &to_cut, const vector &cut, const map> &cut_lits) { - DEBUG_PRINTF("splitting %s:\n", to_string(h.kind).c_str()); + DEBUG_PRINTF("splitting %s (%zu vertices)\n", to_string(h.kind).c_str(), + num_vertices(h)); /* create literal vertices and connect preds */ unordered_set done_sources; @@ -1233,7 +1234,9 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, renumber_vertices(*new_lhs); renumber_edges(*new_lhs); - DEBUG_PRINTF(" into lhs %s\n", to_string(new_lhs->kind).c_str()); + DEBUG_PRINTF(" into lhs %s (%zu vertices)\n", + to_string(new_lhs->kind).c_str(), + num_vertices(*new_lhs)); assert(hasCorrectlyNumberedVertices(*new_lhs)); assert(hasCorrectlyNumberedEdges(*new_lhs)); @@ -1301,8 +1304,9 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); renumber_edges(*new_rhs); - DEBUG_PRINTF(" into rhs %s\n", - to_string(new_rhs->kind).c_str()); + DEBUG_PRINTF(" into rhs %s (%zu vertices)\n", + to_string(new_rhs->kind).c_str(), + num_vertices(*new_rhs)); done_rhs.emplace(adj, new_rhs); assert(isCorrectlyTopped(*new_rhs)); } @@ -2828,9 +2832,9 @@ bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, #define MAX_EDGES_FOR_IMPLEMENTABILITY 50 static -bool splitForImplementabilty(RoseInGraph &vg, NGHolder &h, - const vector &edges, - const CompileContext &cc) { +bool splitForImplementability(RoseInGraph &vg, NGHolder &h, + const vector &edges, + const CompileContext &cc) { vector> succ_lits; DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n", to_string(h.kind).c_str(), num_vertices(h), edges.size()); @@ -2912,8 +2916,12 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, return false; } - if (splitForImplementabilty(vg, *h, edges_by_graph[h], cc)) { + if (splitForImplementability(vg, *h, edges_by_graph[h], cc)) { 
added_count++; + if (added_count > MAX_IMPLEMENTABLE_SPLITS) { + DEBUG_PRINTF("added_count hit limit\n"); + return false; + } changed = true; continue; } @@ -2921,9 +2929,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, return false; } - if (added_count > MAX_IMPLEMENTABLE_SPLITS) { - return false; - } + assert(added_count <= MAX_IMPLEMENTABLE_SPLITS); if (changed) { removeRedundantLiterals(vg, cc); From 2fba9bd16c3e380b38eaa915b40ef3361f424c97 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 26 Jun 2017 16:15:23 +1000 Subject: [PATCH 087/190] ng_mcclellan: reject determinise if NFA is too big --- src/nfagraph/ng_mcclellan.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 7bb8335c..ec8ae223 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -558,11 +558,16 @@ unique_ptr buildMcClellan(const NGHolder &graph, = (graph.kind == NFA_OUTFIX || finalChance) ? FINAL_DFA_STATE_LIMIT : DFA_STATE_LIMIT; - unique_ptr rdfa = ue2::make_unique(graph.kind); - const u32 numStates = num_vertices(graph); DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates); + if (numStates > FINAL_DFA_STATE_LIMIT) { + DEBUG_PRINTF("rejecting nfa as too many vertices\n"); + return nullptr; + } + + auto rdfa = ue2::make_unique(graph.kind); + if (numStates <= NFA_STATE_LIMIT) { /* Fast path. Automaton_Graph uses a bitfield internally to represent * states and is quicker than Automaton_Big. */ From b454ab64848a9850ba87c310539338a3643f2aff Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Jul 2017 13:22:09 +1000 Subject: [PATCH 088/190] ng_prefilter.cpp: remove interior edges first This allows us to avoid looking them up while we're in copyOutEdges(), halves time on large cases. 
--- src/nfagraph/ng_prefilter.cpp | 37 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index 3cd9d06d..64d4cf2f 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ b/src/nfagraph/ng_prefilter.cpp @@ -213,27 +213,17 @@ map findRegionInfo(const NGHolder &h, } static -void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to, - const ue2::unordered_set &rverts) { +void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to) { for (const auto &e : in_edges_range(from, g)) { NFAVertex u = source(e, g); - if (contains(rverts, u)) { - continue; - } - add_edge_if_not_present(u, to, g[e], g); } } static -void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to, - const ue2::unordered_set &rverts) { +void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to) { for (const auto &e : out_edges_range(from, g)) { NFAVertex t = target(e, g); - if (contains(rverts, t)) { - continue; - } - add_edge_if_not_present(to, t, g[e], g); if (is_any_accept(t, g)) { @@ -243,6 +233,21 @@ void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to, } } +static +void removeInteriorEdges(NGHolder &g, const RegionInfo &ri) { + // Set of vertices in region, for quick lookups. + const unordered_set rverts(ri.vertices.begin(), + ri.vertices.end()); + + auto is_interior_in_edge = [&](const NFAEdge &e) { + return contains(rverts, source(e, g)); + }; + + for (auto v : ri.vertices) { + remove_in_edge_if(v, is_interior_in_edge, g); + } +} + static void replaceRegion(NGHolder &g, const RegionInfo &ri, size_t *verticesAdded, size_t *verticesRemoved) { @@ -284,19 +289,17 @@ void replaceRegion(NGHolder &g, const RegionInfo &ri, add_edge(verts.back(), verts.back(), g); } - // Set of vertices in region, for quick lookups. 
- const ue2::unordered_set rverts(ri.vertices.begin(), - ri.vertices.end()); + removeInteriorEdges(g, ri); for (size_t i = 0; i < replacementSize; i++) { NFAVertex v_new = verts[i]; for (auto v_old : ri.vertices) { if (i == 0) { - copyInEdges(g, v_old, v_new, rverts); + copyInEdges(g, v_old, v_new); } if (i + 1 >= ri.minWidth) { - copyOutEdges(g, v_old, v_new, rverts); + copyOutEdges(g, v_old, v_new); } } } From 482e1ef931d509e2c7f91fecc341a3d88fe46fd6 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 5 Jul 2017 11:00:39 +1000 Subject: [PATCH 089/190] Ensure that reports would be sustained after self loop is removed Approximante matching means that is now possible to get a non-standard report on a cyclic during edge redundancy passes which means checks are now needed. --- src/nfagraph/ng_edge_redundancy.cpp | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/nfagraph/ng_edge_redundancy.cpp b/src/nfagraph/ng_edge_redundancy.cpp index 3ce62c41..1578d2e4 100644 --- a/src/nfagraph/ng_edge_redundancy.cpp +++ b/src/nfagraph/ng_edge_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -181,6 +181,28 @@ bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) { return dead_count; } +static +bool checkReportsRev(const NGHolder &g, NFAVertex v, + const set &happy) { + if (g[v].reports.empty()) { + return true; + } + + assert(edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second); + + /* an edge to accept takes priority over eod only accept */ + NFAVertex accept = edge(v, g.accept, g).second ? 
g.accept : g.acceptEod; + + flat_set happy_reports; + for (NFAVertex u : happy) { + if (edge(u, accept, g).second) { + insert(&happy_reports, g[u].reports); + } + } + + return is_subset_of(g[v].reports, happy_reports); +} + /** \brief Redundant self-loop removal (reverse version). * * A self loop on a vertex v can be removed if: @@ -233,7 +255,8 @@ bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) { happy.insert(u); } - if (!happy.empty() && checkVerticesRev(g, sad, happy)) { + if (!happy.empty() && checkVerticesRev(g, sad, happy) + && checkReportsRev(g, v, happy)) { dead_count++; remove_edge(v, v, g); } From ebb1b0006bed07df18db326ff74a58e5e5d0e528 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Thu, 6 Jul 2017 12:23:41 -0400 Subject: [PATCH 090/190] remove start argument in literal matcher callbacks --- src/fdr/fdr_confirm_runtime.h | 2 +- src/fdr/fdr_internal.h | 1 - src/fdr/flood_compile.cpp | 1 - src/fdr/flood_runtime.h | 68 ++++++++++++--------------- src/hwlm/hwlm.h | 9 ++-- src/hwlm/noodle_build.cpp | 7 ++- src/hwlm/noodle_engine.c | 18 +++----- src/hwlm/noodle_internal.h | 1 - src/rose/block.c | 6 +-- src/rose/catchup.c | 5 +- src/rose/match.c | 51 +++++++++------------ src/rose/match.h | 9 ++-- src/rose/program_runtime.c | 8 ++-- src/rose/program_runtime.h | 5 +- src/rose/rose.h | 4 +- src/rose/stream.c | 4 +- unit/internal/fdr.cpp | 86 ++++++++++++++++------------------- unit/internal/fdr_flood.cpp | 24 +++------- unit/internal/noodle.cpp | 27 ++--------- 19 files changed, 131 insertions(+), 205 deletions(-) diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index d75408f4..557873b7 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -88,7 +88,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a } *last_match = li->id; - *control = a->cb(loc - buf, i, li->id, a->ctxt); + *control = a->cb(i, li->id, a->ctxt); out: oldNext = li->next; // oldNext is either 0 or an 
'adjust' value li++; diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 2315b2d8..41470997 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -56,7 +56,6 @@ struct FDRFlood { u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids - u32 len[FDR_FLOOD_MAX_IDS]; //!< lengths to go with the string ids }; /** \brief FDR structure. diff --git a/src/fdr/flood_compile.cpp b/src/fdr/flood_compile.cpp index 7dcc17d1..6304ab2f 100644 --- a/src/fdr/flood_compile.cpp +++ b/src/fdr/flood_compile.cpp @@ -82,7 +82,6 @@ void addFlood(vector &tmpFlood, u8 c, const hwlmLiteral &lit, fl.ids[fl.idCount] = lit.id; fl.allGroups |= lit.groups; fl.groups[fl.idCount] = lit.groups; - fl.len[fl.idCount] = suffix; // when idCount gets to max_ids this flood no longer happens // only incremented one more time to avoid arithmetic overflow DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n", diff --git a/src/fdr/flood_runtime.h b/src/fdr/flood_runtime.h index d3f6b3b2..93079afb 100644 --- a/src/fdr/flood_runtime.h +++ b/src/fdr/flood_runtime.h @@ -196,120 +196,110 @@ const u8 * floodDetect(const struct FDR * fdr, for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) { DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]); - u32 len0 = fl->len[0] - 1; if (*control & fl->groups[0]) { - *control = cb(i + t + 0 - len0, i + t + 0, fl->ids[0], ctxt); + *control = cb(i + t + 0, fl->ids[0], ctxt); } if (*control & fl->groups[0]) { - *control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt); + *control = cb(i + t + 1, fl->ids[0], ctxt); } if (*control & fl->groups[0]) { - *control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt); + *control = cb(i + t + 2, fl->ids[0], ctxt); } if (*control & fl->groups[0]) { - *control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt); + *control = cb(i + t + 3, fl->ids[0], ctxt); } } break; case 2: for (u32 t = 0; t < floodSize && (*control & 
fl->allGroups); t += 4) { - u32 len0 = fl->len[0] - 1; - u32 len1 = fl->len[1] - 1; if (*control & fl->groups[0]) { - *control = cb(i + t - len0, i + t, fl->ids[0], ctxt); + *control = cb(i + t, fl->ids[0], ctxt); } if (*control & fl->groups[1]) { - *control = cb(i + t - len1, i + t, fl->ids[1], ctxt); + *control = cb(i + t, fl->ids[1], ctxt); } if (*control & fl->groups[0]) { *control = - cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt); + cb(i + t + 1, fl->ids[0], ctxt); } if (*control & fl->groups[1]) { - *control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt); + *control = cb(i + t + 1, fl->ids[1], ctxt); } if (*control & fl->groups[0]) { - *control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt); + *control = cb(i + t + 2, fl->ids[0], ctxt); } if (*control & fl->groups[1]) { - *control = cb(i + t + 2 - len1, i + t + 2, fl->ids[1], ctxt); + *control = cb(i + t + 2, fl->ids[1], ctxt); } if (*control & fl->groups[0]) { - *control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt); + *control = cb(i + t + 3, fl->ids[0], ctxt); } if (*control & fl->groups[1]) { - *control = cb(i + t + 3 - len1, i + t + 3, fl->ids[1], ctxt); + *control = cb(i + t + 3, fl->ids[1], ctxt); } } break; case 3: for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) { - u32 len0 = fl->len[0] - 1; - u32 len1 = fl->len[1] - 1; - u32 len2 = fl->len[2] - 1; if (*control & fl->groups[0]) { - *control = cb(i + t - len0, i + t, fl->ids[0], ctxt); + *control = cb(i + t, fl->ids[0], ctxt); } if (*control & fl->groups[1]) { - *control = cb(i + t - len1, i + t, fl->ids[1], ctxt); + *control = cb(i + t, fl->ids[1], ctxt); } if (*control & fl->groups[2]) { - *control = cb(i + t - len2, i + t, fl->ids[2], ctxt); + *control = cb(i + t, fl->ids[2], ctxt); } if (*control & fl->groups[0]) { - *control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt); + *control = cb(i + t + 1, fl->ids[0], ctxt); } if (*control & fl->groups[1]) { - *control = cb(i + t + 1 - len1, i + t + 1, 
fl->ids[1], ctxt); + *control = cb(i + t + 1, fl->ids[1], ctxt); } if (*control & fl->groups[2]) { - *control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt); + *control = cb(i + t + 1, fl->ids[2], ctxt); } } break; default: // slow generalized loop for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) { - u32 len0 = fl->len[0] - 1; - u32 len1 = fl->len[1] - 1; - u32 len2 = fl->len[2] - 1; - u32 len3 = fl->len[3] - 1; if (*control & fl->groups[0]) { - *control = cb(i + t - len0, i + t, fl->ids[0], ctxt); + *control = cb(i + t, fl->ids[0], ctxt); } if (*control & fl->groups[1]) { - *control = cb(i + t - len1, i + t, fl->ids[1], ctxt); + *control = cb(i + t, fl->ids[1], ctxt); } if (*control & fl->groups[2]) { - *control = cb(i + t - len2, i + t, fl->ids[2], ctxt); + *control = cb(i + t, fl->ids[2], ctxt); } if (*control & fl->groups[3]) { - *control = cb(i + t - len3, i + t, fl->ids[3], ctxt); + *control = cb(i + t, fl->ids[3], ctxt); } for (u32 t2 = 4; t2 < fl->idCount; t2++) { if (*control & fl->groups[t2]) { - *control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt); + *control = cb(i + t, fl->ids[t2], ctxt); } } if (*control & fl->groups[0]) { - *control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt); + *control = cb(i + t + 1, fl->ids[0], ctxt); } if (*control & fl->groups[1]) { - *control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt); + *control = cb(i + t + 1, fl->ids[1], ctxt); } if (*control & fl->groups[2]) { - *control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt); + *control = cb(i + t + 1, fl->ids[2], ctxt); } if (*control & fl->groups[3]) { - *control = cb(i + t + 1 - len3, i + t + 1, fl->ids[3], ctxt); + *control = cb(i + t + 1, fl->ids[3], ctxt); } for (u32 t2 = 4; t2 < fl->idCount; t2++) { if (*control & fl->groups[t2]) { - *control = cb(i + t + 1 - (fl->len[t2] - 1), i + t + 1, fl->ids[t2], ctxt); + *control = cb(i + t + 1, fl->ids[t2], ctxt); } } } @@ -320,7 +310,7 @@ const u8 * floodDetect(const 
struct FDR * fdr, for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) { for (u32 t2 = 0; t2 < fl->idCount; t2++) { if (*control & fl->groups[t2]) { - *control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt); + *control = cb(i + t, fl->ids[t2], ctxt); } } } diff --git a/src/hwlm/hwlm.h b/src/hwlm/hwlm.h index 00561346..92d4bfdb 100644 --- a/src/hwlm/hwlm.h +++ b/src/hwlm/hwlm.h @@ -79,9 +79,9 @@ struct HWLM; /** \brief The type for an HWLM callback. * - * This callback receives a start-of-match offset, an end-of-match offset, the - * ID of the match and the context pointer that was passed into \ref - * hwlmExec or \ref hwlmExecStreaming. + * This callback receives an end-of-match offset, the ID of the match and + * the context pointer that was passed into \ref hwlmExec or + * \ref hwlmExecStreaming. * * A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching. * @@ -95,8 +95,7 @@ struct HWLM; * belonging to the literal which was active at the when the end match location * was first reached. */ -typedef hwlmcb_rv_t (*HWLMCallback)(size_t start, size_t end, u32 id, - void *context); +typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id, void *context); /** \brief Match strings in table. * diff --git a/src/hwlm/noodle_build.cpp b/src/hwlm/noodle_build.cpp index 4a6ac8d7..a0128d0a 100644 --- a/src/hwlm/noodle_build.cpp +++ b/src/hwlm/noodle_build.cpp @@ -121,9 +121,8 @@ bytecode_ptr noodBuildTable(const hwlmLiteral &lit) { size_t key_offset = findNoodFragOffset(lit); n->id = lit.id; - n->lit_len = s.length(); n->single = s.length() == 1 ? 1 : 0; - n->key_offset = verify_u8(n->lit_len - key_offset); + n->key_offset = verify_u8(s.length() - key_offset); n->nocase = lit.nocase ? 
1 : 0; n->key0 = s[key_offset]; if (n->single) { @@ -151,12 +150,12 @@ namespace ue2 { void noodPrintStats(const noodTable *n, FILE *f) { fprintf(f, "Noodle table\n"); - fprintf(f, "Len: %u Key Offset: %u\n", n->lit_len, n->key_offset); + fprintf(f, "Key Offset: %u\n", n->key_offset); fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n", n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len), n->msk_len); fprintf(f, "String: "); - for (u32 i = n->msk_len - n->lit_len; i < n->msk_len; i++) { + for (u32 i = 0; i < n->msk_len; i++) { const u8 *m = (const u8 *)&n->cmp; if (isgraph(m[i]) && m[i] != '\\') { fprintf(f, "%c", m[i]); diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index f0d711b2..009c4b98 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -128,10 +128,8 @@ hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, match: pos -= cbi->offsetAdj; - DEBUG_PRINTF("match @ %zu->%zu\n", pos + n->key_offset - n->lit_len, - pos + n->key_offset); - hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - n->lit_len, - pos + n->key_offset - 1, cbi->id, cbi->ctx); + DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset); + hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->ctx); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATED; } @@ -377,8 +375,8 @@ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, assert(n && buf); struct cb_info cbi = {cb, n->id, ctxt, 0}; - DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->lit_len, - (const char *)&n->cmp + n->msk_len - n->lit_len, buf); + DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->msk_len, + (const char *)&n->cmp, buf); return scan(n, buf, len, start, n->single, n->nocase, &cbi); } @@ -396,8 +394,7 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, struct cb_info cbi = {cb, n->id, ctxt, 0}; DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen, - n->lit_len, 
(const char *)&n->cmp + n->msk_len - n->lit_len, - buf); + n->msk_len, (const char *)&n->cmp, buf); if (hlen && n->msk_len > 1) { /* @@ -427,9 +424,8 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, u64a v = unaligned_load_u64a(temp_buf + i); if ((v & n->msk) == n->cmp) { size_t m_end = -tl1 + i + n->msk_len - 1; - size_t m_start = m_end - n->lit_len; - DEBUG_PRINTF("match @ %zu->%zu (i %zu)\n", m_start, m_end, i); - hwlmcb_rv_t rv = cb(m_start, m_end, n->id, ctxt); + DEBUG_PRINTF("match @ %zu (i %zu)\n", m_end, i); + hwlmcb_rv_t rv = cb(m_end, n->id, ctxt); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATED; } diff --git a/src/hwlm/noodle_internal.h b/src/hwlm/noodle_internal.h index bfb1a9e2..8f76f177 100644 --- a/src/hwlm/noodle_internal.h +++ b/src/hwlm/noodle_internal.h @@ -39,7 +39,6 @@ struct noodTable { u32 id; u64a msk; u64a cmp; - u8 lit_len; u8 msk_len; u8 key_offset; u8 nocase; diff --git a/src/rose/block.c b/src/rose/block.c index fc72c6e9..2c493219 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -178,13 +178,11 @@ void roseBlockEodExec(const struct RoseEngine *t, u64a offset, assert(!scratch->tctxt.filledDelayedSlots); const u64a som = 0; - const size_t match_len = 0; const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; // Note: we ignore the result, as this is the last thing to ever happen on // a scan. 
- roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, match_len, - flags); + roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, flags); } /** diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 82537241..9e36d091 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,13 +51,12 @@ static really_inline int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, u64a som, u64a offset, ReportID id, const char from_mpv) { const u32 program = id; - const size_t match_len = 0; // Unused in this path. u8 flags = ROSE_PROG_FLAG_IN_CATCHUP; if (from_mpv) { flags |= ROSE_PROG_FLAG_FROM_MPV; } - roseRunProgram(rose, scratch, program, som, offset, match_len, flags); + roseRunProgram(rose, scratch, program, som, offset, flags); return can_stop_matching(scratch) ? 
MO_HALT_MATCHING : MO_CONTINUE_MATCHING; } diff --git a/src/rose/match.c b/src/rose/match.c index daf81eac..91e045a5 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -66,8 +66,7 @@ void printMatch(const struct core_info *ci, u64a start, u64a end) { } #endif -hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, - void *ctx) { +hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, void *ctx) { struct hs_scratch *scratch = ctx; struct RoseContext *tctx = &scratch->tctxt; struct core_info *ci = &scratch->core_info; @@ -77,9 +76,9 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, u64a real_end = ci->buf_offset - rb_len + end + 1; // index after last byte #ifdef DEBUG - DEBUG_PRINTF("REBUILD MATCH id=%u offsets=[%llu,%llu]: ", id, - start + ci->buf_offset - rb_len, real_end); - printMatch(ci, start + ci->buf_offset - rb_len, real_end); + DEBUG_PRINTF("REBUILD MATCH id=%u end offset@%llu]: ", id, real_end); + u64a start = real_end < 8 ? 1 : real_end - 7; + printMatch(ci, start, real_end); printf("\n"); #endif @@ -87,10 +86,9 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, assert(id && id < t->size); // id is a program offset const u64a som = 0; - const size_t match_len = end - start + 1; const u8 flags = 0; UNUSED hwlmcb_rv_t rv = - roseRunProgram(t, scratch, id, som, real_end, match_len, flags); + roseRunProgram(t, scratch, id, som, real_end, flags); assert(rv != HWLM_TERMINATE_MATCHING); /* we are just repopulating the delay queue, groups should be @@ -200,8 +198,6 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { return MO_HALT_MATCHING; } - const size_t match_len = 0; - /* delayed literals need to be delivered before real literals; however * delayed literals only come from the floating table so if we are going * to deliver a literal here it must be too early for a delayed literal */ @@ -216,8 +212,8 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { 
// Note that the "id" we have been handed is the program offset. const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED; - if (roseRunProgram(t, scratch, id, start, real_end, match_len, - flags) == HWLM_TERMINATE_MATCHING) { + if (roseRunProgram(t, scratch, id, start, real_end, flags) + == HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(scratch)); DEBUG_PRINTF("caller requested termination\n"); return MO_HALT_MATCHING; @@ -237,12 +233,12 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { static really_inline hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, struct hs_scratch *scratch, u64a end, - size_t match_len, u32 id) { + u32 id) { DEBUG_PRINTF("id=%u\n", id); assert(id && id < t->size); // id is an offset into bytecode const u64a som = 0; const u8 flags = 0; - return roseRunProgram_i(t, scratch, id, som, end, match_len, flags); + return roseRunProgram_i(t, scratch, id, som, end, flags); } static rose_inline @@ -274,7 +270,7 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, const u64a som = 0; const u8 flags = 0; hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, offset, - 0, flags); + flags); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* delayed literals can't safely set groups. @@ -311,7 +307,7 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, const u64a som = 0; const u8 flags = 0; hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, curr_loc, - 0, flags); + flags); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* anchored literals can't safely set groups. 
@@ -476,7 +472,7 @@ anchored_leftovers:; } static really_inline -hwlmcb_rv_t roseCallback_i(size_t start, size_t end, u32 id, void *ctxt) { +hwlmcb_rv_t roseCallback_i(size_t end, u32 id, void *ctxt) { struct hs_scratch *scratch = ctxt; struct RoseContext *tctx = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; @@ -484,9 +480,9 @@ hwlmcb_rv_t roseCallback_i(size_t start, size_t end, u32 id, void *ctxt) { u64a real_end = end + tctx->lit_offset_adjust; #if defined(DEBUG) - DEBUG_PRINTF("MATCH id=%u offsets=[%llu,%llu]: ", id, - start + tctx->lit_offset_adjust, real_end); - printMatch(&scratch->core_info, start + tctx->lit_offset_adjust, real_end); + DEBUG_PRINTF("MATCH id=%u end offset@%llu: ", id, real_end); + u64a start = real_end < 8 ? 1 : real_end - 7; + printMatch(&scratch->core_info, start, real_end); printf("\n"); #endif DEBUG_PRINTF("last end %llu\n", tctx->lastEndOffset); @@ -510,8 +506,7 @@ hwlmcb_rv_t roseCallback_i(size_t start, size_t end, u32 id, void *ctxt) { return HWLM_TERMINATE_MATCHING; } - size_t match_len = end - start + 1; - rv = roseProcessMatchInline(t, scratch, real_end, match_len, id); + rv = roseProcessMatchInline(t, scratch, real_end, id); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); @@ -524,15 +519,15 @@ hwlmcb_rv_t roseCallback_i(size_t start, size_t end, u32 id, void *ctxt) { return HWLM_TERMINATE_MATCHING; } -hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { - return roseCallback_i(start, end, id, ctxt); +hwlmcb_rv_t roseCallback(size_t end, u32 id, void *ctxt) { + return roseCallback_i(end, id, ctxt); } -hwlmcb_rv_t roseFloatingCallback(size_t start, size_t end, u32 id, void *ctxt) { +hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, void *ctxt) { struct hs_scratch *scratch = ctxt; const struct RoseEngine *t = scratch->core_info.rose; - return roseCallback_i(start, end, id, ctxt) & t->floating_group_mask; + return roseCallback_i(end, id, ctxt) & t->floating_group_mask; } /** @@ 
-567,10 +562,9 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, scratch->tctxt.minMatchOffset = stream_offset; const u64a som = 0; - const size_t match_len = 0; const u8 flags = 0; hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, som, stream_offset, - match_len, flags); + flags); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } @@ -588,10 +582,9 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { // Our match ID is the program offset. const u32 program = id; - const size_t match_len = 0; // Unused in this path. const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; hwlmcb_rv_t rv = - roseRunProgram(rose, scratch, program, start, end, match_len, flags); + roseRunProgram(rose, scratch, program, start, end, flags); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } diff --git a/src/rose/match.h b/src/rose/match.h index b69ff158..7cd0541d 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,10 +52,9 @@ int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context); /* Callbacks, defined in match.c */ -hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctx); -hwlmcb_rv_t roseFloatingCallback(size_t start, size_t end, u32 id, void *ctx); -hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, - void *ctx); +hwlmcb_rv_t roseCallback(size_t end, u32 id, void *ctx); +hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, void *ctx); +hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, void *ctx); int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx); /* Common code, used all over Rose runtime */ diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 
23532d40..2f2a6aa3 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,8 +43,6 @@ int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id, hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, size_t match_len, - u8 prog_flags) { - return roseRunProgram_i(t, scratch, programOffset, som, end, match_len, - prog_flags); + u64a som, u64a end, u8 prog_flags) { + return roseRunProgram_i(t, scratch, programOffset, som, end, prog_flags); } diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index b140a2bc..83a34a39 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -69,7 +69,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, size_t match_len, u8 prog_flags); + u64a som, u64a end, u8 prog_flags); /* Inline implementation follows. 
*/ @@ -1838,8 +1838,7 @@ void updateSeqPoint(struct RoseContext *tctxt, u64a offset, static rose_inline hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, UNUSED size_t match_len, - u8 prog_flags) { + u64a som, u64a end, u8 prog_flags) { DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, som, end, prog_flags); diff --git a/src/rose/rose.h b/src/rose/rose.h index 9a50f0e9..568c2b40 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,7 +46,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch); void roseStreamEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch); -hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *context); +hwlmcb_rv_t roseCallback(size_t end, u32 id, void *context); int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context); diff --git a/src/rose/stream.c b/src/rose/stream.c index c68cd8ab..1ee0b6d5 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -742,11 +742,9 @@ void roseStreamEodExec(const struct RoseEngine *t, u64a offset, assert(!scratch->tctxt.filledDelayedSlots); const u64a som = 0; - const size_t match_len = 0; const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; // Note: we ignore the result, as this is the last thing to ever happen on // a scan. 
- roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, match_len, - flags); + roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, flags); } diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index bd0bb4c0..aa14e5d9 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -70,53 +70,43 @@ using namespace ue2; namespace { struct match { - size_t start; size_t end; u32 id; - match(size_t start_in, size_t end_in, u32 id_in) - : start(start_in), end(end_in), id(id_in) {} + match(size_t end_in, u32 id_in) + : end(end_in), id(id_in) {} bool operator==(const match &b) const { - return start == b.start && end == b.end && id == b.id; + return end == b.end && id == b.id; } bool operator<(const match &b) const { - if (id < b.id) { - return true; - } else if (id == b.id) { - if (start < b.start) { - return true; - } else if (start == b.start) { - return end < b.end; - } - } - return false; + return tie(id, end) < tie(b.id, b.end); } match operator+(size_t adj) { - return match(start + adj, end + adj, id); + return match(end + adj, id); } }; extern "C" { static -hwlmcb_rv_t decentCallback(size_t start, size_t end, u32 id, void *ctxt) { - DEBUG_PRINTF("match %zu-%zu : %u\n", start, end, id); +hwlmcb_rv_t decentCallback(size_t end, u32 id, void *ctxt) { + DEBUG_PRINTF("match @%zu : %u\n", end, id); if (!ctxt) { return HWLM_CONTINUE_MATCHING; } vector *out = (vector *)ctxt; - out->push_back(match(start, end, id)); + out->push_back(match(end, id)); return HWLM_CONTINUE_MATCHING; } static -hwlmcb_rv_t decentCallbackT(size_t start, size_t end, u32 id, void *ctxt) { +hwlmcb_rv_t decentCallbackT(size_t end, u32 id, void *ctxt) { if (!ctxt) { return HWLM_TERMINATE_MATCHING; } vector *out = (vector *)ctxt; - out->push_back(match(start, end, id)); + out->push_back(match(end, id)); return HWLM_TERMINATE_MATCHING; } @@ -169,9 +159,9 @@ TEST_P(FDRp, Simple) { &matches, HWLM_ALL_GROUPS); ASSERT_EQ(3U, matches.size()); - EXPECT_EQ(match(0, 5, 0), matches[0]); 
- EXPECT_EQ(match(18, 23, 0), matches[1]); - EXPECT_EQ(match(78, 83, 0), matches[2]); + EXPECT_EQ(match(5, 0), matches[0]); + EXPECT_EQ(match(23, 0), matches[1]); + EXPECT_EQ(match(83, 0), matches[2]); } TEST_P(FDRp, SimpleSingle) { @@ -191,10 +181,10 @@ TEST_P(FDRp, SimpleSingle) { decentCallback, &matches, HWLM_ALL_GROUPS); ASSERT_EQ(4U, matches.size()); - EXPECT_EQ(match(0, 0, 0), matches[0]); - EXPECT_EQ(match(18, 18, 0), matches[1]); - EXPECT_EQ(match(78, 78, 0), matches[2]); - EXPECT_EQ(match(80, 80, 0), matches[3]); + EXPECT_EQ(match(0, 0), matches[0]); + EXPECT_EQ(match(18, 0), matches[1]); + EXPECT_EQ(match(78, 0), matches[2]); + EXPECT_EQ(match(80, 0), matches[3]); } TEST_P(FDRp, MultiLocation) { @@ -217,7 +207,7 @@ TEST_P(FDRp, MultiLocation) { fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches, HWLM_ALL_GROUPS); ASSERT_EQ(1U, matches.size()); - EXPECT_EQ(match(i, i+2, 1), matches[0]); + EXPECT_EQ(match(i + 2, 1), matches[0]); memset(data.data() + i, 0, 3); } } @@ -239,7 +229,7 @@ TEST_P(FDRp, NoRepeat1) { decentCallback, &matches, HWLM_ALL_GROUPS); ASSERT_EQ(1U, matches.size()); - EXPECT_EQ(match(0, 0, 0), matches[0]); + EXPECT_EQ(match(0, 0), matches[0]); } TEST_P(FDRp, NoRepeat2) { @@ -260,8 +250,8 @@ TEST_P(FDRp, NoRepeat2) { decentCallback, &matches, HWLM_ALL_GROUPS); ASSERT_EQ(3U, matches.size()); - EXPECT_EQ(match(0, 0, 0), matches[0]); - EXPECT_EQ(match(78, 78, 0), matches[2]); + EXPECT_EQ(match(0, 0), matches[0]); + EXPECT_EQ(match(78, 0), matches[2]); } TEST_P(FDRp, NoRepeat3) { @@ -282,7 +272,7 @@ TEST_P(FDRp, NoRepeat3) { decentCallback, &matches, HWLM_ALL_GROUPS); ASSERT_EQ(1U, matches.size()); - EXPECT_EQ(match(31, 32, 0), matches[0]); + EXPECT_EQ(match(32, 0), matches[0]); } /** @@ -315,9 +305,9 @@ TEST_P(FDRp, SmallStreaming) { CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); vector expected, matches; - expected.push_back(match(0, 0, 1)); - expected.push_back(match(1, 1, 1)); - expected.push_back(match(2, 2, 1)); + 
expected.push_back(match(0, 1)); + expected.push_back(match(1, 1)); + expected.push_back(match(2, 1)); safeExecStreaming(fdr.get(), (const u8 *)"", 0, (const u8 *)"aaar", 4, 0, decentCallback, &matches, HWLM_ALL_GROUPS); @@ -328,8 +318,8 @@ TEST_P(FDRp, SmallStreaming) { expected.clear(); matches.clear(); - expected.push_back(match(6, 6, 1)); - expected.push_back(match(1, 8, 10)); + expected.push_back(match(6, 1)); + expected.push_back(match(8, 10)); safeExecStreaming(fdr.get(), (const u8 *)"aaar", 4, (const u8 *)"dvark", 5, 0, decentCallback, &matches, HWLM_ALL_GROUPS); @@ -352,12 +342,12 @@ TEST_P(FDRp, SmallStreaming2) { CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); vector expected, matches; - expected.push_back(match(6,6,1)); - expected.push_back(match(7,7,1)); - expected.push_back(match(11,11,1)); - expected.push_back(match(6,13,10)); - expected.push_back(match(13,14,2)); - expected.push_back(match(14,15,2)); + expected.push_back(match(6,1)); + expected.push_back(match(7,1)); + expected.push_back(match(11,1)); + expected.push_back(match(13,10)); + expected.push_back(match(14,2)); + expected.push_back(match(15,2)); safeExecStreaming(fdr.get(), (const u8 *)"foobar", 6, (const u8 *)"aardvarkkk", 10, 0, decentCallback, &matches, @@ -402,7 +392,7 @@ TEST_P(FDRp, moveByteStream) { ASSERT_EQ(0, fdrStatus); ASSERT_EQ(1U, matches.size()); - EXPECT_EQ(match(12, 17, 0), matches[0]); + EXPECT_EQ(match(17, 0), matches[0]); } TEST_P(FDRp, Stream1) { @@ -431,7 +421,7 @@ TEST_P(FDRp, Stream1) { ASSERT_EQ(4U, matches.size()); for (size_t i = 0; i < matches.size(); i++) { - EXPECT_EQ(match(i, i, 0), matches[i]); + EXPECT_EQ(match(i, 0), matches[i]); } } @@ -506,8 +496,8 @@ TEST_P(FDRpp, AlignAndTooEarly) { // we should get two and only two matches - at the beginning and // at the end of unaligned buffer ASSERT_EQ(2U, matches.size()); - ASSERT_EQ(match(0, litLen - 1, 0), matches[0]); - ASSERT_EQ(match(4 * buf_alignment - litLen, 4 * buf_alignment - 1, 0), matches[1]); + 
ASSERT_EQ(match(litLen - 1, 0), matches[0]); + ASSERT_EQ(match(4 * buf_alignment - 1, 0), matches[1]); matches.clear(); } else { // "Too early" / "too late" condition - should not match anything @@ -628,7 +618,7 @@ TEST_P(FDRpa, ShortWritings) { for (int j = 0; j <= (int)bufLen - (int)patLen; j++) { if (!buf.compare(j, patLen, pat)) { - expMatches.push_back(match(j, j + patLen - 1, + expMatches.push_back(match(j + patLen - 1, testSigs[pIdx].id)); } } diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index 952fffc1..3dc79442 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -64,34 +64,23 @@ using namespace ue2; namespace { struct match { - size_t start; size_t end; u32 id; - match(size_t start_in, size_t end_in, u32 id_in) - : start(start_in), end(end_in), id(id_in) {} + match(size_t end_in, u32 id_in) : end(end_in), id(id_in) {} bool operator==(const match &b) const { - return start == b.start && end == b.end && id == b.id; + return end == b.end && id == b.id; } bool operator<(const match &b) const { - if (id < b.id) { - return true; - } else if (id == b.id) { - if (start < b.start) { - return true; - } else if (start == b.start) { - return end < b.end; - } - } - return false; + return tie(id, end) < tie(b.id, b.end); } match operator+(size_t adj) { - return match(start + adj, end + adj, id); + return match(end + adj, id); } }; template T &operator<<(T &a, const match &b) { - a << "(" << b.start << ", " << b.end << ", " << b.id << ")"; + a << "(" << b.end << ", " << b.id << ")"; return a; } @@ -107,8 +96,7 @@ T &operator<<(T &a, const vector &b) { extern "C" { -static hwlmcb_rv_t countCallback(UNUSED size_t start, UNUSED size_t end, u32 id, - void *cntxt) { +static hwlmcb_rv_t countCallback(UNUSED size_t end, u32 id, void *cntxt) { if (cntxt) { map *matchesCounts = (map *)cntxt; (*matchesCounts)[id]++; diff --git a/unit/internal/noodle.cpp b/unit/internal/noodle.cpp index 5df66236..460e77c5 100644 --- 
a/unit/internal/noodle.cpp +++ b/unit/internal/noodle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,22 +45,21 @@ using std::vector; using namespace ue2; struct hlmMatchEntry { - size_t from; size_t to; u32 id; - hlmMatchEntry(size_t start, size_t end, u32 identifier) : - from(start), to(end), id(identifier) {} + hlmMatchEntry(size_t end, u32 identifier) : + to(end), id(identifier) {} }; typedef vector hlmMatchRecord; static -hwlmcb_rv_t hlmSimpleCallback(size_t from, size_t to, u32 id, void *context) { +hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id, void *context) { hlmMatchRecord *mr = (hlmMatchRecord *)context; DEBUG_PRINTF("match @%zu = %u,%p\n", to, id, context); - mr->push_back(hlmMatchEntry(from, to, id)); + mr->push_back(hlmMatchEntry(to, id)); return HWLM_CONTINUE_MATCHING; } @@ -89,7 +88,6 @@ TEST(Noodle, nood1) { noodleMatch(data, data_len, "a", 1, 0, hlmSimpleCallback, &ctxt); ASSERT_EQ(1024U, ctxt.size()); for (i = 0; i < 1024; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i, ctxt[i].to); } @@ -101,7 +99,6 @@ TEST(Noodle, nood1) { noodleMatch(data, data_len, "A", 1, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1024U, ctxt.size()); for (i = 0; i < 1024; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i, ctxt[i].to); } @@ -111,7 +108,6 @@ TEST(Noodle, nood1) { &ctxt); ASSERT_EQ(1024 - j, ctxt.size()); for (i = 0; i < 1024 - j; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i, ctxt[i].to); } @@ -119,7 +115,6 @@ TEST(Noodle, nood1) { noodleMatch(data, data_len - j, "A", 1, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1024 - j, ctxt.size()); for (i = 0; i < 1024 - j; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i, ctxt[i].to); } } @@ -136,7 +131,6 @@ TEST(Noodle, nood2) { noodleMatch(data, data_len, "aa", 2, 0, hlmSimpleCallback, &ctxt); 
ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 1, ctxt[i].to); } @@ -152,7 +146,6 @@ TEST(Noodle, nood2) { noodleMatch(data, data_len, "aa", 2, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 1, ctxt[i].to); } @@ -160,7 +153,6 @@ TEST(Noodle, nood2) { noodleMatch(data, data_len, "Aa", 2, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 1, ctxt[i].to); } @@ -168,7 +160,6 @@ TEST(Noodle, nood2) { noodleMatch(data, data_len, "AA", 2, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 1, ctxt[i].to); } @@ -178,7 +169,6 @@ TEST(Noodle, nood2) { &ctxt); ASSERT_EQ(1023 - j, ctxt.size()); for (i = 0; i < 1023 - j; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 1, ctxt[i].to); } @@ -186,7 +176,6 @@ TEST(Noodle, nood2) { noodleMatch(data, data_len - j, "aA", 2, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1023 - j, ctxt.size()); for (i = 0; i < 1023 - j; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 1, ctxt[i].to); } } @@ -203,7 +192,6 @@ TEST(Noodle, noodLong) { noodleMatch(data, data_len, "aaaa", 4, 0, hlmSimpleCallback, &ctxt); ASSERT_EQ(1021U, ctxt.size()); for (i = 0; i < 1021; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 3, ctxt[i].to); } @@ -215,7 +203,6 @@ TEST(Noodle, noodLong) { noodleMatch(data, data_len, "aaAA", 4, 1, hlmSimpleCallback, &ctxt); ASSERT_EQ(1021U, ctxt.size()); for (i = 0; i < 1021; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 3, ctxt[i].to); } @@ -225,7 +212,6 @@ TEST(Noodle, noodLong) { &ctxt); ASSERT_EQ(1021 - j, ctxt.size()); for (i = 0; i < 1021 - j; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 3, ctxt[i].to); } @@ -234,7 +220,6 @@ TEST(Noodle, noodLong) { &ctxt); ASSERT_EQ(1021 - j, ctxt.size()); for (i = 0; i < 1021 
- j; i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 3, ctxt[i].to); } } @@ -253,7 +238,6 @@ TEST(Noodle, noodCutoverSingle) { noodleMatch(data + align, len, "a", 1, 0, hlmSimpleCallback, &ctxt); EXPECT_EQ(len, ctxt.size()); for (u32 i = 0; i < ctxt.size(); i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i, ctxt[i].to); } } @@ -274,7 +258,6 @@ TEST(Noodle, noodCutoverDouble) { &ctxt); EXPECT_EQ(len ? len - 1 : 0U, ctxt.size()); for (u32 i = 0; i < ctxt.size(); i++) { - ASSERT_EQ(i, ctxt[i].from); ASSERT_EQ(i + 1, ctxt[i].to); } } From 815be3fa2b40cfa25ed47126226ed2dedda8818d Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Fri, 7 Jul 2017 08:14:35 -0400 Subject: [PATCH 091/190] flood detection: debug output fix --- src/fdr/flood_compile.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/fdr/flood_compile.cpp b/src/fdr/flood_compile.cpp index 6304ab2f..ff805ca3 100644 --- a/src/fdr/flood_compile.cpp +++ b/src/fdr/flood_compile.cpp @@ -85,7 +85,7 @@ void addFlood(vector &tmpFlood, u8 c, const hwlmLiteral &lit, // when idCount gets to max_ids this flood no longer happens // only incremented one more time to avoid arithmetic overflow DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n", - c, fl.suffix, fl.idCount, suffix); + c, fl.suffix, fl.idCount, suffix); fl.idCount++; } } @@ -181,8 +181,7 @@ bytecode_ptr setupFDRFloodControl(const vector &lits, printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is " "%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups); for (u32 j = 0; j < fl.idCount; j++) { - printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j, - fl.groups[j], fl.len[j]); + printf("j is %d fl.groups[j] %016llx\n", j, fl.groups[j]); } } #endif From bc232d272fca7c485fbd42ec80adb7c22a566c9e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 12 Jul 2017 11:08:45 +1000 Subject: [PATCH 092/190] ng_find_matches: speed up edge lookups Improves the performance of step() on graphs with vertices with 
large degree. --- util/ng_find_matches.cpp | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp index 0a1f796f..97a18375 100644 --- a/util/ng_find_matches.cpp +++ b/util/ng_find_matches.cpp @@ -752,12 +752,34 @@ bool operator==(const StateSet::State &a, const StateSet::State &b) { a.som == b.som; } +/** \brief Cache to speed up edge lookups, rather than hitting the graph. */ +struct EdgeCache { + explicit EdgeCache(const NGHolder &g) { + cache.reserve(num_vertices(g)); + for (auto e : edges_range(g)) { + cache.emplace(make_pair(source(e, g), target(e, g)), e); + } + } + + NFAEdge get(NFAVertex u, NFAVertex v) const { + auto it = cache.find(make_pair(u, v)); + if (it != cache.end()) { + return it->second; + } + return NFAEdge(); + } + +private: + unordered_map, NFAEdge> cache; +}; + struct fmstate { const size_t num_states; // number of vertices in graph StateSet states; // currently active states StateSet next; // states on after this iteration GraphCache &gc; vector vertices; // mapping from index to vertex + EdgeCache edge_cache; size_t offset = 0; unsigned char cur = 0; unsigned char prev = 0; @@ -771,7 +793,7 @@ struct fmstate { states(num_states, edit_distance), next(num_states, edit_distance), gc(gc_in), vertices(num_vertices(g), NGHolder::null_vertex()), - utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in) { + edge_cache(g), utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in) { // init states states.activateState( StateSet::State {g[g.start].index, 0, 0, @@ -889,7 +911,7 @@ void getAcceptMatches(const NGHolder &g, MatchSet &matches, eod ? 
state.gc.vertex_eod_reports_by_level[cur.level][u] : state.gc.vertex_reports_by_level[cur.level][u]; - NFAEdge e = edge(u, accept_vertex, g); + NFAEdge e = state.edge_cache.get(u, accept_vertex); // we assume edge assertions only exist at level 0 if (e && !canReach(g, e, state)) { @@ -965,7 +987,7 @@ void step(const NGHolder &g, fmstate &state, StateSet::WorkingData &wd) { } else { // we assume edge assertions only exist on level 0 const CharReach &cr = g[v].char_reach; - NFAEdge e = edge(u, v, g); + NFAEdge e = state.edge_cache.get(u, v); if (cr.test(state.cur) && (!e || canReach(g, e, state))) { From 67a8f43355bb7e242b51eaf783b54deb8faefef8 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Tue, 4 Jul 2017 12:24:11 -0400 Subject: [PATCH 093/190] literal matchers: change context passed to callback to scratch --- src/fdr/fdr.c | 12 +++-- src/fdr/fdr.h | 12 +++-- src/fdr/fdr_confirm_runtime.h | 2 +- src/fdr/fdr_internal.h | 4 +- src/fdr/flood_runtime.h | 60 ++++++++++----------- src/hwlm/hwlm.c | 18 +++---- src/hwlm/hwlm.h | 20 +++---- src/hwlm/noodle_engine.c | 16 +++--- src/hwlm/noodle_engine.h | 6 ++- src/rose/match.c | 17 +++--- src/rose/match.h | 8 +-- src/rose/rose.h | 2 +- src/rose/stream.c | 4 +- src/runtime.c | 2 +- unit/internal/fdr.cpp | 98 +++++++++++++++++++---------------- unit/internal/fdr_flood.cpp | 38 +++++++------- unit/internal/noodle.cpp | 72 ++++++++++++------------- 17 files changed, 203 insertions(+), 188 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index c77e31ff..8d072ea2 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -32,6 +32,7 @@ #include "fdr_internal.h" #include "fdr_loadval.h" #include "flood_runtime.h" +#include "scratch.h" #include "teddy.h" #include "teddy_internal.h" #include "util/arch.h" @@ -824,8 +825,8 @@ static const FDRFUNCTYPE funcs[] = { static const u8 fake_history[FAKE_HISTORY_SIZE]; hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, void *ctxt, - 
hwlm_group_t groups) { + size_t start, HWLMCallback cb, + struct hs_scratch *scratch, hwlm_group_t groups) { // We guarantee (for safezone construction) that it is safe to read 16 // bytes before the end of the history buffer. const u8 *hbuf = fake_history + FAKE_HISTORY_SIZE; @@ -837,7 +838,7 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, 0, start, cb, - ctxt, + scratch, nextFloodDetect(buf, len, FLOOD_BACKOFF_START), 0 }; @@ -851,7 +852,8 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, void *ctxt, + size_t start, HWLMCallback cb, + struct hs_scratch *scratch, hwlm_group_t groups) { struct FDR_Runtime_Args a = { buf, @@ -860,7 +862,7 @@ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, hlen, start, cb, - ctxt, + scratch, nextFloodDetect(buf, len, FLOOD_BACKOFF_START), /* we are guaranteed to always have 16 initialised bytes at the end of * the history buffer (they may be garbage). */ diff --git a/src/fdr/fdr.h b/src/fdr/fdr.h index e2b80056..77157a10 100644 --- a/src/fdr/fdr.h +++ b/src/fdr/fdr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,7 @@ extern "C" { #endif struct FDR; +struct hs_scratch; /** * \brief Block-mode scan. @@ -51,11 +52,11 @@ struct FDR; * \param len Length of buffer to scan. * \param start First offset in buf at which a match may end. * \param cb Callback to call when a match is found. - * \param ctxt Caller-provided context pointer supplied to callback on match. + * \param scratch Scratch supplied to callback on match. * \param groups Initial groups mask. 
*/ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, void *ctxt, + size_t start, HWLMCallback cb, struct hs_scratch *scratch, hwlm_group_t groups); /** @@ -68,12 +69,13 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, * \param len Length of buffer to scan (buf). * \param start First offset in buf at which a match may end. * \param cb Callback to call when a match is found. - * \param ctxt Caller-provided context pointer supplied to callback on match. + * \param scratch Scratch supplied to callback on match. * \param groups Initial groups mask. */ hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, void *ctxt, + size_t start, HWLMCallback cb, + struct hs_scratch *scratch, hwlm_group_t groups); #ifdef __cplusplus diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 557873b7..86a3bfa4 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -88,7 +88,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a } *last_match = li->id; - *control = a->cb(i, li->id, a->ctxt); + *control = a->cb(i, li->id, a->scratch); out: oldNext = li->next; // oldNext is either 0 or an 'adjust' value li++; diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 41470997..c79f61c1 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -36,6 +36,8 @@ #include "ue2common.h" #include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback +struct hs_scratch; + typedef enum { NOT_CAUTIOUS, //!< not near a boundary (quantify?) 
VECTORING //!< potentially vectoring @@ -95,7 +97,7 @@ struct FDR_Runtime_Args { size_t len_history; size_t start_offset; HWLMCallback cb; - void *ctxt; + struct hs_scratch *scratch; const u8 *firstFloodDetect; const u64a histBytes; }; diff --git a/src/fdr/flood_runtime.h b/src/fdr/flood_runtime.h index 93079afb..2d5a32d9 100644 --- a/src/fdr/flood_runtime.h +++ b/src/fdr/flood_runtime.h @@ -94,7 +94,7 @@ const u8 * floodDetect(const struct FDR * fdr, const u8 * buf = a->buf; const size_t len = a->len; HWLMCallback cb = a->cb; - void * ctxt = a->ctxt; + struct hs_scratch *scratch = a->scratch; const u8 * ptr = *ptrPtr; // tryFloodDetect is never put in places where unconditional @@ -197,67 +197,67 @@ const u8 * floodDetect(const struct FDR * fdr, t += 4) { DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]); if (*control & fl->groups[0]) { - *control = cb(i + t + 0, fl->ids[0], ctxt); + *control = cb(i + t + 0, fl->ids[0], scratch); } if (*control & fl->groups[0]) { - *control = cb(i + t + 1, fl->ids[0], ctxt); + *control = cb(i + t + 1, fl->ids[0], scratch); } if (*control & fl->groups[0]) { - *control = cb(i + t + 2, fl->ids[0], ctxt); + *control = cb(i + t + 2, fl->ids[0], scratch); } if (*control & fl->groups[0]) { - *control = cb(i + t + 3, fl->ids[0], ctxt); + *control = cb(i + t + 3, fl->ids[0], scratch); } } break; case 2: for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) { if (*control & fl->groups[0]) { - *control = cb(i + t, fl->ids[0], ctxt); + *control = cb(i + t, fl->ids[0], scratch); } if (*control & fl->groups[1]) { - *control = cb(i + t, fl->ids[1], ctxt); + *control = cb(i + t, fl->ids[1], scratch); } if (*control & fl->groups[0]) { *control = - cb(i + t + 1, fl->ids[0], ctxt); + cb(i + t + 1, fl->ids[0], scratch); } if (*control & fl->groups[1]) { - *control = cb(i + t + 1, fl->ids[1], ctxt); + *control = cb(i + t + 1, fl->ids[1], scratch); } if (*control & fl->groups[0]) { - *control = cb(i + t + 2, fl->ids[0], ctxt); + *control = 
cb(i + t + 2, fl->ids[0], scratch); } if (*control & fl->groups[1]) { - *control = cb(i + t + 2, fl->ids[1], ctxt); + *control = cb(i + t + 2, fl->ids[1], scratch); } if (*control & fl->groups[0]) { - *control = cb(i + t + 3, fl->ids[0], ctxt); + *control = cb(i + t + 3, fl->ids[0], scratch); } if (*control & fl->groups[1]) { - *control = cb(i + t + 3, fl->ids[1], ctxt); + *control = cb(i + t + 3, fl->ids[1], scratch); } } break; case 3: for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) { if (*control & fl->groups[0]) { - *control = cb(i + t, fl->ids[0], ctxt); + *control = cb(i + t, fl->ids[0], scratch); } if (*control & fl->groups[1]) { - *control = cb(i + t, fl->ids[1], ctxt); + *control = cb(i + t, fl->ids[1], scratch); } if (*control & fl->groups[2]) { - *control = cb(i + t, fl->ids[2], ctxt); + *control = cb(i + t, fl->ids[2], scratch); } if (*control & fl->groups[0]) { - *control = cb(i + t + 1, fl->ids[0], ctxt); + *control = cb(i + t + 1, fl->ids[0], scratch); } if (*control & fl->groups[1]) { - *control = cb(i + t + 1, fl->ids[1], ctxt); + *control = cb(i + t + 1, fl->ids[1], scratch); } if (*control & fl->groups[2]) { - *control = cb(i + t + 1, fl->ids[2], ctxt); + *control = cb(i + t + 1, fl->ids[2], scratch); } } break; @@ -266,40 +266,40 @@ const u8 * floodDetect(const struct FDR * fdr, for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) { if (*control & fl->groups[0]) { - *control = cb(i + t, fl->ids[0], ctxt); + *control = cb(i + t, fl->ids[0], scratch); } if (*control & fl->groups[1]) { - *control = cb(i + t, fl->ids[1], ctxt); + *control = cb(i + t, fl->ids[1], scratch); } if (*control & fl->groups[2]) { - *control = cb(i + t, fl->ids[2], ctxt); + *control = cb(i + t, fl->ids[2], scratch); } if (*control & fl->groups[3]) { - *control = cb(i + t, fl->ids[3], ctxt); + *control = cb(i + t, fl->ids[3], scratch); } for (u32 t2 = 4; t2 < fl->idCount; t2++) { if (*control & fl->groups[t2]) { - *control = cb(i + t, 
fl->ids[t2], ctxt); + *control = cb(i + t, fl->ids[t2], scratch); } } if (*control & fl->groups[0]) { - *control = cb(i + t + 1, fl->ids[0], ctxt); + *control = cb(i + t + 1, fl->ids[0], scratch); } if (*control & fl->groups[1]) { - *control = cb(i + t + 1, fl->ids[1], ctxt); + *control = cb(i + t + 1, fl->ids[1], scratch); } if (*control & fl->groups[2]) { - *control = cb(i + t + 1, fl->ids[2], ctxt); + *control = cb(i + t + 1, fl->ids[2], scratch); } if (*control & fl->groups[3]) { - *control = cb(i + t + 1, fl->ids[3], ctxt); + *control = cb(i + t + 1, fl->ids[3], scratch); } for (u32 t2 = 4; t2 < fl->idCount; t2++) { if (*control & fl->groups[t2]) { - *control = cb(i + t + 1, fl->ids[t2], ctxt); + *control = cb(i + t + 1, fl->ids[t2], scratch); } } } @@ -310,7 +310,7 @@ const u8 * floodDetect(const struct FDR * fdr, for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) { for (u32 t2 = 0; t2 < fl->idCount; t2++) { if (*control & fl->groups[t2]) { - *control = cb(i + t, fl->ids[t2], ctxt); + *control = cb(i + t, fl->ids[t2], scratch); } } } diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 4af987c5..8cf585a9 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -170,7 +170,7 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen, } hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, void *ctxt, + size_t start, HWLMCallback cb, struct hs_scratch *scratch, hwlm_group_t groups) { assert(t); @@ -184,7 +184,7 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, if (t->type == HWLM_ENGINE_NOOD) { DEBUG_PRINTF("calling noodExec\n"); - return noodExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt); + return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch); } assert(t->type == HWLM_ENGINE_FDR); @@ -195,12 +195,12 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, } do_accel_block(aa, buf, len, &start); DEBUG_PRINTF("calling frankie 
(groups=%08llx, start=%zu)\n", groups, start); - return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups); + return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, scratch, groups); } -hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, - size_t len, size_t start, HWLMCallback cb, - void *ctxt, hwlm_group_t groups) { +hwlm_error_t hwlmExecStreaming(const struct HWLM *t, size_t len, size_t start, + HWLMCallback cb, struct hs_scratch *scratch, + hwlm_group_t groups) { assert(t); assert(scratch); @@ -222,10 +222,10 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, // If we've been handed a start offset, we can use a block mode scan at // that offset. if (start) { - return noodExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt); + return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch); } else { return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb, - ctxt); + scratch); } } @@ -238,5 +238,5 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, do_accel_streaming(aa, hbuf, hlen, buf, len, &start); DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, start, cb, - ctxt, groups); + scratch, groups); } diff --git a/src/hwlm/hwlm.h b/src/hwlm/hwlm.h index 92d4bfdb..9262e80c 100644 --- a/src/hwlm/hwlm.h +++ b/src/hwlm/hwlm.h @@ -95,7 +95,8 @@ struct HWLM; * belonging to the literal which was active at the when the end match location * was first reached. */ -typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id, void *context); +typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id, + struct hs_scratch *scratch); /** \brief Match strings in table. * @@ -112,29 +113,28 @@ typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id, void *context); * the first possible match of a literal which is in the initial group mask. 
*/ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len, - size_t start, HWLMCallback callback, void *context, - hwlm_group_t groups); + size_t start, HWLMCallback callback, + struct hs_scratch *scratch, hwlm_group_t groups); /** \brief As for \ref hwlmExec, but a streaming case across two buffers. - * - * \p scratch is used to access fdr_temp_buf and to access the history buffer, - * history length and the main buffer. * * \p len is the length of the main buffer to be scanned. * * \p start is an advisory hint representing the first offset at which a match * may start. Some underlying literal matches may not respect it. * + * \p scratch is used to access the history buffer, history length and + * the main buffer. + * * Two buffers/lengths are provided. Matches that occur entirely within * the history buffer will not be reported by this function. The offsets * reported for the main buffer are relative to the start of that buffer (a * match at byte 10 of the main buffer is reported as 10). Matches that start * in the history buffer will have starts reported with 'negative' values. 
*/ -hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, - struct hs_scratch *scratch, size_t len, - size_t start, HWLMCallback callback, - void *context, hwlm_group_t groups); +hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, size_t len, size_t start, + HWLMCallback callback, + struct hs_scratch *scratch, hwlm_group_t groups); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index 009c4b98..d4f6902a 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -32,6 +32,7 @@ #include "hwlm.h" #include "noodle_engine.h" #include "noodle_internal.h" +#include "scratch.h" #include "ue2common.h" #include "util/arch.h" #include "util/bitutils.h" @@ -50,7 +51,7 @@ struct cb_info { HWLMCallback cb; //!< callback function called on match u32 id; //!< ID to pass to callback on match - void *ctx; //!< caller-supplied context to pass to callback + struct hs_scratch *scratch; //!< scratch to pass to callback size_t offsetAdj; //!< used in streaming mode }; @@ -129,7 +130,7 @@ hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, match: pos -= cbi->offsetAdj; DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset); - hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->ctx); + hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATED; } @@ -371,10 +372,11 @@ hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len, /** \brief Block-mode scanner. 
*/ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, void *ctxt) { + size_t start, HWLMCallback cb, + struct hs_scratch *scratch) { assert(n && buf); - struct cb_info cbi = {cb, n->id, ctxt, 0}; + struct cb_info cbi = {cb, n->id, scratch, 0}; DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->msk_len, (const char *)&n->cmp, buf); @@ -384,7 +386,7 @@ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, /** \brief Streaming-mode scanner. */ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, - HWLMCallback cb, void *ctxt) { + HWLMCallback cb, struct hs_scratch *scratch) { assert(n); if (len + hlen < n->msk_len) { @@ -392,7 +394,7 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, return HWLM_SUCCESS; } - struct cb_info cbi = {cb, n->id, ctxt, 0}; + struct cb_info cbi = {cb, n->id, scratch, 0}; DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen, n->msk_len, (const char *)&n->cmp, buf); @@ -425,7 +427,7 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, if ((v & n->msk) == n->cmp) { size_t m_end = -tl1 + i + n->msk_len - 1; DEBUG_PRINTF("match @ %zu (i %zu)\n", m_end, i); - hwlmcb_rv_t rv = cb(m_end, n->id, ctxt); + hwlmcb_rv_t rv = cb(m_end, n->id, scratch); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATED; } diff --git a/src/hwlm/noodle_engine.h b/src/hwlm/noodle_engine.h index 18847e5a..64422c41 100644 --- a/src/hwlm/noodle_engine.h +++ b/src/hwlm/noodle_engine.h @@ -41,15 +41,17 @@ extern "C" #endif struct noodTable; +struct hs_scratch; /** \brief Block-mode scanner. */ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, void *ctxt); + size_t start, HWLMCallback cb, + struct hs_scratch *scratch); /** \brief Streaming-mode scanner. 
*/ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, - HWLMCallback cb, void *ctxt); + HWLMCallback cb, struct hs_scratch *scratch); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/rose/match.c b/src/rose/match.c index 91e045a5..5d1b6e07 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -66,8 +66,8 @@ void printMatch(const struct core_info *ci, u64a start, u64a end) { } #endif -hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, void *ctx) { - struct hs_scratch *scratch = ctx; +hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, + struct hs_scratch *scratch) { struct RoseContext *tctx = &scratch->tctxt; struct core_info *ci = &scratch->core_info; const struct RoseEngine *t = ci->rose; @@ -472,8 +472,7 @@ anchored_leftovers:; } static really_inline -hwlmcb_rv_t roseCallback_i(size_t end, u32 id, void *ctxt) { - struct hs_scratch *scratch = ctxt; +hwlmcb_rv_t roseCallback_i(size_t end, u32 id, struct hs_scratch *scratch) { struct RoseContext *tctx = &scratch->tctxt; const struct RoseEngine *t = scratch->core_info.rose; @@ -519,15 +518,15 @@ hwlmcb_rv_t roseCallback_i(size_t end, u32 id, void *ctxt) { return HWLM_TERMINATE_MATCHING; } -hwlmcb_rv_t roseCallback(size_t end, u32 id, void *ctxt) { - return roseCallback_i(end, id, ctxt); +hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch) { + return roseCallback_i(end, id, scratch); } -hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, void *ctxt) { - struct hs_scratch *scratch = ctxt; +hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, + struct hs_scratch *scratch) { const struct RoseEngine *t = scratch->core_info.rose; - return roseCallback_i(end, id, ctxt) & t->floating_group_mask; + return roseCallback_i(end, id, scratch) & t->floating_group_mask; } /** diff --git a/src/rose/match.h b/src/rose/match.h index 7cd0541d..0d4fb19c 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -52,9 +52,11 @@ int 
roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context); /* Callbacks, defined in match.c */ -hwlmcb_rv_t roseCallback(size_t end, u32 id, void *ctx); -hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, void *ctx); -hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, void *ctx); +hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch); +hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, + struct hs_scratch *scratch); +hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, + struct hs_scratch *scratch); int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx); /* Common code, used all over Rose runtime */ diff --git a/src/rose/rose.h b/src/rose/rose.h index 568c2b40..b29519b6 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -46,7 +46,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch); void roseStreamEodExec(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch); -hwlmcb_rv_t roseCallback(size_t end, u32 id, void *context); +hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch); int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context); diff --git a/src/rose/stream.c b/src/rose/stream.c index 1ee0b6d5..d667ae56 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -659,8 +659,8 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { DEBUG_PRINTF("start=%zu\n", start); DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); - hwlmExecStreaming(ftable, scratch, flen, start, roseFloatingCallback, - scratch, tctxt->groups & t->floating_group_mask); + hwlmExecStreaming(ftable, flen, start, roseFloatingCallback, scratch, + tctxt->groups & t->floating_group_mask); } flush_delay_and_exit: diff --git a/src/runtime.c b/src/runtime.c index 5725cf93..17f13382 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -764,7 +764,7 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, // start the match region at zero. 
const size_t start = 0; - hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch, + hwlmExecStreaming(ftable, len2, start, roseCallback, scratch, rose->initialGroups & rose->floating_group_mask); if (!told_to_stop_matching(scratch) && diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index aa14e5d9..399147e2 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -39,6 +39,7 @@ #include "util/alloc.h" #include "database.h" +#include "scratch.h" #include "gtest/gtest.h" #include @@ -85,28 +86,23 @@ struct match { } }; +vector matches; + extern "C" { static -hwlmcb_rv_t decentCallback(size_t end, u32 id, void *ctxt) { +hwlmcb_rv_t decentCallback(size_t end, u32 id, + UNUSED struct hs_scratch *scratch) { DEBUG_PRINTF("match @%zu : %u\n", end, id); - if (!ctxt) { - return HWLM_CONTINUE_MATCHING; - } - vector *out = (vector *)ctxt; - out->push_back(match(end, id)); + matches.push_back(match(end, id)); return HWLM_CONTINUE_MATCHING; } static -hwlmcb_rv_t decentCallbackT(size_t end, u32 id, void *ctxt) { - if (!ctxt) { - return HWLM_TERMINATE_MATCHING; - } - - vector *out = (vector *)ctxt; - out->push_back(match(end, id)); +hwlmcb_rv_t decentCallbackT(size_t end, u32 id, + UNUSED struct hs_scratch *scratch) { + matches.push_back(match(end, id)); return HWLM_TERMINATE_MATCHING; } @@ -154,14 +150,15 @@ TEST_P(FDRp, Simple) { auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - vector matches; + struct hs_scratch scratch; fdrExec(fdr.get(), (const u8 *)data, sizeof(data), 0, decentCallback, - &matches, HWLM_ALL_GROUPS); + &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(3U, matches.size()); EXPECT_EQ(match(5, 0), matches[0]); EXPECT_EQ(match(23, 0), matches[1]); EXPECT_EQ(match(83, 0), matches[2]); + matches.clear(); } TEST_P(FDRp, SimpleSingle) { @@ -176,15 +173,16 @@ TEST_P(FDRp, SimpleSingle) { auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); 
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - vector matches; + struct hs_scratch scratch; fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0, - decentCallback, &matches, HWLM_ALL_GROUPS); + decentCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(4U, matches.size()); EXPECT_EQ(match(0, 0), matches[0]); EXPECT_EQ(match(18, 0), matches[1]); EXPECT_EQ(match(78, 0), matches[2]); EXPECT_EQ(match(80, 0), matches[3]); + matches.clear(); } TEST_P(FDRp, MultiLocation) { @@ -201,14 +199,15 @@ TEST_P(FDRp, MultiLocation) { vector data(testSize, 0); + struct hs_scratch scratch; for (u32 i = 0; i < testSize - 3; i++) { memcpy(data.data() + i, "abc", 3); - vector matches; - fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches, + fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(1U, matches.size()); EXPECT_EQ(match(i + 2, 1), matches[0]); memset(data.data() + i, 0, 3); + matches.clear(); } } @@ -224,12 +223,13 @@ TEST_P(FDRp, NoRepeat1) { auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - vector matches; + struct hs_scratch scratch; fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0, - decentCallback, &matches, HWLM_ALL_GROUPS); + decentCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(1U, matches.size()); EXPECT_EQ(match(0, 0), matches[0]); + matches.clear(); } TEST_P(FDRp, NoRepeat2) { @@ -245,13 +245,14 @@ TEST_P(FDRp, NoRepeat2) { auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - vector matches; + struct hs_scratch scratch; fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0, - decentCallback, &matches, HWLM_ALL_GROUPS); + decentCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(3U, matches.size()); EXPECT_EQ(match(0, 0), matches[0]); EXPECT_EQ(match(78, 0), matches[2]); + matches.clear(); } TEST_P(FDRp, NoRepeat3) { 
@@ -267,12 +268,13 @@ TEST_P(FDRp, NoRepeat3) { auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - vector matches; + struct hs_scratch scratch; fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0, - decentCallback, &matches, HWLM_ALL_GROUPS); + decentCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(1U, matches.size()); EXPECT_EQ(match(32, 0), matches[0]); + matches.clear(); } /** @@ -282,8 +284,7 @@ TEST_P(FDRp, NoRepeat3) { static hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, size_t start, - HWLMCallback cb, void *ctxt, - hwlm_group_t groups) { + HWLMCallback cb, hwlm_group_t groups) { array wrapped_history = {{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}}; if (hlen < 16) { @@ -291,7 +292,9 @@ hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen, memcpy(new_hbuf, hbuf, hlen); hbuf = new_hbuf; } - return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups); + struct hs_scratch scratch; + return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, &scratch, + groups); } TEST_P(FDRp, SmallStreaming) { @@ -304,13 +307,13 @@ TEST_P(FDRp, SmallStreaming) { auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - vector expected, matches; + vector expected; expected.push_back(match(0, 1)); expected.push_back(match(1, 1)); expected.push_back(match(2, 1)); safeExecStreaming(fdr.get(), (const u8 *)"", 0, (const u8 *)"aaar", 4, 0, - decentCallback, &matches, HWLM_ALL_GROUPS); + decentCallback, HWLM_ALL_GROUPS); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i]); } @@ -322,12 +325,13 @@ TEST_P(FDRp, SmallStreaming) { expected.push_back(match(8, 10)); safeExecStreaming(fdr.get(), (const u8 *)"aaar", 4, (const u8 *)"dvark", 5, - 0, 
decentCallback, &matches, HWLM_ALL_GROUPS); + 0, decentCallback, HWLM_ALL_GROUPS); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i] + 4); } ASSERT_EQ(expected.size(), matches.size()); + matches.clear(); } TEST_P(FDRp, SmallStreaming2) { @@ -341,7 +345,7 @@ TEST_P(FDRp, SmallStreaming2) { auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - vector expected, matches; + vector expected; expected.push_back(match(6,1)); expected.push_back(match(7,1)); expected.push_back(match(11,1)); @@ -350,13 +354,14 @@ TEST_P(FDRp, SmallStreaming2) { expected.push_back(match(15,2)); safeExecStreaming(fdr.get(), (const u8 *)"foobar", 6, - (const u8 *)"aardvarkkk", 10, 0, decentCallback, &matches, + (const u8 *)"aardvarkkk", 10, 0, decentCallback, HWLM_ALL_GROUPS); for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) { EXPECT_EQ(expected[i], matches[i] + 6); } ASSERT_EQ(expected.size(), matches.size()); + matches.clear(); } TEST_P(FDRp, moveByteStream) { @@ -384,15 +389,16 @@ TEST_P(FDRp, moveByteStream) { } // check matches - vector matches; + struct hs_scratch scratch; hwlm_error_t fdrStatus = fdrExec(fdrTable.get(), (const u8 *)data, - data_len, 0, decentCallback, &matches, + data_len, 0, decentCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); ASSERT_EQ(1U, matches.size()); EXPECT_EQ(match(17, 0), matches[0]); + matches.clear(); } TEST_P(FDRp, Stream1) { @@ -412,17 +418,17 @@ TEST_P(FDRp, Stream1) { CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); // check matches - vector matches; fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2, 0, - decentCallback, &matches, HWLM_ALL_GROUPS); + decentCallback, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); ASSERT_EQ(4U, matches.size()); for (size_t i = 0; i < matches.size(); i++) { EXPECT_EQ(match(i, 0), matches[i]); } + matches.clear(); } INSTANTIATE_TEST_CASE_P(FDR, 
FDRp, ValuesIn(getValidFdrEngines())); @@ -463,6 +469,7 @@ TEST_P(FDRpp, AlignAndTooEarly) { aligned_free_internal); vector lits; + struct hs_scratch scratch; for (size_t litLen = 1; litLen <= patLen; litLen++) { // building literal from pattern substring of variable length 1-patLen @@ -482,11 +489,10 @@ TEST_P(FDRpp, AlignAndTooEarly) { pattern.data(), litLen); for (size_t j = 0; j <= litLen; j++) { - vector matches; hwlm_error_t fdrStatus = fdrExec(fdr.get(), (const u8 *)dataBufAligned.get() + i + j, 4 * buf_alignment - j * 2, 0, decentCallback, - &matches, HWLM_ALL_GROUPS); + &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); // j == 0 means that start and end matches are entirely within // searched buffer. Otherwise they are out of buffer boundaries @@ -585,6 +591,7 @@ TEST_P(FDRpa, ShortWritings) { } // run the literal matching through all generated literals + struct hs_scratch scratch; for (size_t patIdx = 0; patIdx < pats.size();) { // group them in the sets of 32 vector testSigs; @@ -603,9 +610,8 @@ TEST_P(FDRpa, ShortWritings) { const string &buf = bufs[bufIdx]; size_t bufLen = buf.size(); - vector matches; hwlm_error_t fdrStatus = fdrExec(fdr.get(), (const u8 *)buf.data(), - bufLen, 0, decentCallback, &matches, HWLM_ALL_GROUPS); + bufLen, 0, decentCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); // build the set of expected matches using standard @@ -627,6 +633,7 @@ TEST_P(FDRpa, ShortWritings) { sort(expMatches.begin(), expMatches.end()); sort(matches.begin(), matches.end()); ASSERT_EQ(expMatches, matches); + matches.clear(); } } } @@ -656,14 +663,14 @@ TEST(FDR, FDRTermS) { ASSERT_TRUE(fdr != nullptr); // check matches - vector matches; fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2, 0, - decentCallbackT, &matches, HWLM_ALL_GROUPS); + decentCallbackT, HWLM_ALL_GROUPS); ASSERT_EQ(HWLM_TERMINATED, fdrStatus); ASSERT_EQ(1U, matches.size()); + matches.clear(); } TEST(FDR, FDRTermB) { @@ 
-679,11 +686,12 @@ TEST(FDR, FDRTermB) { ASSERT_TRUE(fdr != nullptr); // check matches - vector matches; + struct hs_scratch scratch; fdrStatus = fdrExec(fdr.get(), (const u8 *)data1, data_len1, - 0, decentCallbackT, &matches, HWLM_ALL_GROUPS); + 0, decentCallbackT, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(HWLM_TERMINATED, fdrStatus); ASSERT_EQ(1U, matches.size()); + matches.clear(); } diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index 3dc79442..8bdd0763 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -36,6 +36,7 @@ #include "fdr/fdr_engine_description.h" #include "fdr/teddy_compile.h" #include "fdr/teddy_engine_description.h" +#include "scratch.h" #include "util/alloc.h" #include "util/bitutils.h" @@ -94,13 +95,13 @@ T &operator<<(T &a, const vector &b) { return a; } +map matchesCounts; + extern "C" { -static hwlmcb_rv_t countCallback(UNUSED size_t end, u32 id, void *cntxt) { - if (cntxt) { - map *matchesCounts = (map *)cntxt; - (*matchesCounts)[id]++; - } +static hwlmcb_rv_t countCallback(UNUSED size_t end, u32 id, + UNUSED struct hs_scratch *scratch) { + matchesCounts[id]++; return HWLM_CONTINUE_MATCHING; } @@ -140,6 +141,7 @@ TEST_P(FDRFloodp, NoMask) { vector data(dataSize); u8 c = 0; + struct hs_scratch scratch; while (1) { SCOPED_TRACE((unsigned int)c); u8 bit = 1 << (c & 0x7); @@ -171,10 +173,8 @@ TEST_P(FDRFloodp, NoMask) { Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - map matchesCounts; - hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize, - 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); + 0, countCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); for (u8 i = 0; i < 4; i++) { @@ -199,7 +199,7 @@ TEST_P(FDRFloodp, NoMask) { matchesCounts.clear(); memset(&data[0], cAlt, dataSize); fdrStatus = fdrExec(fdr.get(), &data[0], dataSize, - 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); + 0, countCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); 
for (u8 i = 0; i < 4; i++) { @@ -219,6 +219,7 @@ TEST_P(FDRFloodp, NoMask) { ASSERT_EQ(0, matchesCounts[i * 8 + 6]); } } + matchesCounts.clear(); if (++c == 0) { break; @@ -233,6 +234,7 @@ TEST_P(FDRFloodp, WithMask) { vector data(dataSize); u8 c = '\0'; + struct hs_scratch scratch; while (1) { u8 bit = 1 << (c & 0x7); u8 cAlt = c ^ bit; @@ -307,10 +309,8 @@ TEST_P(FDRFloodp, WithMask) { Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - map matchesCounts; - hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize, - 0, countCallback, &matchesCounts, HWLM_ALL_GROUPS); + 0, countCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); const u32 cnt4 = dataSize - 4 + 1; @@ -348,7 +348,7 @@ TEST_P(FDRFloodp, WithMask) { memset(&data[0], cAlt, dataSize); matchesCounts.clear(); fdrStatus = fdrExec(fdr.get(), &data[0], dataSize, - 0, countCallback, &matchesCounts, HWLM_ALL_GROUPS); + 0, countCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); for (u8 i = 0; i < 4; i++) { @@ -381,6 +381,7 @@ TEST_P(FDRFloodp, WithMask) { ASSERT_EQ(0, matchesCounts[i * 12 + 11]); } } + matchesCounts.clear(); if (++c == '\0') { break; @@ -398,6 +399,7 @@ TEST_P(FDRFloodp, StreamingMask) { vector tempdata(dataSize + fake_history_size); // headroom u8 c = '\0'; + struct hs_scratch scratch; while (1) { u8 bit = 1 << (c & 0x7); u8 cAlt = c ^ bit; @@ -472,7 +474,6 @@ TEST_P(FDRFloodp, StreamingMask) { Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - map matchesCounts; hwlm_error_t fdrStatus; const u32 cnt4 = dataSize - 4 + 1; @@ -482,7 +483,7 @@ TEST_P(FDRFloodp, StreamingMask) { // reference past the end of fake history to allow headroom const u8 *fhist = fake_history.data() + fake_history_size; fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0, - countCallback, &matchesCounts, + countCallback, &scratch, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); for (u32 j = streamChunk; j < dataSize; j += streamChunk) { @@ -493,13 +494,11 @@ TEST_P(FDRFloodp, 
StreamingMask) { const u8 *tmp_d = tempdata.data() + fake_history_size; fdrStatus = fdrExecStreaming(fdr.get(), tmp_d, j, tmp_d + j, streamChunk, 0, countCallback, - &matchesCounts, - HWLM_ALL_GROUPS); + &scratch, HWLM_ALL_GROUPS); } else { fdrStatus = fdrExecStreaming(fdr.get(), d + j - 8, 8, d + j, streamChunk, 0, countCallback, - &matchesCounts, - HWLM_ALL_GROUPS); + &scratch, HWLM_ALL_GROUPS); } ASSERT_EQ(0, fdrStatus); } @@ -540,6 +539,7 @@ TEST_P(FDRFloodp, StreamingMask) { break; } } + matchesCounts.clear(); } INSTANTIATE_TEST_CASE_P(FDRFlood, FDRFloodp, ValuesIn(getValidFdrEngines())); diff --git a/unit/internal/noodle.cpp b/unit/internal/noodle.cpp index 460e77c5..7cf5744f 100644 --- a/unit/internal/noodle.cpp +++ b/unit/internal/noodle.cpp @@ -33,6 +33,7 @@ #include "hwlm/noodle_engine.h" #include "hwlm/hwlm.h" #include "hwlm/hwlm_literal.h" +#include "scratch.h" #include "util/alloc.h" #include "util/ue2string.h" @@ -51,52 +52,51 @@ struct hlmMatchEntry { to(end), id(identifier) {} }; -typedef vector hlmMatchRecord; +vector ctxt; static -hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id, void *context) { - hlmMatchRecord *mr = (hlmMatchRecord *)context; +hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id, + UNUSED struct hs_scratch *scratch) { + DEBUG_PRINTF("match @%zu = %u\n", to, id); - DEBUG_PRINTF("match @%zu = %u,%p\n", to, id, context); - - mr->push_back(hlmMatchEntry(to, id)); + ctxt.push_back(hlmMatchEntry(to, id)); return HWLM_CONTINUE_MATCHING; } static void noodleMatch(const u8 *data, size_t data_len, const char *lit_str, - size_t lit_len, char nocase, HWLMCallback cb, void *ctxt) { + size_t lit_len, char nocase, HWLMCallback cb) { u32 id = 1000; hwlmLiteral lit(std::string(lit_str, lit_len), nocase, id); auto n = noodBuildTable(lit); ASSERT_TRUE(n != nullptr); hwlm_error_t rv; - rv = noodExec(n.get(), data, data_len, 0, cb, ctxt); + struct hs_scratch scratch; + rv = noodExec(n.get(), data, data_len, 0, cb, &scratch); ASSERT_EQ(HWLM_SUCCESS, rv); 
} TEST(Noodle, nood1) { const size_t data_len = 1024; unsigned int i, j; - hlmMatchRecord ctxt; u8 data[data_len]; memset(data, 'a', data_len); - noodleMatch(data, data_len, "a", 1, 0, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "a", 1, 0, hlmSimpleCallback); ASSERT_EQ(1024U, ctxt.size()); for (i = 0; i < 1024; i++) { ASSERT_EQ(i, ctxt[i].to); } ctxt.clear(); - noodleMatch(data, data_len, "A", 1, 0, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "A", 1, 0, hlmSimpleCallback); ASSERT_EQ(0U, ctxt.size()); ctxt.clear(); - noodleMatch(data, data_len, "A", 1, 1, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "A", 1, 1, hlmSimpleCallback); ASSERT_EQ(1024U, ctxt.size()); for (i = 0; i < 1024; i++) { ASSERT_EQ(i, ctxt[i].to); @@ -104,60 +104,59 @@ TEST(Noodle, nood1) { for (j = 0; j < 16; j++) { ctxt.clear(); - noodleMatch(data + j, data_len - j, "A", 1, 1, hlmSimpleCallback, - &ctxt); + noodleMatch(data + j, data_len - j, "A", 1, 1, hlmSimpleCallback); ASSERT_EQ(1024 - j, ctxt.size()); for (i = 0; i < 1024 - j; i++) { ASSERT_EQ(i, ctxt[i].to); } ctxt.clear(); - noodleMatch(data, data_len - j, "A", 1, 1, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len - j, "A", 1, 1, hlmSimpleCallback); ASSERT_EQ(1024 - j, ctxt.size()); for (i = 0; i < 1024 - j; i++) { ASSERT_EQ(i, ctxt[i].to); } } + ctxt.clear(); } TEST(Noodle, nood2) { const size_t data_len = 1024; unsigned int i, j; - hlmMatchRecord ctxt; u8 data[data_len]; memset(data, 'a', data_len); - noodleMatch(data, data_len, "aa", 2, 0, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aa", 2, 0, hlmSimpleCallback); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { ASSERT_EQ(i + 1, ctxt[i].to); } ctxt.clear(); - noodleMatch(data, data_len, "aA", 2, 0, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aA", 2, 0, hlmSimpleCallback); ASSERT_EQ(0U, ctxt.size()); ctxt.clear(); - noodleMatch(data, data_len, "AA", 2, 0, hlmSimpleCallback, &ctxt); + noodleMatch(data, 
data_len, "AA", 2, 0, hlmSimpleCallback); ASSERT_EQ(0U, ctxt.size()); ctxt.clear(); - noodleMatch(data, data_len, "aa", 2, 1, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aa", 2, 1, hlmSimpleCallback); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { ASSERT_EQ(i + 1, ctxt[i].to); } ctxt.clear(); - noodleMatch(data, data_len, "Aa", 2, 1, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "Aa", 2, 1, hlmSimpleCallback); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { ASSERT_EQ(i + 1, ctxt[i].to); } ctxt.clear(); - noodleMatch(data, data_len, "AA", 2, 1, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "AA", 2, 1, hlmSimpleCallback); ASSERT_EQ(1023U, ctxt.size()); for (i = 0; i < 1023; i++) { ASSERT_EQ(i + 1, ctxt[i].to); @@ -165,42 +164,41 @@ TEST(Noodle, nood2) { for (j = 0; j < 16; j++) { ctxt.clear(); - noodleMatch(data + j, data_len - j, "Aa", 2, 1, hlmSimpleCallback, - &ctxt); + noodleMatch(data + j, data_len - j, "Aa", 2, 1, hlmSimpleCallback); ASSERT_EQ(1023 - j, ctxt.size()); for (i = 0; i < 1023 - j; i++) { ASSERT_EQ(i + 1, ctxt[i].to); } ctxt.clear(); - noodleMatch(data, data_len - j, "aA", 2, 1, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len - j, "aA", 2, 1, hlmSimpleCallback); ASSERT_EQ(1023 - j, ctxt.size()); for (i = 0; i < 1023 - j; i++) { ASSERT_EQ(i + 1, ctxt[i].to); } } + ctxt.clear(); } TEST(Noodle, noodLong) { const size_t data_len = 1024; unsigned int i, j; - hlmMatchRecord ctxt; u8 data[data_len]; memset(data, 'a', data_len); - noodleMatch(data, data_len, "aaaa", 4, 0, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aaaa", 4, 0, hlmSimpleCallback); ASSERT_EQ(1021U, ctxt.size()); for (i = 0; i < 1021; i++) { ASSERT_EQ(i + 3, ctxt[i].to); } ctxt.clear(); - noodleMatch(data, data_len, "aaAA", 4, 0, hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aaAA", 4, 0, hlmSimpleCallback); ASSERT_EQ(0U, ctxt.size()); ctxt.clear(); - noodleMatch(data, data_len, "aaAA", 4, 1, 
hlmSimpleCallback, &ctxt); + noodleMatch(data, data_len, "aaAA", 4, 1, hlmSimpleCallback); ASSERT_EQ(1021U, ctxt.size()); for (i = 0; i < 1021; i++) { ASSERT_EQ(i + 3, ctxt[i].to); @@ -208,26 +206,24 @@ TEST(Noodle, noodLong) { for (j = 0; j < 16; j++) { ctxt.clear(); - noodleMatch(data + j, data_len - j, "AAaa", 4, 1, hlmSimpleCallback, - &ctxt); + noodleMatch(data + j, data_len - j, "AAaa", 4, 1, hlmSimpleCallback); ASSERT_EQ(1021 - j, ctxt.size()); for (i = 0; i < 1021 - j; i++) { ASSERT_EQ(i + 3, ctxt[i].to); } ctxt.clear(); - noodleMatch(data + j, data_len - j, "aaaA", 4, 1, hlmSimpleCallback, - &ctxt); + noodleMatch(data + j, data_len - j, "aaaA", 4, 1, hlmSimpleCallback); ASSERT_EQ(1021 - j, ctxt.size()); for (i = 0; i < 1021 - j; i++) { ASSERT_EQ(i + 3, ctxt[i].to); } } + ctxt.clear(); } TEST(Noodle, noodCutoverSingle) { const size_t max_data_len = 128; - hlmMatchRecord ctxt; u8 data[max_data_len + 15]; memset(data, 'a', max_data_len + 15); @@ -235,18 +231,18 @@ TEST(Noodle, noodCutoverSingle) { for (u32 align = 0; align < 16; align++) { for (u32 len = 0; len < max_data_len; len++) { ctxt.clear(); - noodleMatch(data + align, len, "a", 1, 0, hlmSimpleCallback, &ctxt); + noodleMatch(data + align, len, "a", 1, 0, hlmSimpleCallback); EXPECT_EQ(len, ctxt.size()); for (u32 i = 0; i < ctxt.size(); i++) { ASSERT_EQ(i, ctxt[i].to); } } } + ctxt.clear(); } TEST(Noodle, noodCutoverDouble) { const size_t max_data_len = 128; - hlmMatchRecord ctxt; u8 data[max_data_len + 15]; memset(data, 'a', max_data_len + 15); @@ -254,13 +250,13 @@ TEST(Noodle, noodCutoverDouble) { for (u32 align = 0; align < 16; align++) { for (u32 len = 0; len < max_data_len; len++) { ctxt.clear(); - noodleMatch(data + align, len, "aa", 2, 0, hlmSimpleCallback, - &ctxt); + noodleMatch(data + align, len, "aa", 2, 0, hlmSimpleCallback); EXPECT_EQ(len ? 
len - 1 : 0U, ctxt.size()); for (u32 i = 0; i < ctxt.size(); i++) { ASSERT_EQ(i + 1, ctxt[i].to); } } } + ctxt.clear(); } From 340773481ef59aa9d7ff5bfb58c76b4a62e5f338 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 20 Jul 2017 15:11:50 +1000 Subject: [PATCH 094/190] smallwrite: batch dfa merge to reduce compile time --- src/grey.cpp | 2 + src/grey.h | 1 + src/smallwrite/smallwrite_build.cpp | 141 +++++++++++++--------------- 3 files changed, 66 insertions(+), 78 deletions(-) diff --git a/src/grey.cpp b/src/grey.cpp index 24140c05..3762a497 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -139,6 +139,7 @@ Grey::Grey(void) : limitSmallWriteOutfixSize(1048576), // 1 MB smallWriteMaxPatterns(10000), smallWriteMaxLiterals(10000), + smallWriteMergeBatchSize(20), allowTamarama(true), // Tamarama engine tamaChunkSize(100), dumpFlags(0), @@ -302,6 +303,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(limitSmallWriteOutfixSize); G_UPDATE(smallWriteMaxPatterns); G_UPDATE(smallWriteMaxLiterals); + G_UPDATE(smallWriteMergeBatchSize); G_UPDATE(allowTamarama); G_UPDATE(tamaChunkSize); G_UPDATE(limitPatternCount); diff --git a/src/grey.h b/src/grey.h index 50519418..34c62918 100644 --- a/src/grey.h +++ b/src/grey.h @@ -157,6 +157,7 @@ struct Grey { u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs u32 smallWriteMaxPatterns; // only try small writes if fewer patterns u32 smallWriteMaxLiterals; // only try small writes if fewer literals + u32 smallWriteMergeBatchSize; // number of DFAs to merge in a batch // Tamarama engine bool allowTamarama; diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index bb933cbe..c041155b 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -132,12 +132,10 @@ public: set all_reports() const override; - bool determiniseLiterals(); - const ReportManager &rm; const CompileContext &cc; - unique_ptr rdfa; + vector> dfas; LitTrie lit_trie; 
LitTrie lit_trie_nocase; size_t num_literals = 0; @@ -226,6 +224,40 @@ bool pruneOverlong(NGHolder &g, const depth &max_depth, return modified; } +/** + * \brief Attempt to merge the set of DFAs given down into a single raw_dfa. + * Returns false on failure. + */ +static +bool mergeDfas(vector> &dfas, const ReportManager &rm, + const CompileContext &cc) { + assert(!dfas.empty()); + + if (dfas.size() == 1) { + return true; + } + + DEBUG_PRINTF("attempting to merge %zu DFAs\n", dfas.size()); + + vector dfa_ptrs; + dfa_ptrs.reserve(dfas.size()); + for (auto &d : dfas) { + dfa_ptrs.push_back(d.get()); + } + + auto merged = mergeAllDfas(dfa_ptrs, DFA_MERGE_MAX_STATES, &rm, cc.grey); + if (!merged) { + DEBUG_PRINTF("merge failed\n"); + return false; + } + + DEBUG_PRINTF("merge succeeded, result has %zu states\n", + merged->states.size()); + dfas.clear(); + dfas.push_back(std::move(merged)); + return true; +} + void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) { // If the graph is poisoned (i.e. we can't build a SmallWrite version), // we don't even try. 
@@ -283,19 +315,14 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) { minimize_hopcroft(*r, cc.grey); } - if (rdfa) { - // do a merge of the new dfa with the existing dfa - auto merged = mergeTwoDfas(rdfa.get(), r.get(), DFA_MERGE_MAX_STATES, - &rm, cc.grey); - if (!merged) { - DEBUG_PRINTF("merge failed\n"); + dfas.push_back(std::move(r)); + + if (dfas.size() >= cc.grey.smallWriteMergeBatchSize) { + if (!mergeDfas(dfas, rm, cc)) { + dfas.clear(); poisoned = true; return; } - DEBUG_PRINTF("merge succeeded, built %p\n", merged.get()); - rdfa = move(merged); - } else { - rdfa = move(r); } } @@ -710,64 +737,6 @@ unique_ptr buildDfa(LitTrie &trie, bool nocase) { return rdfa; } -bool SmallWriteBuildImpl::determiniseLiterals() { - DEBUG_PRINTF("handling literals\n"); - assert(!poisoned); - assert(num_literals <= cc.grey.smallWriteMaxLiterals); - - if (is_empty(lit_trie) && is_empty(lit_trie_nocase)) { - DEBUG_PRINTF("no literals\n"); - return true; /* nothing to do */ - } - - vector> dfas; - - if (!is_empty(lit_trie)) { - dfas.push_back(buildDfa(lit_trie, false)); - DEBUG_PRINTF("caseful literal dfa with %zu states\n", - dfas.back()->states.size()); - } - if (!is_empty(lit_trie_nocase)) { - dfas.push_back(buildDfa(lit_trie_nocase, true)); - DEBUG_PRINTF("nocase literal dfa with %zu states\n", - dfas.back()->states.size()); - } - - if (rdfa) { - dfas.push_back(move(rdfa)); - DEBUG_PRINTF("general dfa with %zu states\n", - dfas.back()->states.size()); - } - - // If we only have one DFA, no merging is necessary. - if (dfas.size() == 1) { - DEBUG_PRINTF("only one dfa\n"); - rdfa = move(dfas.front()); - return true; - } - - // Merge all DFAs. 
- vector to_merge; - for (const auto &d : dfas) { - to_merge.push_back(d.get()); - } - - auto merged = mergeAllDfas(to_merge, DFA_MERGE_MAX_STATES, &rm, cc.grey); - - if (!merged) { - DEBUG_PRINTF("merge failed\n"); - poisoned = true; - return false; - } - - DEBUG_PRINTF("merge succeeded, built dfa with %zu states\n", - merged->states.size()); - - // Replace our only DFA with the merged one. - rdfa = move(merged); - return true; -} - #define MAX_GOOD_ACCEL_DEPTH 4 static @@ -890,8 +859,8 @@ unique_ptr makeSmallWriteBuilder(size_t num_patterns, bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase); - const bool has_non_literals = rdfa != nullptr; - if (!rdfa && !has_literals) { + const bool has_non_literals = !dfas.empty(); + if (dfas.empty() && !has_literals) { DEBUG_PRINTF("no smallwrite engine\n"); poisoned = true; return nullptr; @@ -914,16 +883,31 @@ bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { } } - if (!determiniseLiterals()) { - DEBUG_PRINTF("some literal could not be made into a smallwrite dfa\n"); - return nullptr; + if (!is_empty(lit_trie)) { + dfas.push_back(buildDfa(lit_trie, false)); + DEBUG_PRINTF("caseful literal dfa with %zu states\n", + dfas.back()->states.size()); + } + if (!is_empty(lit_trie_nocase)) { + dfas.push_back(buildDfa(lit_trie_nocase, true)); + DEBUG_PRINTF("nocase literal dfa with %zu states\n", + dfas.back()->states.size()); } - if (!rdfa) { + if (dfas.empty()) { DEBUG_PRINTF("no dfa, pruned everything away\n"); return nullptr; } + if (!mergeDfas(dfas, rm, cc)) { + dfas.clear(); + return nullptr; + } + + assert(dfas.size() == 1); + auto rdfa = std::move(dfas.front()); + dfas.clear(); + DEBUG_PRINTF("building rdfa %p\n", rdfa.get()); u32 start_offset; @@ -957,7 +941,8 @@ set SmallWriteBuildImpl::all_reports() const { if (poisoned) { return reports; } - if (rdfa) { + + for (const auto &rdfa : dfas) { insert(&reports, 
::ue2::all_reports(*rdfa)); } From 68e08d8e18b9ccc3ecdd81040e4214ffb300fb76 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Wed, 5 Jul 2017 18:42:17 -0700 Subject: [PATCH 095/190] AVX512 reinforced teddy. --- src/fdr/teddy.c | 641 +++++++++++++++++++++------------ src/fdr/teddy_avx2.c | 113 ++---- src/fdr/teddy_runtime_common.h | 80 +++- src/util/simd_utils.h | 17 + 4 files changed, 544 insertions(+), 307 deletions(-) diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index fd149016..f28d0a50 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -74,7 +74,294 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }; -#if defined(__AVX2__) // reinforced teddy +#define CONF_CHUNK_64(chunk, bucket, off, reason, conf_fn) \ +do { \ + if (unlikely(chunk != ones_u64a)) { \ + chunk = ~chunk; \ + conf_fn(&chunk, bucket, off, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ +} while(0) + +#define CONF_CHUNK_32(chunk, bucket, off, reason, conf_fn) \ +do { \ + if (unlikely(chunk != ones_u32a)) { \ + chunk = ~chunk; \ + conf_fn(&chunk, bucket, off, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ +} while(0) + +#if defined(HAVE_AVX512) // AVX512 reinforced teddy + +#ifdef ARCH_64_BIT +#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(diff512(var, ones512()))) { \ + m128 p128_0 = extract128from512(var, 0); \ + m128 p128_1 = extract128from512(var, 1); \ + m128 p128_2 = extract128from512(var, 2); \ + m128 p128_3 = extract128from512(var, 3); \ + u64a part1 = movq(p128_0); \ + u64a part2 = movq(rshiftbyte_m128(p128_0, 8)); \ + u64a part3 = movq(p128_1); \ + u64a part4 = movq(rshiftbyte_m128(p128_1, 8)); \ + u64a part5 = movq(p128_2); \ + u64a part6 = movq(rshiftbyte_m128(p128_2, 8)); \ + u64a part7 = movq(p128_3); \ + u64a part8 = movq(rshiftbyte_m128(p128_3, 8)); 
\ + CONF_CHUNK_64(part1, bucket, offset, reason, conf_fn); \ + CONF_CHUNK_64(part2, bucket, offset + 8, reason, conf_fn); \ + CONF_CHUNK_64(part3, bucket, offset + 16, reason, conf_fn); \ + CONF_CHUNK_64(part4, bucket, offset + 24, reason, conf_fn); \ + CONF_CHUNK_64(part5, bucket, offset + 32, reason, conf_fn); \ + CONF_CHUNK_64(part6, bucket, offset + 40, reason, conf_fn); \ + CONF_CHUNK_64(part7, bucket, offset + 48, reason, conf_fn); \ + CONF_CHUNK_64(part8, bucket, offset + 56, reason, conf_fn); \ + } \ +} while(0) +#else +#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(diff512(var, ones512()))) { \ + m128 p128_0 = extract128from512(var, 0); \ + m128 p128_1 = extract128from512(var, 1); \ + m128 p128_2 = extract128from512(var, 2); \ + m128 p128_3 = extract128from512(var, 3); \ + u32 part1 = movd(p128_0); \ + u32 part2 = movd(rshiftbyte_m128(p128_0, 4)); \ + u32 part3 = movd(rshiftbyte_m128(p128_0, 8)); \ + u32 part4 = movd(rshiftbyte_m128(p128_0, 12)); \ + u32 part5 = movd(p128_1); \ + u32 part6 = movd(rshiftbyte_m128(p128_1, 4)); \ + u32 part7 = movd(rshiftbyte_m128(p128_1, 8)); \ + u32 part8 = movd(rshiftbyte_m128(p128_1, 12)); \ + u32 part9 = movd(p128_2); \ + u32 part10 = movd(rshiftbyte_m128(p128_2, 4)); \ + u32 part11 = movd(rshiftbyte_m128(p128_2, 8)); \ + u32 part12 = movd(rshiftbyte_m128(p128_2, 12)); \ + u32 part13 = movd(p128_3); \ + u32 part14 = movd(rshiftbyte_m128(p128_3, 4)); \ + u32 part15 = movd(rshiftbyte_m128(p128_3, 8)); \ + u32 part16 = movd(rshiftbyte_m128(p128_3, 12)); \ + CONF_CHUNK_32(part1, bucket, offset, reason, conf_fn); \ + CONF_CHUNK_32(part2, bucket, offset + 4, reason, conf_fn); \ + CONF_CHUNK_32(part3, bucket, offset + 8, reason, conf_fn); \ + CONF_CHUNK_32(part4, bucket, offset + 12, reason, conf_fn); \ + CONF_CHUNK_32(part5, bucket, offset + 16, reason, conf_fn); \ + CONF_CHUNK_32(part6, bucket, offset + 20, reason, conf_fn); \ + CONF_CHUNK_32(part7, bucket, offset + 24, reason, conf_fn); 
\ + CONF_CHUNK_32(part8, bucket, offset + 28, reason, conf_fn); \ + CONF_CHUNK_32(part9, bucket, offset + 32, reason, conf_fn); \ + CONF_CHUNK_32(part10, bucket, offset + 36, reason, conf_fn); \ + CONF_CHUNK_32(part11, bucket, offset + 40, reason, conf_fn); \ + CONF_CHUNK_32(part12, bucket, offset + 44, reason, conf_fn); \ + CONF_CHUNK_32(part13, bucket, offset + 48, reason, conf_fn); \ + CONF_CHUNK_32(part14, bucket, offset + 52, reason, conf_fn); \ + CONF_CHUNK_32(part15, bucket, offset + 56, reason, conf_fn); \ + CONF_CHUNK_32(part16, bucket, offset + 60, reason, conf_fn); \ + } \ +} while(0) +#endif + +#define PREP_SHUF_MASK_NO_REINFORCEMENT(val) \ + m512 lo = and512(val, *lo_mask); \ + m512 hi = and512(rshift64_m512(val, 4), *lo_mask) + +#define PREP_SHUF_MASK \ + PREP_SHUF_MASK_NO_REINFORCEMENT(load512(ptr)); \ + *c_16 = *(ptr + 15); \ + *c_32 = *(ptr + 31); \ + *c_48 = *(ptr + 47); \ + m512 r_msk = set512_64(0ULL, r_msk_base[*c_48], 0ULL, r_msk_base[*c_32],\ + 0ULL, r_msk_base[*c_16], 0ULL, r_msk_base[*c_0]);\ + *c_0 = *(ptr + 63) + +#define SHIFT_OR_M1 \ + or512(pshufb_m512(dup_mask[0], lo), pshufb_m512(dup_mask[1], hi)) + +#define SHIFT_OR_M2 \ + or512(lshift128_m512(or512(pshufb_m512(dup_mask[2], lo), \ + pshufb_m512(dup_mask[3], hi)), \ + 1), SHIFT_OR_M1) + +#define SHIFT_OR_M3 \ + or512(lshift128_m512(or512(pshufb_m512(dup_mask[4], lo), \ + pshufb_m512(dup_mask[5], hi)), \ + 2), SHIFT_OR_M2) + +#define SHIFT_OR_M4 \ + or512(lshift128_m512(or512(pshufb_m512(dup_mask[6], lo), \ + pshufb_m512(dup_mask[7], hi)), \ + 3), SHIFT_OR_M3) + +static really_inline +m512 prep_conf_teddy_no_reinforcement_m1(const m512 *lo_mask, + const m512 *dup_mask, + const m512 val) { + PREP_SHUF_MASK_NO_REINFORCEMENT(val); + return SHIFT_OR_M1; +} + +static really_inline +m512 prep_conf_teddy_no_reinforcement_m2(const m512 *lo_mask, + const m512 *dup_mask, + const m512 val) { + PREP_SHUF_MASK_NO_REINFORCEMENT(val); + return SHIFT_OR_M2; +} + +static really_inline +m512 
prep_conf_teddy_no_reinforcement_m3(const m512 *lo_mask, + const m512 *dup_mask, + const m512 val) { + PREP_SHUF_MASK_NO_REINFORCEMENT(val); + return SHIFT_OR_M3; +} + +static really_inline +m512 prep_conf_teddy_no_reinforcement_m4(const m512 *lo_mask, + const m512 *dup_mask, + const m512 val) { + PREP_SHUF_MASK_NO_REINFORCEMENT(val); + return SHIFT_OR_M4; +} + +static really_inline +m512 prep_conf_teddy_m1(const m512 *lo_mask, const m512 *dup_mask, + const u8 *ptr, const u64a *r_msk_base, + u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) { + PREP_SHUF_MASK; + return or512(SHIFT_OR_M1, r_msk); +} + +static really_inline +m512 prep_conf_teddy_m2(const m512 *lo_mask, const m512 *dup_mask, + const u8 *ptr, const u64a *r_msk_base, + u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) { + PREP_SHUF_MASK; + return or512(SHIFT_OR_M2, r_msk); +} + +static really_inline +m512 prep_conf_teddy_m3(const m512 *lo_mask, const m512 *dup_mask, + const u8 *ptr, const u64a *r_msk_base, + u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) { + PREP_SHUF_MASK; + return or512(SHIFT_OR_M3, r_msk); +} + +static really_inline +m512 prep_conf_teddy_m4(const m512 *lo_mask, const m512 *dup_mask, + const u8 *ptr, const u64a *r_msk_base, + u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) { + PREP_SHUF_MASK; + return or512(SHIFT_OR_M4, r_msk); +} + +#define PREP_CONF_FN_NO_REINFORCEMENT(val, n) \ + prep_conf_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val) + +#define PREP_CONF_FN(ptr, n) \ + prep_conf_teddy_m##n(&lo_mask, dup_mask, ptr, r_msk_base, \ + &c_0, &c_16, &c_32, &c_48) + +#define PREPARE_MASKS_1 \ + dup_mask[0] = set4x128(maskBase[0]); \ + dup_mask[1] = set4x128(maskBase[1]); + +#define PREPARE_MASKS_2 \ + PREPARE_MASKS_1 \ + dup_mask[2] = set4x128(maskBase[2]); \ + dup_mask[3] = set4x128(maskBase[3]); + +#define PREPARE_MASKS_3 \ + PREPARE_MASKS_2 \ + dup_mask[4] = set4x128(maskBase[4]); \ + dup_mask[5] = set4x128(maskBase[5]); + +#define PREPARE_MASKS_4 \ + PREPARE_MASKS_3 \ + dup_mask[6] = 
set4x128(maskBase[6]); \ + dup_mask[7] = set4x128(maskBase[7]); + +#define PREPARE_MASKS(n) \ + m512 lo_mask = set64x8(0xf); \ + m512 dup_mask[n * 2]; \ + PREPARE_MASKS_##n + +#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) \ +do { \ + const u8 *buf_end = a->buf + a->len; \ + const u8 *ptr = a->buf + a->start_offset; \ + u32 floodBackoff = FLOOD_BACKOFF_START; \ + const u8 *tryFloodDetect = a->firstFloodDetect; \ + u32 last_match = (u32)-1; \ + const struct Teddy *teddy = (const struct Teddy *)fdr; \ + const size_t iterBytes = 128; \ + DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ + a->buf, a->len, a->start_offset); \ + \ + const m128 *maskBase = getMaskBase(teddy); \ + PREPARE_MASKS(n_msk); \ + const u32 *confBase = getConfBase(teddy); \ + \ + const u64a *r_msk_base = getReinforcedMaskBase(teddy, n_msk); \ + u32 c_0 = 0x100; \ + u32 c_16 = 0x100; \ + u32 c_32 = 0x100; \ + u32 c_48 = 0x100; \ + const u8 *mainStart = ROUNDUP_PTR(ptr, 64); \ + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); \ + if (ptr < mainStart) { \ + ptr = mainStart - 64; \ + m512 p_mask; \ + m512 val_0 = vectoredLoad512(&p_mask, ptr, a->start_offset, \ + a->buf, buf_end, \ + a->buf_history, a->len_history, n_msk); \ + m512 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk); \ + r_0 = or512(r_0, p_mask); \ + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ + ptr += 64; \ + } \ + \ + if (ptr + 64 <= buf_end) { \ + m512 r_0 = PREP_CONF_FN(ptr, n_msk); \ + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ + ptr += 64; \ + } \ + \ + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { \ + __builtin_prefetch(ptr + (iterBytes * 4)); \ + CHECK_FLOOD; \ + m512 r_0 = PREP_CONF_FN(ptr, n_msk); \ + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn); \ + m512 r_1 = PREP_CONF_FN(ptr + 64, n_msk); \ + CONFIRM_TEDDY(r_1, 8, 64, NOT_CAUTIOUS, conf_fn); \ + } \ + \ + if (ptr + 64 <= buf_end) { \ + m512 r_0 = PREP_CONF_FN(ptr, n_msk); \ + CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, 
conf_fn); \ + ptr += 64; \ + } \ + \ + assert(ptr + 64 > buf_end); \ + if (ptr < buf_end) { \ + m512 p_mask; \ + m512 val_0 = vectoredLoad512(&p_mask, ptr, 0, ptr, buf_end, \ + a->buf_history, a->len_history, n_msk); \ + m512 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk); \ + r_0 = or512(r_0, p_mask); \ + CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn); \ + } \ + \ + return HWLM_SUCCESS; \ +} while(0) + +#elif defined(HAVE_AVX2) // not HAVE_AVX512 but HAVE_AVX2 reinforced teddy #ifdef ARCH_64_BIT #define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ @@ -86,30 +373,10 @@ do { \ u64a part2 = movq(rshiftbyte_m128(lo, 8)); \ u64a part3 = movq(hi); \ u64a part4 = movq(rshiftbyte_m128(hi, 8)); \ - if (unlikely(part1 != ones_u64a)) { \ - part1 = ~part1; \ - conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part2 != ones_u64a)) { \ - part2 = ~part2; \ - conf_fn(&part2, bucket, offset + 8, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part3 != ones_u64a)) { \ - part3 = ~part3; \ - conf_fn(&part3, bucket, offset + 16, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part4 != ones_u64a)) { \ - part4 = ~part4; \ - conf_fn(&part4, bucket, offset + 24, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ + CONF_CHUNK_64(part1, bucket, offset, reason, conf_fn); \ + CONF_CHUNK_64(part2, bucket, offset + 8, reason, conf_fn); \ + CONF_CHUNK_64(part3, bucket, offset + 16, reason, conf_fn); \ + CONF_CHUNK_64(part4, bucket, offset + 24, reason, conf_fn); \ } \ } while(0) #else @@ -126,54 +393,14 @@ do { \ u32 part6 = movd(rshiftbyte_m128(hi, 4)); \ u32 part7 = movd(rshiftbyte_m128(hi, 8)); \ u32 part8 = movd(rshiftbyte_m128(hi, 12)); \ - if (unlikely(part1 != ones_u32)) { \ - part1 = ~part1; \ - conf_fn(&part1, 
bucket, offset, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part2 != ones_u32)) { \ - part2 = ~part2; \ - conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part3 != ones_u32)) { \ - part3 = ~part3; \ - conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part4 != ones_u32)) { \ - part4 = ~part4; \ - conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part5 != ones_u32)) { \ - part5 = ~part5; \ - conf_fn(&part5, bucket, offset + 16, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part6 != ones_u32)) { \ - part6 = ~part6; \ - conf_fn(&part6, bucket, offset + 20, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part7 != ones_u32)) { \ - part7 = ~part7; \ - conf_fn(&part7, bucket, offset + 24, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part8 != ones_u32)) { \ - part8 = ~part8; \ - conf_fn(&part8, bucket, offset + 28, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ + CONF_CHUNK_32(part1, bucket, offset, reason, conf_fn); \ + CONF_CHUNK_32(part2, bucket, offset + 4, reason, conf_fn); \ + CONF_CHUNK_32(part3, bucket, offset + 8, reason, conf_fn); \ + CONF_CHUNK_32(part4, bucket, offset + 12, reason, conf_fn); \ + CONF_CHUNK_32(part5, bucket, offset + 16, reason, conf_fn); \ + CONF_CHUNK_32(part6, bucket, offset + 20, reason, conf_fn); \ + CONF_CHUNK_32(part7, bucket, offset + 24, reason, conf_fn); \ + CONF_CHUNK_32(part8, bucket, offset + 28, reason, conf_fn); \ } \ } while(0) 
#endif @@ -270,121 +497,6 @@ m256 prep_conf_teddy_m4(const m256 *lo_mask, const m256 *dup_mask, return or256(SHIFT_OR_M4, r_msk); } -#else // not defined __AVX2__ - -#ifdef ARCH_64_BIT -#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ -do { \ - if (unlikely(diff128(var, ones128()))) { \ - u64a lo = movq(var); \ - u64a hi = movq(rshiftbyte_m128(var, 8)); \ - if (unlikely(lo != ones_u64a)) { \ - lo = ~lo; \ - conf_fn(&lo, bucket, offset, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(hi != ones_u64a)) { \ - hi = ~hi; \ - conf_fn(&hi, bucket, offset + 8, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - } \ -} while(0) -#else -#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ -do { \ - if (unlikely(diff128(var, ones128()))) { \ - u32 part1 = movd(var); \ - u32 part2 = movd(rshiftbyte_m128(var, 4)); \ - u32 part3 = movd(rshiftbyte_m128(var, 8)); \ - u32 part4 = movd(rshiftbyte_m128(var, 12)); \ - if (unlikely(part1 != ones_u32)) { \ - part1 = ~part1; \ - conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part2 != ones_u32)) { \ - part2 = ~part2; \ - conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part3 != ones_u32)) { \ - part3 = ~part3; \ - conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part4 != ones_u32)) { \ - part4 = ~part4; \ - conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - } \ -} while(0) -#endif - -static really_inline -m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) { - m128 mask = set16x8(0xf); - m128 lo = and128(val, mask); - m128 hi = 
and128(rshift64_m128(val, 4), mask); - return or128(pshufb_m128(maskBase[0 * 2], lo), - pshufb_m128(maskBase[0 * 2 + 1], hi)); -} - -static really_inline -m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) { - m128 mask = set16x8(0xf); - m128 lo = and128(val, mask); - m128 hi = and128(rshift64_m128(val, 4), mask); - m128 r = prep_conf_teddy_m1(maskBase, val); - - m128 res_1 = or128(pshufb_m128(maskBase[1 * 2], lo), - pshufb_m128(maskBase[1 * 2 + 1], hi)); - m128 res_shifted_1 = palignr(res_1, *old_1, 16 - 1); - *old_1 = res_1; - return or128(r, res_shifted_1); -} - -static really_inline -m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, - m128 val) { - m128 mask = set16x8(0xf); - m128 lo = and128(val, mask); - m128 hi = and128(rshift64_m128(val, 4), mask); - m128 r = prep_conf_teddy_m2(maskBase, old_1, val); - - m128 res_2 = or128(pshufb_m128(maskBase[2 * 2], lo), - pshufb_m128(maskBase[2 * 2 + 1], hi)); - m128 res_shifted_2 = palignr(res_2, *old_2, 16 - 2); - *old_2 = res_2; - return or128(r, res_shifted_2); -} - -static really_inline -m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, - m128 *old_3, m128 val) { - m128 mask = set16x8(0xf); - m128 lo = and128(val, mask); - m128 hi = and128(rshift64_m128(val, 4), mask); - m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val); - - m128 res_3 = or128(pshufb_m128(maskBase[3 * 2], lo), - pshufb_m128(maskBase[3 * 2 + 1], hi)); - m128 res_shifted_3 = palignr(res_3, *old_3, 16 - 3); - *old_3 = res_3; - return or128(r, res_shifted_3); -} - -#endif // __AVX2__ - -#if defined(__AVX2__) // reinforced teddy - #define PREP_CONF_FN_NO_REINFORCEMENT(val, n) \ prep_conf_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val) @@ -415,42 +527,6 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, m256 dup_mask[n * 2]; \ PREPARE_MASKS_##n -#else // not defined __AVX2__ - -#define FDR_EXEC_TEDDY_RES_OLD_1 - -#define FDR_EXEC_TEDDY_RES_OLD_2 \ - m128 
res_old_1 = zeroes128(); - -#define FDR_EXEC_TEDDY_RES_OLD_3 \ - m128 res_old_1 = zeroes128(); \ - m128 res_old_2 = zeroes128(); - -#define FDR_EXEC_TEDDY_RES_OLD_4 \ - m128 res_old_1 = zeroes128(); \ - m128 res_old_2 = zeroes128(); \ - m128 res_old_3 = zeroes128(); - -#define FDR_EXEC_TEDDY_RES_OLD(n) FDR_EXEC_TEDDY_RES_OLD_##n - -#define PREP_CONF_FN_1(mask_base, val) \ - prep_conf_teddy_m1(mask_base, val) - -#define PREP_CONF_FN_2(mask_base, val) \ - prep_conf_teddy_m2(mask_base, &res_old_1, val) - -#define PREP_CONF_FN_3(mask_base, val) \ - prep_conf_teddy_m3(mask_base, &res_old_1, &res_old_2, val) - -#define PREP_CONF_FN_4(mask_base, val) \ - prep_conf_teddy_m4(mask_base, &res_old_1, &res_old_2, &res_old_3, val) - -#define PREP_CONF_FN(mask_base, val, n) \ - PREP_CONF_FN_##n(mask_base, val) -#endif // __AVX2__ - - -#if defined(__AVX2__) // reinforced teddy #define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) \ do { \ const u8 *buf_end = a->buf + a->len; \ @@ -517,7 +593,119 @@ do { \ \ return HWLM_SUCCESS; \ } while(0) -#else // not defined __AVX2__ + +#else // not defined HAVE_AVX2 + +#ifdef ARCH_64_BIT +#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(diff128(var, ones128()))) { \ + u64a lo = movq(var); \ + u64a hi = movq(rshiftbyte_m128(var, 8)); \ + CONF_CHUNK_64(lo, bucket, offset, reason, conf_fn); \ + CONF_CHUNK_64(hi, bucket, offset + 8, reason, conf_fn); \ + } \ +} while(0) +#else +#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(diff128(var, ones128()))) { \ + u32 part1 = movd(var); \ + u32 part2 = movd(rshiftbyte_m128(var, 4)); \ + u32 part3 = movd(rshiftbyte_m128(var, 8)); \ + u32 part4 = movd(rshiftbyte_m128(var, 12)); \ + CONF_CHUNK_32(part1, bucket, offset, reason, conf_fn); \ + CONF_CHUNK_32(part2, bucket, offset + 4, reason, conf_fn); \ + CONF_CHUNK_32(part3, bucket, offset + 8, reason, conf_fn); \ + CONF_CHUNK_32(part4, bucket, offset + 12, reason, conf_fn); \ + } \ +} 
while(0) +#endif + +static really_inline +m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) { + m128 mask = set16x8(0xf); + m128 lo = and128(val, mask); + m128 hi = and128(rshift64_m128(val, 4), mask); + return or128(pshufb_m128(maskBase[0 * 2], lo), + pshufb_m128(maskBase[0 * 2 + 1], hi)); +} + +static really_inline +m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) { + m128 mask = set16x8(0xf); + m128 lo = and128(val, mask); + m128 hi = and128(rshift64_m128(val, 4), mask); + m128 r = prep_conf_teddy_m1(maskBase, val); + + m128 res_1 = or128(pshufb_m128(maskBase[1 * 2], lo), + pshufb_m128(maskBase[1 * 2 + 1], hi)); + m128 res_shifted_1 = palignr(res_1, *old_1, 16 - 1); + *old_1 = res_1; + return or128(r, res_shifted_1); +} + +static really_inline +m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, + m128 val) { + m128 mask = set16x8(0xf); + m128 lo = and128(val, mask); + m128 hi = and128(rshift64_m128(val, 4), mask); + m128 r = prep_conf_teddy_m2(maskBase, old_1, val); + + m128 res_2 = or128(pshufb_m128(maskBase[2 * 2], lo), + pshufb_m128(maskBase[2 * 2 + 1], hi)); + m128 res_shifted_2 = palignr(res_2, *old_2, 16 - 2); + *old_2 = res_2; + return or128(r, res_shifted_2); +} + +static really_inline +m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, + m128 *old_3, m128 val) { + m128 mask = set16x8(0xf); + m128 lo = and128(val, mask); + m128 hi = and128(rshift64_m128(val, 4), mask); + m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val); + + m128 res_3 = or128(pshufb_m128(maskBase[3 * 2], lo), + pshufb_m128(maskBase[3 * 2 + 1], hi)); + m128 res_shifted_3 = palignr(res_3, *old_3, 16 - 3); + *old_3 = res_3; + return or128(r, res_shifted_3); +} + +#define FDR_EXEC_TEDDY_RES_OLD_1 + +#define FDR_EXEC_TEDDY_RES_OLD_2 \ + m128 res_old_1 = zeroes128(); + +#define FDR_EXEC_TEDDY_RES_OLD_3 \ + m128 res_old_1 = zeroes128(); \ + m128 res_old_2 = zeroes128(); + +#define FDR_EXEC_TEDDY_RES_OLD_4 \ + m128 
res_old_1 = zeroes128(); \ + m128 res_old_2 = zeroes128(); \ + m128 res_old_3 = zeroes128(); + +#define FDR_EXEC_TEDDY_RES_OLD(n) FDR_EXEC_TEDDY_RES_OLD_##n + +#define PREP_CONF_FN_1(mask_base, val) \ + prep_conf_teddy_m1(mask_base, val) + +#define PREP_CONF_FN_2(mask_base, val) \ + prep_conf_teddy_m2(mask_base, &res_old_1, val) + +#define PREP_CONF_FN_3(mask_base, val) \ + prep_conf_teddy_m3(mask_base, &res_old_1, &res_old_2, val) + +#define PREP_CONF_FN_4(mask_base, val) \ + prep_conf_teddy_m4(mask_base, &res_old_1, &res_old_2, &res_old_3, val) + +#define PREP_CONF_FN(mask_base, val, n) \ + PREP_CONF_FN_##n(mask_base, val) + #define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) \ do { \ const u8 *buf_end = a->buf + a->len; \ @@ -581,7 +769,8 @@ do { \ \ return HWLM_SUCCESS; \ } while(0) -#endif // __AVX2__ + +#endif // HAVE_AVX2 HAVE_AVX512 hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, const struct FDR_Runtime_Args *a, diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 11ea0f8e..4091963c 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -109,6 +109,31 @@ const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }; +#define CONF_FAT_CHUNK_64(chunk, bucket, off, reason, conf_fn) \ +do { \ + if (unlikely(chunk != ones_u64a)) { \ + chunk = ~chunk; \ + conf_fn(&chunk, bucket, off, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ +} while(0) + +#define CONF_FAT_CHUNK_32(chunk, bucket, off, reason, conf_fn) \ +do { \ + if (unlikely(chunk != ones_u32a)) { \ + chunk = ~chunk; \ + conf_fn(&chunk, bucket, off, confBase, reason, a, ptr, \ + &control, &last_match); \ + CHECK_HWLM_TERMINATE_MATCHING; \ + } \ +} while(0) + +static really_inline +const m256 *getMaskBase_avx2(const struct Teddy *teddy) { + 
return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); +} + #ifdef ARCH_64_BIT #define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \ do { \ @@ -120,30 +145,10 @@ do { \ r = interleave256hi(var, swap); \ u64a part3 = extractlow64from256(r); \ u64a part4 = extract64from256(r, 1); \ - if (unlikely(part1 != ones_u64a)) { \ - part1 = ~part1; \ - conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part2 != ones_u64a)) { \ - part2 = ~part2; \ - conf_fn(&part2, bucket, offset + 4, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part3 != ones_u64a)) { \ - part3 = ~part3; \ - conf_fn(&part3, bucket, offset + 8, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part4 != ones_u64a)) { \ - part4 = ~part4; \ - conf_fn(&part4, bucket, offset + 12, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ + CONF_FAT_CHUNK_64(part1, bucket, offset, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part2, bucket, offset + 4, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part3, bucket, offset + 8, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part4, bucket, offset + 12, reason, conf_fn); \ } \ } while(0) #else @@ -161,53 +166,14 @@ do { \ u32 part6 = extract32from256(r, 1); \ u32 part7 = extract32from256(r, 2); \ u32 part8 = extract32from256(r, 3); \ - if (unlikely(part1 != ones_u32)) { \ - part1 = ~part1; \ - conf_fn(&part1, bucket, offset, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part2 != ones_u32)) { \ - part2 = ~part2; \ - conf_fn(&part2, bucket, offset + 2, confBase, reason, a, ptr, \ - &control, &last_match); \ - } \ - if (unlikely(part3 != ones_u32)) { \ - part3 = ~part3; \ - conf_fn(&part3, bucket, offset + 4, confBase, reason, a, ptr, \ - 
&control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part4 != ones_u32)) { \ - part4 = ~part4; \ - conf_fn(&part4, bucket, offset + 6, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part5 != ones_u32)) { \ - part5 = ~part5; \ - conf_fn(&part5, bucket, offset + 8, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part6 != ones_u32)) { \ - part6 = ~part6; \ - conf_fn(&part6, bucket, offset + 10, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part7 != ones_u32)) { \ - part7 = ~part7; \ - conf_fn(&part7, bucket, offset + 12, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - if (unlikely(part8 != ones_u32)) { \ - part8 = ~part8; \ - conf_fn(&part8, bucket, offset + 14, confBase, reason, a, ptr, \ - &control, &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ + CONF_FAT_CHUNK_32(part1, bucket, offset, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part2, bucket, offset + 2, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part3, bucket, offset + 4, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part4, bucket, offset + 6, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part5, bucket, offset + 8, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part6, bucket, offset + 10, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part7, bucket, offset + 12, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part8, bucket, offset + 14, reason, conf_fn); \ } \ } while(0) #endif @@ -277,11 +243,6 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, return or256(r, res_shifted_3); } -static really_inline -const m256 *getMaskBase_avx2(const struct Teddy *teddy) { - return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); -} - #define FDR_EXEC_FAT_TEDDY_RES_OLD_1 \ do { \ } while(0) diff --git a/src/fdr/teddy_runtime_common.h 
b/src/fdr/teddy_runtime_common.h index c1333964..6b809cce 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -41,7 +41,7 @@ #include "util/uniform_ops.h" extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32]; -#if defined(__AVX2__) +#if defined(HAVE_AVX2) extern const u8 ALIGN_DIRECTIVE p_mask_arr256[33][64]; #endif @@ -123,7 +123,7 @@ void copyRuntBlock128(u8 *dst, const u8 *src, size_t len) { // for start zone, see below // lo ptr hi hi // |----------|-------|----------------|............| -// start 0 start+offset end(<=16) +// -start 0 -start+offset MIN(avail,16) // p_mask ffff..ff0000...........00ffff.......... // ptr < lo: // only start zone. @@ -182,7 +182,7 @@ m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const size_t start_offset, return u.val128; } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) /* * \brief Copy a block of [0,31] bytes efficiently. * @@ -251,7 +251,7 @@ void copyRuntBlock256(u8 *dst, const u8 *src, size_t len) { // for start zone, see below // lo ptr hi hi // |----------|-------|----------------|............| -// start 0 start+offset end(<=32) +// -start 0 -start+offset MIN(avail,32) // p_mask ffff..ff0000...........00ffff.......... // ptr < lo: // only start zone. @@ -309,7 +309,77 @@ m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const size_t start_offset, return u.val256; } -#endif // __AVX2__ +#endif // HAVE_AVX2 + +#if defined(HAVE_AVX512) +// Note: p_mask is an output param that initialises a poison mask. +// u64a k = ones_u64a << n' >> m'; // m' < n' +// *p_mask = set_mask_m512(~k); +// means p_mask is consist of: +// (n' - m') poison bytes "0xff" at the beginning, +// followed by (64 - n') valid bytes "0x00", +// then followed by the rest m' poison bytes "0xff". +// ptr >= lo: +// no history. 
+// for end/short zone, ptr==lo and start_offset==0 +// for start zone, see below +// lo ptr hi hi +// |----------|-------|----------------|............| +// -start 0 -start+offset MIN(avail,64) +// p_mask ffff..ff0000...........00ffff.......... +// ptr < lo: +// only start zone. +// history +// ptr lo hi hi +// |----------|-------|----------------|............| +// 0 start start+offset end(<=64) +// p_mask ffff.....ffffff..ff0000...........00ffff.......... +static really_inline +m512 vectoredLoad512(m512 *p_mask, const u8 *ptr, const size_t start_offset, + const u8 *lo, const u8 *hi, const u8 *hbuf, size_t hlen, + const u32 nMasks) { + m512 val; + + uintptr_t copy_start; + uintptr_t copy_len; + + if (ptr >= lo) { // short/end/start zone + uintptr_t start = (uintptr_t)(ptr - lo); + uintptr_t avail = (uintptr_t)(hi - ptr); + if (avail >= 64) { + assert(start_offset - start <= 64); + u64a k = ones_u64a << (start_offset - start); + *p_mask = set_mask_m512(~k); + return loadu512(ptr); + } + assert(start_offset - start <= avail); + u64a k = ones_u64a << (64 - avail + start_offset - start) + >> (64 - avail); + *p_mask = set_mask_m512(~k); + copy_start = 0; + copy_len = avail; + } else { //start zone + uintptr_t need = MIN((uintptr_t)(lo - ptr), + MIN(hlen, nMasks - 1)); + uintptr_t start = (uintptr_t)(lo - ptr); + u64a j = 0x7fffffffffffffffULL >> (63 - need) << (start - need); + val = loadu_maskz_m512(j, &hbuf[hlen - start]); + uintptr_t end = MIN(64, (uintptr_t)(hi - ptr)); + assert(start + start_offset <= end); + u64a k = ones_u64a << (64 - end + start + start_offset) >> (64 - end); + *p_mask = set_mask_m512(~k); + copy_start = start; + copy_len = end - start; + } + + assert(copy_len < 64); + assert(copy_len > 0); + u64a j = ones_u64a >> (64 - copy_len) << copy_start; + val = loadu_mask_m512(val, j, ptr); + + return val; +} +#endif // HAVE_AVX512 static really_inline u64a getConfVal(const struct FDR_Runtime_Args *a, const u8 *ptr, u32 byte, diff --git 
a/src/util/simd_utils.h b/src/util/simd_utils.h index 047cdbab..8c469d16 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -755,6 +755,10 @@ m256 combine2x128(m128 hi, m128 lo) { } #endif //AVX2 +#if defined(HAVE_AVX512) +#define extract128from512(a, imm) _mm512_extracti32x4_epi32(a, imm) +#endif + /**** **** 384-bit Primitives ****/ @@ -969,6 +973,13 @@ m512 set8x64(u64a a) { return _mm512_set1_epi64(a); } +static really_inline +m512 set512_64(u64a hi_3, u64a hi_2, u64a hi_1, u64a hi_0, + u64a lo_3, u64a lo_2, u64a lo_1, u64a lo_0) { + return _mm512_set_epi64(hi_3, hi_2, hi_1, hi_0, + lo_3, lo_2, lo_1, lo_0); +} + static really_inline m512 set4x128(m128 a) { return _mm512_broadcast_i32x4(a); @@ -1059,6 +1070,7 @@ m512 lshift64_m512(m512 a, unsigned b) { #if defined(HAVE_AVX512) #define rshift64_m512(a, b) _mm512_srli_epi64((a), (b)) #define rshift128_m512(a, count_immed) _mm512_bsrli_epi128(a, count_immed) +#define lshift128_m512(a, count_immed) _mm512_bslli_epi128(a, count_immed) #endif #if !defined(_MM_CMPINT_NE) @@ -1169,6 +1181,11 @@ static really_inline m512 loadu_mask_m512(m512 src, __mmask64 k, const void *ptr) { return _mm512_mask_loadu_epi8(src, k, ptr); } + +static really_inline +m512 set_mask_m512(__mmask64 k) { + return _mm512_movm_epi8(k); +} #endif // packed unaligned store of first N bytes From d2b5523dd88f2dc43c220271f50754c789e2de25 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Mon, 24 Jul 2017 11:05:46 +0800 Subject: [PATCH 096/190] fix typo "ones_u32a" => "ones_u32" --- src/fdr/teddy.c | 2 +- src/fdr/teddy_avx2.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index f28d0a50..db68749a 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -86,7 +86,7 @@ do { \ #define CONF_CHUNK_32(chunk, bucket, off, reason, conf_fn) \ do { \ - if (unlikely(chunk != ones_u32a)) { \ + if (unlikely(chunk != ones_u32)) { \ chunk = ~chunk; \ conf_fn(&chunk, bucket, off, confBase, reason, a, 
ptr, \ &control, &last_match); \ diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 4091963c..1d037028 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -121,7 +121,7 @@ do { \ #define CONF_FAT_CHUNK_32(chunk, bucket, off, reason, conf_fn) \ do { \ - if (unlikely(chunk != ones_u32a)) { \ + if (unlikely(chunk != ones_u32)) { \ chunk = ~chunk; \ conf_fn(&chunk, bucket, off, confBase, reason, a, ptr, \ &control, &last_match); \ From 86c5f7feb1efede55a8f93cc35d852063049688e Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Thu, 22 Jun 2017 04:50:45 -0400 Subject: [PATCH 097/190] FDR: Squash buckets of included literals in FDR confirm - Change the compile of literal matchers to two passes. - Reverse the bucket assignment in FDR, bucket with longer literals has smaller bucket id. - Squash the buckets of included literals and jump to the the program of included literals directly from parent literal program without going through FDR confirm for included iterals. --- src/fdr/fdr.c | 2 +- src/fdr/fdr_compile.cpp | 291 ++++++++++++++++++++++++--- src/fdr/fdr_compile.h | 22 +- src/fdr/fdr_compile_internal.h | 9 +- src/fdr/fdr_confirm_compile.cpp | 6 +- src/fdr/fdr_confirm_runtime.h | 10 +- src/fdr/teddy_compile.cpp | 94 +++++---- src/fdr/teddy_compile.h | 10 +- src/fdr/teddy_runtime_common.h | 3 +- src/hwlm/hwlm_build.cpp | 111 +++++++--- src/hwlm/hwlm_build.h | 62 +++++- src/hwlm/hwlm_literal.h | 17 ++ src/rose/program_runtime.h | 17 ++ src/rose/rose_build_bytecode.cpp | 216 +++++++++++++++++--- src/rose/rose_build_dump.cpp | 6 + src/rose/rose_build_instructions.cpp | 8 + src/rose/rose_build_instructions.h | 28 +++ src/rose/rose_build_matchers.cpp | 151 +++++++------- src/rose/rose_build_matchers.h | 85 ++++++-- src/rose/rose_build_program.cpp | 8 + src/rose/rose_build_program.h | 1 + src/rose/rose_program.h | 13 +- src/scratch.c | 1 + src/scratch.h | 3 + unit/internal/fdr.cpp | 79 ++++++-- unit/internal/fdr_flood.cpp | 26 ++- 26 files changed, 
1017 insertions(+), 262 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 8d072ea2..f7da6981 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -359,7 +359,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, } u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1); confWithBit(fdrc, a, ptr_main - a->buf + byte, control, - last_match_id, confVal); + last_match_id, confVal, conf, bit); } while (unlikely(!!*conf)); } diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 6f2de3d9..181f9512 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -42,10 +42,13 @@ #include "ue2common.h" #include "hwlm/hwlm_build.h" #include "util/compare.h" +#include "util/container.h" #include "util/dump_mask.h" +#include "util/make_unique.h" #include "util/math.h" #include "util/noncopyable.h" #include "util/target_info.h" +#include "util/ue2_containers.h" #include "util/ue2string.h" #include "util/verify_types.h" @@ -81,7 +84,6 @@ private: bool make_small; u8 *tabIndexToMask(u32 indexInTable); - void assignStringsToBuckets(); #ifdef DEBUG void dumpMasks(const u8 *defaultMask); #endif @@ -90,10 +92,13 @@ private: void createInitialState(FDR *fdr); public: - FDRCompiler(vector lits_in, const FDREngineDescription &eng_in, + FDRCompiler(vector lits_in, + map> bucketToLits_in, + const FDREngineDescription &eng_in, bool make_small_in, const Grey &grey_in) : eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()), - lits(move(lits_in)), make_small(make_small_in) {} + lits(move(lits_in)), bucketToLits(move(bucketToLits_in)), + make_small(make_small_in) {} bytecode_ptr build(); }; @@ -309,7 +314,10 @@ next_literal: return chunks; } -void FDRCompiler::assignStringsToBuckets() { +static +map> assignStringsToBuckets( + vector &lits, + const FDREngineDescription &eng) { const double MAX_SCORE = numeric_limits::max(); assert(!lits.empty()); // Shouldn't be called with no literals. 
@@ -393,6 +401,7 @@ void FDRCompiler::assignStringsToBuckets() { // our best score is in t[0][N_BUCKETS-1] and we can follow the links // to find where our buckets should start and what goes into them + vector> buckets; for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) { u32 j = t[i][n - 1].second; if (j == 0) { @@ -403,21 +412,33 @@ void FDRCompiler::assignStringsToBuckets() { u32 first_id = chunks[i].first_id; u32 last_id = chunks[j].first_id; assert(first_id < last_id); - u32 bucket = numBuckets - n; UNUSED const auto &first_lit = lits[first_id]; UNUSED const auto &last_lit = lits[last_id - 1]; - DEBUG_PRINTF("placing [%u-%u) in bucket %u (%u lits, len %zu-%zu, " + DEBUG_PRINTF("placing [%u-%u) in one bucket (%u lits, len %zu-%zu, " "score %0.4f)\n", - first_id, last_id, bucket, last_id - first_id, + first_id, last_id, last_id - first_id, first_lit.s.length(), last_lit.s.length(), getScoreUtil(first_lit.s.length(), last_id - first_id)); - auto &bucket_lits = bucketToLits[bucket]; - for (u32 k = first_id; k < last_id; k++) { - bucket_lits.push_back(k); + vector litIds; + u32 cnt = last_id - first_id; + // long literals first for included literals checking + for (u32 k = 0; k < cnt; k++) { + litIds.push_back(last_id - k - 1); } + i = j; + buckets.push_back(litIds); } + + // reverse bucket id, longer literals come first + map> bucketToLits; + size_t bucketCnt = buckets.size(); + for (size_t i = 0; i < bucketCnt; i++) { + bucketToLits.emplace(bucketCnt - i - 1, move(buckets[i])); + } + + return bucketToLits; } #ifdef DEBUG @@ -541,24 +562,216 @@ void FDRCompiler::setupTab() { } bytecode_ptr FDRCompiler::build() { - assignStringsToBuckets(); setupTab(); return setupFDR(); } +static +bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) { + auto s1 = lit1.s; + auto s2 = lit2.s; + if (lit1.nocase || lit2.nocase) { + upperString(s1); + upperString(s2); + } + size_t len1 = s1.length(); + size_t len2 = s2.length(); + assert(len1 >= len2); + return 
equal(s2.begin(), s2.end(), s1.begin() + len1 - len2); +} + +/* + * if lit2 is a suffix of lit1 but the case sensitivity, groups or mask info + * of lit2 is a subset of lit1, then lit1 can't squash lit2 and lit2 can + * possibly match when lit1 matches. In this case, we can't do bucket + * squashing. e.g. AAA(no case) in bucket 0, AA(no case) and aa in bucket 1, + * we can't squash bucket 1 if we have input like "aaa" as aa can also match. + */ +static +bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) { + /* lit1 is caseless and lit2 is case sensitive */ + if ((lit1.nocase && !lit2.nocase)) { + return true; + } + + /* lit2's group is a subset of lit1 */ + if (lit1.groups != lit2.groups && + (lit2.groups == (lit1.groups & lit2.groups))) { + return true; + } + + /* TODO: narrow down cases for mask check */ + if (lit1.cmp != lit2.cmp || lit1.msk != lit2.msk) { + return true; + } + + return false; +} + +/* + * if lit2 is an included literal of both lit1 and lit0, and lit1 is an + * exceptional literal of lit0 - lit1 sometimes matches when lit0 matches, + * then we give up squashing for lit1. e.g. lit0:AAA(no case), lit1:aa, + * lit2:A(no case). We can have duplicate matches for input "aaa" if lit0 + * and lit1 both squash lit2. 
+ */ +static +bool checkParentLit( + u32 pos1, const unordered_set &parent_map, + const unordered_map> &exception_map) { + for (const auto pos2 : parent_map) { + if (contains(exception_map, pos2)) { + const auto &exception_pos = exception_map.at(pos2); + if (contains(exception_pos, pos1)) { + return false; + } + } + } + + return true; +} + +static +void buildSquashMask(vector &lits, u32 id1, u32 bucket1, + size_t start, const vector> &group, + unordered_map> &parent_map, + unordered_map> &exception_map) { + auto &lit1 = lits[id1]; + DEBUG_PRINTF("b:%u len:%zu\n", bucket1, lit1.s.length()); + + size_t cnt = group.size(); + bool included = false; + bool exception = false; + u32 child_id = ~0U; + for (size_t i = start; i < cnt; i++) { + u32 bucket2 = group[i].first; + assert(bucket2 >= bucket1); + + u32 id2 = group[i].second; + auto &lit2 = lits[id2]; + // check if lit2 is a suffix of lit1 + if (isSuffix(lit1, lit2)) { + /* if we have a included literal in the same bucket, + * quit and let the included literal to do possible squashing + */ + if (bucket1 == bucket2) { + DEBUG_PRINTF("same bucket\n"); + return; + } + /* + * if lit2 is a suffix but doesn't pass included checks for + * extra info, we give up sqaushing + */ + if (includedCheck(lit1, lit2)) { + DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id); + exception_map[id1].insert(id2); + exception = true; + } else if (checkParentLit(id1, parent_map[id2], exception_map)) { + if (lit1.included_id == INVALID_LIT_ID) { + DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n", + lit1.id, lit2.id); + lit1.included_id = lit2.id; + } else { + /* + * if we have multiple included literals in one bucket, + * give up squashing. + */ + DEBUG_PRINTF("multiple included literals\n"); + lit1.included_id = INVALID_LIT_ID; + return; + } + child_id = id2; + included = true; + } + } + + size_t next = i + 1; + u32 nextBucket = next < cnt ? 
group[next].first : ~0U; + if (bucket2 != nextBucket) { + if (included) { + if (exception) { + /* + * give up if we have exception literals + * in the same bucket as the included literal + */ + lit1.included_id = INVALID_LIT_ID; + } else { + parent_map[child_id].insert(id1); + + lit1.squash |= 1U << bucket2; + DEBUG_PRINTF("build squash mask %2x for %u\n", + lit1.squash, lit1.id); + } + return; + } + exception = false; + } + } +} + +static constexpr u32 INCLUDED_LIMIT = 1000; + +static +void findIncludedLits(vector &lits, + const vector>> &lastCharMap) { + /** Map for finding the positions of literal which includes a literal + * in FDR hwlm literal vector. + */ + unordered_map> parent_map; + + /** Map for finding the positions of exception literals which could + * sometimes match if a literal matches in FDR hwlm literal vector. + */ + unordered_map> exception_map; + for (const auto &group : lastCharMap) { + size_t cnt = group.size(); + if (cnt > INCLUDED_LIMIT) { + continue; + } + for (size_t i = 0; i < cnt; i++) { + u32 bucket1 = group[i].first; + u32 id1 = group[i].second; + buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map, + exception_map); + } + } +} + +static +void addIncludedInfo( + vector &lits, u32 nBuckets, + map> &bucketToLits) { + vector>> lastCharMap(256); + + for (BucketIndex b = 0; b < nBuckets; b++) { + if (!bucketToLits[b].empty()) { + for (const LiteralIndex &lit_idx : bucketToLits[b]) { + const auto &lit = lits[lit_idx]; + u8 c = mytoupper(lit.s.back()); + lastCharMap[c].emplace_back(b, lit_idx); + } + } + } + + findIncludedLits(lits, lastCharMap); +} + } // namespace static -bytecode_ptr fdrBuildTableInternal(const vector &lits, - bool make_small, const target_t &target, - const Grey &grey, u32 hint) { +unique_ptr fdrBuildProtoInternal(u8 engType, + vector &lits, + bool make_small, + const target_t &target, + const Grey &grey, u32 hint) { DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? 
"avx2" : "no-avx2"); if (grey.fdrAllowTeddy) { - auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, grey); - if (fdr) { + auto proto = teddyBuildProtoHinted(engType, lits, make_small, hint, + target); + if (proto) { DEBUG_PRINTF("build with teddy succeeded\n"); - return fdr; + return proto; } else { DEBUG_PRINTF("build with teddy failed, will try with FDR\n"); } @@ -576,23 +789,47 @@ bytecode_ptr fdrBuildTableInternal(const vector &lits, des->stride = 1; } - FDRCompiler fc(lits, *des, make_small, grey); + auto bucketToLits = assignStringsToBuckets(lits, *des); + addIncludedInfo(lits, des->getNumBuckets(), bucketToLits); + auto proto = + ue2::make_unique(engType, move(des), lits, bucketToLits, + make_small); + return proto; +} + +unique_ptr fdrBuildProto(u8 engType, vector lits, + bool make_small, const target_t &target, + const Grey &grey) { + return fdrBuildProtoInternal(engType, lits, make_small, target, grey, + HINT_INVALID); +} + +static +bytecode_ptr fdrBuildTableInternal(const HWLMProto &proto, + const Grey &grey) { + + if (proto.teddyEng) { + return teddyBuildTable(proto, grey); + } + + FDRCompiler fc(proto.lits, proto.bucketToLits, *(proto.fdrEng), + proto.make_small, grey); return fc.build(); } -bytecode_ptr fdrBuildTable(const vector &lits, - bool make_small, const target_t &target, - const Grey &grey) { - return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID); +bytecode_ptr fdrBuildTable(const HWLMProto &proto, const Grey &grey) { + return fdrBuildTableInternal(proto, grey); } #if !defined(RELEASE_BUILD) -bytecode_ptr fdrBuildTableHinted(const vector &lits, - bool make_small, u32 hint, - const target_t &target, - const Grey &grey) { - return fdrBuildTableInternal(lits, make_small, target, grey, hint); +unique_ptr fdrBuildProtoHinted(u8 engType, + vector lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey) { + return fdrBuildProtoInternal(engType, lits, make_small, target, grey, + hint); } 
#endif diff --git a/src/fdr/fdr_compile.h b/src/fdr/fdr_compile.h index 58047600..f0ce4925 100644 --- a/src/fdr/fdr_compile.h +++ b/src/fdr/fdr_compile.h @@ -34,6 +34,7 @@ #define FDR_COMPILE_H #include "ue2common.h" +#include "hwlm/hwlm_build.h" #include "util/bytecode_ptr.h" #include @@ -46,18 +47,23 @@ struct hwlmLiteral; struct Grey; struct target_t; -bytecode_ptr fdrBuildTable(const std::vector &lits, - bool make_small, const target_t &target, - const Grey &grey); +bytecode_ptr fdrBuildTable(const HWLMProto &proto, const Grey &grey); #if !defined(RELEASE_BUILD) - -bytecode_ptr fdrBuildTableHinted(const std::vector &lits, - bool make_small, u32 hint, - const target_t &target, const Grey &grey); - +std::unique_ptr fdrBuildProtoHinted( + u8 engType, + std::vector lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey); #endif +std::unique_ptr fdrBuildProto( + u8 engType, + std::vector lits, + bool make_small, const target_t &target, + const Grey &grey); + /** \brief Returns size in bytes of the given FDR engine. 
*/ size_t fdrSize(const struct FDR *fdr); diff --git a/src/fdr/fdr_compile_internal.h b/src/fdr/fdr_compile_internal.h index 756fe8e7..3879960a 100644 --- a/src/fdr/fdr_compile_internal.h +++ b/src/fdr/fdr_compile_internal.h @@ -57,10 +57,11 @@ class FDREngineDescription; struct hwlmStreamingControl; struct Grey; -bytecode_ptr setupFullConfs(const std::vector &lits, - const EngineDescription &eng, - std::map> &bucketToLits, - bool make_small); +bytecode_ptr setupFullConfs( + const std::vector &lits, + const EngineDescription &eng, + const std::map> &bucketToLits, + bool make_small); // all suffixes include an implicit max_bucket_width suffix to ensure that // we always read a full-scale flood "behind" us in terms of what's in our diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index a6eee4cf..c75f8d17 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -292,7 +292,7 @@ bytecode_ptr getFDRConfirm(const vector &lits, bytecode_ptr setupFullConfs(const vector &lits, const EngineDescription &eng, - map> &bucketToLits, + const map> &bucketToLits, bool make_small) { unique_ptr teddyDescr = getTeddyDescription(eng.getID()); @@ -300,9 +300,9 @@ setupFullConfs(const vector &lits, BC2CONF bc2Conf; u32 totalConfirmSize = 0; for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { - if (!bucketToLits[b].empty()) { + if (contains(bucketToLits, b)) { vector vl; - for (const LiteralIndex &lit_idx : bucketToLits[b]) { + for (const LiteralIndex &lit_idx : bucketToLits.at(b)) { vl.push_back(lits[lit_idx]); } diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 86a3bfa4..067e50e2 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -29,6 +29,7 @@ #ifndef FDR_CONFIRM_RUNTIME_H #define FDR_CONFIRM_RUNTIME_H +#include "scratch.h" #include "fdr_internal.h" #include "fdr_loadval.h" #include "hwlm/hwlm.h" @@ -41,7 +42,7 @@ static really_inline void confWithBit(const 
struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a, size_t i, hwlmcb_rv_t *control, u32 *last_match, - u64a conf_key) { + u64a conf_key, u64a *conf, u8 bit) { assert(i < a->len); assert(i >= a->start_offset); assert(ISALIGNED(fdrc)); @@ -57,6 +58,10 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a const struct LitInfo *li = (const struct LitInfo *)((const u8 *)fdrc + start); + struct hs_scratch *scratch = a->scratch; + assert(!scratch->fdr_conf); + scratch->fdr_conf = conf; + scratch->fdr_conf_offset = bit; u8 oldNext; // initialized in loop do { assert(ISALIGNED(li)); @@ -88,11 +93,12 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a } *last_match = li->id; - *control = a->cb(i, li->id, a->scratch); + *control = a->cb(i, li->id, scratch); out: oldNext = li->next; // oldNext is either 0 or an 'adjust' value li++; } while (oldNext); + scratch->fdr_conf = NULL; } #endif diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index a35e5900..bb02f759 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -42,9 +42,11 @@ #include "teddy_engine_description.h" #include "grey.h" #include "ue2common.h" +#include "hwlm/hwlm_build.h" #include "util/alloc.h" #include "util/compare.h" #include "util/container.h" +#include "util/make_unique.h" #include "util/noncopyable.h" #include "util/popcount.h" #include "util/target_info.h" @@ -77,17 +79,18 @@ class TeddyCompiler : noncopyable { const TeddyEngineDescription ŋ const Grey &grey; const vector &lits; + map> bucketToLits; bool make_small; public: TeddyCompiler(const vector &lits_in, + map> bucketToLits_in, const TeddyEngineDescription &eng_in, bool make_small_in, const Grey &grey_in) - : eng(eng_in), grey(grey_in), lits(lits_in), make_small(make_small_in) { - } + : eng(eng_in), grey(grey_in), lits(lits_in), + bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {} bytecode_ptr build(); - bool pack(map> 
&bucketToLits); }; class TeddySet { @@ -216,8 +219,10 @@ public: } }; -bool TeddyCompiler::pack(map> &bucketToLits) { +static +bool pack(const vector &lits, + const TeddyEngineDescription &eng, + map> &bucketToLits) { set sts; for (u32 i = 0; i < lits.size(); i++) { @@ -473,30 +478,6 @@ void fillReinforcedTable(const map TeddyCompiler::build() { - assert(eng.numMasks <= MAX_NUM_MASKS); - - if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { - DEBUG_PRINTF("too many literals: %zu\n", lits.size()); - return nullptr; - } - -#ifdef TEDDY_DEBUG - for (size_t i = 0; i < lits.size(); i++) { - printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(), - lits[i].nocase ? "caseless" : "caseful"); - for (size_t j = 0; j < lits[i].s.size(); j++) { - printf("%02x", ((u32)lits[i].s[j])&0xff); - } - printf("\n"); - } -#endif - - map> bucketToLits; - if (!pack(bucketToLits)) { - DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n", - lits.size(), eng.getNumBuckets()); - return nullptr; - } u32 maskWidth = eng.getNumBuckets() / 8; size_t headerSize = sizeof(Teddy); @@ -565,12 +546,49 @@ bytecode_ptr TeddyCompiler::build() { return fdr; } + +static +bool assignStringsToBuckets( + const vector &lits, + TeddyEngineDescription &eng, + map> &bucketToLits) { + assert(eng.numMasks <= MAX_NUM_MASKS); + if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { + DEBUG_PRINTF("too many literals: %zu\n", lits.size()); + return false; + } + +#ifdef TEDDY_DEBUG + for (size_t i = 0; i < lits.size(); i++) { + printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(), + lits[i].nocase ? 
"caseless" : "caseful"); + for (size_t j = 0; j < lits[i].s.size(); j++) { + printf("%02x", ((u32)lits[i].s[j])&0xff); + } + printf("\n"); + } +#endif + + if (!pack(lits, eng, bucketToLits)) { + DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n", + lits.size(), eng.getNumBuckets()); + return false; + } + return true; +} + } // namespace -bytecode_ptr teddyBuildTableHinted(const vector &lits, - bool make_small, u32 hint, - const target_t &target, - const Grey &grey) { +bytecode_ptr teddyBuildTable(const HWLMProto &proto, const Grey &grey) { + TeddyCompiler tc(proto.lits, proto.bucketToLits, *(proto.teddyEng), + proto.make_small, grey); + return tc.build(); +} + + +unique_ptr teddyBuildProtoHinted( + u8 engType, const vector &lits, + bool make_small, u32 hint, const target_t &target) { unique_ptr des; if (hint == HINT_INVALID) { des = chooseTeddyEngine(target, lits); @@ -580,8 +598,14 @@ bytecode_ptr teddyBuildTableHinted(const vector &lits, if (!des) { return nullptr; } - TeddyCompiler tc(lits, *des, make_small, grey); - return tc.build(); + + map> bucketToLits; + if (!assignStringsToBuckets(lits, *des, bucketToLits)) { + return nullptr; + } + + return ue2::make_unique(engType, move(des), lits, + bucketToLits, make_small); } } // namespace ue2 diff --git a/src/fdr/teddy_compile.h b/src/fdr/teddy_compile.h index 5ff4d839..ec251310 100644 --- a/src/fdr/teddy_compile.h +++ b/src/fdr/teddy_compile.h @@ -35,6 +35,7 @@ #define TEDDY_COMPILE_H #include "ue2common.h" +#include "hwlm/hwlm_build.h" #include "util/bytecode_ptr.h" #include @@ -46,12 +47,13 @@ namespace ue2 { struct Grey; struct hwlmLiteral; struct target_t; +struct TeddyEngineDescription; -bytecode_ptr teddyBuildTableHinted(const std::vector &lits, - bool make_small, u32 hint, - const target_t &target, - const Grey &grey); +bytecode_ptr teddyBuildTable(const HWLMProto &proto, const Grey &grey); +std::unique_ptr teddyBuildProtoHinted( + u8 engType, const std::vector &lits, + bool make_small, u32 
hint, const target_t &target); } // namespace ue2 #endif // TEDDY_COMPILE_H diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index 6b809cce..5332423e 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -419,9 +419,10 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, if (!(fdrc->groups & *control)) { continue; } + u64a tmp = 0; u64a confVal = getConfVal(a, ptr, byte, reason); confWithBit(fdrc, a, ptr - a->buf + byte, control, - last_match, confVal); + last_match, confVal, &tmp, 0); } while (unlikely(*conf)); } diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index c2db5480..1b332815 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -41,8 +41,12 @@ #include "scratch.h" #include "ue2common.h" #include "fdr/fdr_compile.h" +#include "fdr/fdr_compile_internal.h" +#include "fdr/fdr_engine_description.h" +#include "fdr/teddy_engine_description.h" #include "util/compile_context.h" #include "util/compile_error.h" +#include "util/make_unique.h" #include "util/ue2string.h" #include @@ -53,6 +57,28 @@ using namespace std; namespace ue2 { +HWLMProto::HWLMProto(u8 engType_in, vector lits_in) + : engType(engType_in), lits(move(lits_in)) {} + +HWLMProto::HWLMProto(u8 engType_in, + unique_ptr eng_in, + vector lits_in, + map> bucketToLits_in, + bool make_small_in) + : engType(engType_in), fdrEng(move(eng_in)), lits(move(lits_in)), + bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {} + +HWLMProto::HWLMProto(u8 engType_in, + unique_ptr eng_in, + vector lits_in, + map> bucketToLits_in, + bool make_small_in) + : engType(engType_in), teddyEng(move(eng_in)), + lits(move(lits_in)), + bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {} + +HWLMProto::~HWLMProto() {} + static void dumpLits(UNUSED const vector &lits) { #ifdef DEBUG @@ -92,9 +118,52 @@ bool isNoodleable(const vector &lits, return true; } -bytecode_ptr hwlmBuild(const vector &lits, 
bool make_small, - const CompileContext &cc, +bytecode_ptr hwlmBuild(const HWLMProto &proto, const CompileContext &cc, UNUSED hwlm_group_t expected_groups) { + size_t engSize = 0; + shared_ptr eng; + + const auto &lits = proto.lits; + DEBUG_PRINTF("building table with %zu strings\n", lits.size()); + + if (proto.engType == HWLM_ENGINE_NOOD) { + DEBUG_PRINTF("build noodle table\n"); + const hwlmLiteral &lit = lits.front(); + auto noodle = noodBuildTable(lit); + if (noodle) { + engSize = noodle.size(); + } + eng = move(noodle); + } else { + DEBUG_PRINTF("building a new deal\n"); + auto fdr = fdrBuildTable(proto, cc.grey); + if (fdr) { + engSize = fdr.size(); + } + eng = move(fdr); + } + + if (!eng) { + return nullptr; + } + + assert(engSize); + if (engSize > cc.grey.limitLiteralMatcherSize) { + throw ResourceLimitError(); + } + + const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize; + auto h = make_zeroed_bytecode_ptr(hwlm_len, 64); + + h->type = proto.engType; + memcpy(HWLM_DATA(h.get()), eng.get(), engSize); + + return h; +} + +unique_ptr +hwlmBuildProto(vector &lits, bool make_small, + const CompileContext &cc) { assert(!lits.empty()); dumpLits(lits); @@ -124,9 +193,7 @@ bytecode_ptr hwlmBuild(const vector &lits, bool make_small, } } - u8 engType = 0; - size_t engSize = 0; - shared_ptr eng; + unique_ptr proto; DEBUG_PRINTF("building table with %zu strings\n", lits.size()); @@ -134,39 +201,17 @@ bytecode_ptr hwlmBuild(const vector &lits, bool make_small, if (isNoodleable(lits, cc)) { DEBUG_PRINTF("build noodle table\n"); - engType = HWLM_ENGINE_NOOD; - const hwlmLiteral &lit = lits.front(); - auto noodle = noodBuildTable(lit); - if (noodle) { - engSize = noodle.size(); - } - eng = move(noodle); + proto = ue2::make_unique(HWLM_ENGINE_NOOD, lits); } else { DEBUG_PRINTF("building a new deal\n"); - engType = HWLM_ENGINE_FDR; - auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey); - if (fdr) { - engSize = fdr.size(); + proto = 
fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small, + cc.target_info, cc.grey); + if (!proto) { + return nullptr; } - eng = move(fdr); } - if (!eng) { - return nullptr; - } - - assert(engSize); - if (engSize > cc.grey.limitLiteralMatcherSize) { - throw ResourceLimitError(); - } - - const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize; - auto h = make_zeroed_bytecode_ptr(hwlm_len, 64); - - h->type = engType; - memcpy(HWLM_DATA(h.get()), eng.get(), engSize); - - return h; + return proto; } size_t hwlmSize(const HWLM *h) { diff --git a/src/hwlm/hwlm_build.h b/src/hwlm/hwlm_build.h index f2691496..4aefc364 100644 --- a/src/hwlm/hwlm_build.h +++ b/src/hwlm/hwlm_build.h @@ -34,9 +34,11 @@ #define HWLM_BUILD_H #include "hwlm.h" +#include "hwlm_literal.h" #include "ue2common.h" #include "util/bytecode_ptr.h" +#include #include #include @@ -44,15 +46,62 @@ struct HWLM; namespace ue2 { +class FDREngineDescription; +class TeddyEngineDescription; struct CompileContext; struct Grey; -struct hwlmLiteral; + +/** \brief Class representing a literal matcher prototype. */ +struct HWLMProto { + /** + * \brief Engine type to distinguish noodle from FDR and Teddy. + */ + u8 engType; + + /** + * \brief FDR engine description. + */ + std::unique_ptr fdrEng; + + /** + * \brief Teddy engine description. + */ + std::unique_ptr teddyEng; + + /** + * \brief HWLM literals passed from Rose. + */ + std::vector lits; + + /** + * \brief Bucket assignment info in FDR and Teddy + */ + std::map> bucketToLits; + + /** + * \brief Flag to optimise matcher for small size from Rose. 
+ */ + bool make_small; + + HWLMProto(u8 engType_in, std::vector lits_in); + + HWLMProto(u8 engType_in, std::unique_ptr eng_in, + std::vector lits_in, + std::map> bucketToLits_in, + bool make_small_in); + + HWLMProto(u8 engType_in, std::unique_ptr eng_in, + std::vector lits_in, + std::map> bucketToLits_in, + bool make_small_in); + + ~HWLMProto(); +}; /** \brief Build an \ref HWLM literal matcher runtime structure for a group of * literals. * - * \param lits The group of literals. - * \param make_small Optimise matcher for small size. + * \param proto Literal matcher prototype. * \param cc Compile context. * \param expected_groups FIXME: document me! * @@ -60,10 +109,13 @@ struct hwlmLiteral; * may result in a nullptr return value, or a std::bad_alloc exception being * thrown. */ -bytecode_ptr hwlmBuild(const std::vector &lits, - bool make_small, const CompileContext &cc, +bytecode_ptr hwlmBuild(const HWLMProto &proto, const CompileContext &cc, hwlm_group_t expected_groups = HWLM_ALL_GROUPS); +std::unique_ptr +hwlmBuildProto(std::vector &lits, bool make_small, + const CompileContext &cc); + /** * Returns an estimate of the number of repeated characters on the end of a * literal that will make a literal set of size \a numLiterals suffer diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index 9ae7744d..08510fb0 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -45,6 +45,8 @@ namespace ue2 { /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */ #define HWLM_MASKLEN 8 +#define INVALID_LIT_ID ~0U + /** \brief Class representing a literal, fed to \ref hwlmBuild. */ struct hwlmLiteral { std::string s; //!< \brief The literal itself. @@ -64,6 +66,21 @@ struct hwlmLiteral { * can be quashed by the literal matcher. */ bool noruns; + /** \brief included literal id. */ + u32 included_id = INVALID_LIT_ID; + + /** \brief Squash mask for FDR's confirm mask for included literals. 
+ * + * In FDR confirm, if we have included literal in another bucket, + * we can use this mask to squash the bit for the bucket in FDR confirm + * mask and then run programs of included literal directly and avoid + * confirm work. + * + * This value is calculated in FDR compile code once bucket assignment is + * completed + */ + u8 squash = 0; + /** \brief Set of groups that literal belongs to. * * Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 83a34a39..ab0934de 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -2570,6 +2570,23 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } } PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(INCLUDED_JUMP) { + if (scratch->fdr_conf) { + // squash the bucket of included literal + u8 shift = scratch->fdr_conf_offset & ~7U; + u64a mask = ((~(u64a)ri->squash) << shift); + *(scratch->fdr_conf) &= mask; + + pc = getByOffset(t, ri->child_offset); + pc_base = pc; + programOffset = (const u8 *)pc_base -(const u8 *)t; + DEBUG_PRINTF("pc_base %p pc %p child_offset %u\n", + pc_base, pc, ri->child_offset); + continue; + } + } + PROGRAM_NEXT_INSTRUCTION } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 4d0793bf..a41f0322 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -49,6 +49,7 @@ #include "rose_internal.h" #include "rose_program.h" #include "hwlm/hwlm.h" /* engine types */ +#include "hwlm/hwlm_build.h" #include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" @@ -2803,7 +2804,7 @@ vector groupByFragment(const RoseBuildImpl &build) { auto groups = info.group_mask; if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { - fragments.emplace_back(frag_id, groups, lit_id); + fragments.emplace_back(frag_id, lit.s, groups, lit_id); frag_id++; continue; } @@ -2816,10 +2817,11 @@ vector groupByFragment(const 
RoseBuildImpl &build) { } for (auto &m : frag_info) { + auto &lit = m.first; auto &fi = m.second; DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), as_string_list(fi.lit_ids).c_str()); - fragments.emplace_back(frag_id, fi.groups, move(fi.lit_ids)); + fragments.emplace_back(frag_id, lit.s, fi.groups, move(fi.lit_ids)); frag_id++; assert(frag_id == fragments.size()); } @@ -2827,33 +2829,181 @@ vector groupByFragment(const RoseBuildImpl &build) { return fragments; } +static +void buildIncludedIdMap(unordered_map> &includedIdMap, + const LitProto *litProto) { + if (!litProto) { + return; + } + const auto &proto = *litProto->hwlmProto; + for (const auto &lit : proto.lits) { + if (lit.included_id != INVALID_LIT_ID) { + includedIdMap[lit.id] = make_pair(lit.included_id, lit.squash); + } + } +} + +static +void findInclusionGroups(vector &fragments, + LitProto *fproto, LitProto *drproto, + LitProto *eproto, LitProto *sbproto) { + unordered_map> includedIdMap; + unordered_map> includedDelayIdMap; + buildIncludedIdMap(includedIdMap, fproto); + buildIncludedIdMap(includedDelayIdMap, drproto); + buildIncludedIdMap(includedIdMap, eproto); + buildIncludedIdMap(includedIdMap, sbproto); + + size_t fragNum = fragments.size(); + vector candidates; + for (size_t j = 0; j < fragNum; j++) { + DEBUG_PRINTF("frag id %lu\n", j); + u32 id = j; + if (contains(includedIdMap, id) || + contains(includedDelayIdMap, id)) { + candidates.push_back(j); + DEBUG_PRINTF("find candidate\n"); + } + } + + for (const auto &c : candidates) { + auto &frag = fragments[c]; + u32 id = c; + if (contains(includedIdMap, id)) { + const auto &childId = includedIdMap[id]; + frag.included_frag_id = childId.first; + frag.squash = childId.second; + DEBUG_PRINTF("frag id %u child frag id %u\n", c, + frag.included_frag_id); + } + + if (contains(includedDelayIdMap, id)) { + const auto &childId = includedDelayIdMap[id]; + frag.included_delay_frag_id = childId.first; + frag.delay_squash = childId.second; + 
+ DEBUG_PRINTF("delay frag id %u child frag id %u\n", c, + frag.included_delay_frag_id); + } + } +} + +static +void buildFragmentPrograms(const RoseBuildImpl &build, + vector &fragments, + build_context &bc, ProgramBuild &prog_build, + const map> &lit_edge_map) { + // Sort fragments based on literal length and case info to build + // included literal programs before their parent programs. + vector ordered_fragments(fragments); + stable_sort(begin(ordered_fragments), end(ordered_fragments), + [](const LitFragment &a, const LitFragment &b) { + auto len1 = a.s.length(); + auto caseful1 = !a.s.any_nocase(); + auto len2 = b.s.length(); + auto caseful2 = !b.s.any_nocase(); + return tie(len1, caseful1) < tie(len2, caseful2); + }); + + for (auto &frag : ordered_fragments) { + auto &pfrag = fragments[frag.fragment_id]; + DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", pfrag.fragment_id, + as_string_list(pfrag.lit_ids).c_str()); + + auto lit_prog = makeFragmentProgram(build, bc, prog_build, + pfrag.lit_ids, lit_edge_map); + if (pfrag.included_frag_id != INVALID_FRAG_ID && + !lit_prog.empty()) { + auto &cfrag = fragments[pfrag.included_frag_id]; + assert(pfrag.s.length() >= cfrag.s.length() && + !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); + u32 child_offset = cfrag.lit_program_offset; + DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id, + child_offset); + addIncludedJumpProgram(lit_prog, child_offset, pfrag.squash); + } + pfrag.lit_program_offset = writeProgram(bc, move(lit_prog)); + + // We only do delayed rebuild in streaming mode. 
+ if (!build.cc.streaming) { + continue; + } + + auto rebuild_prog = makeDelayRebuildProgram(build, prog_build, + pfrag.lit_ids); + if (pfrag.included_delay_frag_id != INVALID_FRAG_ID && + !rebuild_prog.empty()) { + auto &cfrag = fragments[pfrag.included_delay_frag_id]; + assert(pfrag.s.length() >= cfrag.s.length() && + !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); + u32 child_offset = cfrag.delay_program_offset; + DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id, + child_offset); + addIncludedJumpProgram(rebuild_prog, child_offset, + pfrag.delay_squash); + } + pfrag.delay_program_offset = writeProgram(bc, move(rebuild_prog)); + } +} + +static +void updateLitProtoProgramOffset(vector &fragments, + LitProto &litProto, bool delay) { + auto &proto = *litProto.hwlmProto; + for (auto &lit : proto.lits) { + auto fragId = lit.id; + auto &frag = fragments[fragId]; + if (delay) { + DEBUG_PRINTF("delay_program_offset:%u\n", + frag.delay_program_offset); + lit.id = frag.delay_program_offset; + } else { + DEBUG_PRINTF("lit_program_offset:%u\n", + frag.lit_program_offset); + lit.id = frag.lit_program_offset; + } + } +} + +static +void updateLitProgramOffset(vector &fragments, + LitProto *fproto, LitProto *drproto, + LitProto *eproto, LitProto *sbproto) { + if (fproto) { + updateLitProtoProgramOffset(fragments, *fproto, false); + } + + if (drproto) { + updateLitProtoProgramOffset(fragments, *drproto, true); + } + + if (eproto) { + updateLitProtoProgramOffset(fragments, *eproto, false); + } + + if (sbproto) { + updateLitProtoProgramOffset(fragments, *sbproto, false); + } +} + /** * \brief Build the interpreter programs for each literal. 
*/ static void buildLiteralPrograms(const RoseBuildImpl &build, vector &fragments, build_context &bc, - ProgramBuild &prog_build) { + ProgramBuild &prog_build, LitProto *fproto, + LitProto *drproto, LitProto *eproto, + LitProto *sbproto) { DEBUG_PRINTF("%zu fragments\n", fragments.size()); auto lit_edge_map = findEdgesByLiteral(build); - for (auto &frag : fragments) { - DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", frag.fragment_id, - as_string_list(frag.lit_ids).c_str()); + findInclusionGroups(fragments, fproto, drproto, eproto, sbproto); - auto lit_prog = makeFragmentProgram(build, bc, prog_build, frag.lit_ids, - lit_edge_map); - frag.lit_program_offset = writeProgram(bc, move(lit_prog)); + buildFragmentPrograms(build, fragments, bc, prog_build, lit_edge_map); - // We only do delayed rebuild in streaming mode. - if (!build.cc.streaming) { - continue; - } - - auto rebuild_prog = makeDelayRebuildProgram(build, prog_build, - frag.lit_ids); - frag.delay_program_offset = writeProgram(bc, move(rebuild_prog)); - } + // update literal program offsets for literal matcher prototypes + updateLitProgramOffset(fragments, fproto, drproto, eproto, sbproto); } /** @@ -3470,7 +3620,24 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { tie(proto.delayProgramOffset, proto.delay_count) = writeDelayPrograms(*this, fragments, bc, prog_build); - buildLiteralPrograms(*this, fragments, bc, prog_build); + // Build floating HWLM matcher prototype. + rose_group fgroups = 0; + auto fproto = buildFloatingMatcherProto(*this, fragments, + longLitLengthThreshold, + &fgroups, &historyRequired); + + // Build delay rebuild HWLM matcher prototype. + auto drproto = buildDelayRebuildMatcherProto(*this, fragments, + longLitLengthThreshold); + + // Build EOD-anchored HWLM matcher prototype. + auto eproto = buildEodAnchoredMatcherProto(*this, fragments); + + // Build small-block HWLM matcher prototype. 
+ auto sbproto = buildSmallBlockMatcherProto(*this, fragments); + + buildLiteralPrograms(*this, fragments, bc, prog_build, fproto.get(), + drproto.get(), eproto.get(), sbproto.get()); auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset); proto.eodProgramOffset = writeProgram(bc, move(eod_prog)); @@ -3497,29 +3664,26 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { } // Build floating HWLM matcher. - rose_group fgroups = 0; - auto ftable = buildFloatingMatcher(*this, fragments, longLitLengthThreshold, - &fgroups, &historyRequired); + auto ftable = buildHWLMMatcher(*this, fproto.get()); if (ftable) { proto.fmatcherOffset = bc.engine_blob.add(ftable); bc.resources.has_floating = true; } // Build delay rebuild HWLM matcher. - auto drtable = buildDelayRebuildMatcher(*this, fragments, - longLitLengthThreshold); + auto drtable = buildHWLMMatcher(*this, drproto.get()); if (drtable) { proto.drmatcherOffset = bc.engine_blob.add(drtable); } // Build EOD-anchored HWLM matcher. - auto etable = buildEodAnchoredMatcher(*this, fragments); + auto etable = buildHWLMMatcher(*this, eproto.get()); if (etable) { proto.ematcherOffset = bc.engine_blob.add(etable); } // Build small-block HWLM matcher. 
- auto sbtable = buildSmallBlockMatcher(*this, fragments); + auto sbtable = buildHWLMMatcher(*this, sbproto.get()); if (sbtable) { proto.sbmatcherOffset = bc.engine_blob.add(sbtable); } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 5e9f95f2..e98308ac 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1463,6 +1463,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(INCLUDED_JUMP) { + os << " child_offset " << ri->child_offset << endl; + os << " squash " << ri->squash << endl; + } + PROGRAM_NEXT_INSTRUCTION + default: os << " UNKNOWN (code " << int{code} << ")" << endl; os << " " << endl; diff --git a/src/rose/rose_build_instructions.cpp b/src/rose/rose_build_instructions.cpp index b00c36be..8af08298 100644 --- a/src/rose/rose_build_instructions.cpp +++ b/src/rose/rose_build_instructions.cpp @@ -636,4 +636,12 @@ void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob, inst->fail_jump = calc_jump(offset_map, this, target); } +void RoseInstrIncludedJump::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->child_offset = child_offset; + inst->squash = squash; +} + } diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h index 025f6a67..3bc3266b 100644 --- a/src/rose/rose_build_instructions.h +++ b/src/rose/rose_build_instructions.h @@ -2121,6 +2121,34 @@ public: } }; +class RoseInstrIncludedJump + : public RoseInstrBaseNoTargets { +public: + u32 child_offset; + u8 squash; + + RoseInstrIncludedJump(u32 child_offset_in, u8 squash_in) + : child_offset(child_offset_in), squash(squash_in) {} + + bool operator==(const RoseInstrIncludedJump &ri) const { + return child_offset == ri.child_offset && squash == ri.squash; + } + + size_t hash() const override { + return 
hash_all(static_cast(opcode), child_offset, squash); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrIncludedJump &ri, const OffsetMap &, + const OffsetMap &) const { + return child_offset == ri.child_offset && squash == ri.squash; + } +}; + class RoseInstrEnd : public RoseInstrBaseTrivial { diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 57269747..2c302a85 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -46,6 +46,7 @@ #include "util/compile_context.h" #include "util/compile_error.h" #include "util/dump_charclass.h" +#include "util/make_unique.h" #include "util/report.h" #include "util/report_manager.h" #include "util/verify_types.h" @@ -699,8 +700,7 @@ struct MatcherProto { static void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, - const LitFragment &f, u32 id, bool delay_rebuild, - size_t max_len) { + const LitFragment &f, u32 id, size_t max_len) { const rose_literal_id &lit = build.literals.at(id); DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(lit.s).c_str(), @@ -737,12 +737,10 @@ void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, return; } - u32 prog_offset = - delay_rebuild ? f.delay_program_offset : f.lit_program_offset; const auto &groups = f.groups; - mp.lits.emplace_back(move(s_final), nocase, noruns, prog_offset, groups, - msk, cmp); + mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id, + groups, msk, cmp); } static @@ -837,8 +835,7 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, } // Build our fragment (for the HWLM matcher) from the first literal. 
- addFragmentLiteral(build, mp, f, used_lit_ids.front(), delay_rebuild, - max_len); + addFragmentLiteral(build, mp, f, used_lit_ids.front(), max_len); for (u32 id : used_lit_ids) { const rose_literal_id &lit = build.literals.at(id); @@ -876,8 +873,8 @@ void MatcherProto::insert(const MatcherProto &a) { } static -void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp, - HWLM &hwlm) { +void buildAccel(const RoseBuildImpl &build, + const vector &accel_lits, HWLM &hwlm) { if (!build.cc.grey.hamsterAccelForward) { return; } @@ -886,49 +883,68 @@ void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp, return; } - buildForwardAccel(&hwlm, mp.accel_lits, build.getInitialGroups()); + buildForwardAccel(&hwlm, accel_lits, build.getInitialGroups()); } -bytecode_ptr buildFloatingMatcher(const RoseBuildImpl &build, - const vector &fragments, - size_t longLitLengthThreshold, - rose_group *fgroups, - size_t *historyRequired) { - *fgroups = 0; - - auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, - longLitLengthThreshold); - if (mp.lits.empty()) { - DEBUG_PRINTF("empty floating matcher\n"); +bytecode_ptr +buildHWLMMatcher(const RoseBuildImpl &build, LitProto *litProto) { + if (!litProto) { return nullptr; } - dumpMatcherLiterals(mp.lits, "floating", build.cc.grey); - - for (const hwlmLiteral &lit : mp.lits) { - *fgroups |= lit.groups; - } - - auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); + auto hwlm = hwlmBuild(*litProto->hwlmProto, build.cc, + build.getInitialGroups()); if (!hwlm) { throw CompileError("Unable to generate bytecode."); } - buildAccel(build, mp, *hwlm); + buildAccel(build, litProto->accel_lits, *hwlm); - if (build.cc.streaming) { - DEBUG_PRINTF("history_required=%zu\n", mp.history_required); - assert(mp.history_required <= build.cc.grey.maxHistoryAvailable); - *historyRequired = max(*historyRequired, mp.history_required); - } - - DEBUG_PRINTF("built floating literal table size %zu bytes\n", 
hwlm.size()); + DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", + hwlm.size()); return hwlm; } -bytecode_ptr -buildDelayRebuildMatcher(const RoseBuildImpl &build, - const vector &fragments, - size_t longLitLengthThreshold) { +unique_ptr +buildFloatingMatcherProto(const RoseBuildImpl &build, + const vector &fragments, + size_t longLitLengthThreshold, + rose_group *fgroups, + size_t *historyRequired) { + DEBUG_PRINTF("Floating literal matcher\n"); + *fgroups = 0; + + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, + longLitLengthThreshold); + if (mp.lits.empty()) { + DEBUG_PRINTF("empty floating matcher\n"); + return nullptr; + } + dumpMatcherLiterals(mp.lits, "floating", build.cc.grey); + + for (const hwlmLiteral &lit : mp.lits) { + *fgroups |= lit.groups; + } + + if (build.cc.streaming) { + DEBUG_PRINTF("history_required=%zu\n", mp.history_required); + assert(mp.history_required <= build.cc.grey.maxHistoryAvailable); + *historyRequired = max(*historyRequired, mp.history_required); + } + + auto proto = hwlmBuildProto(mp.lits, false, build.cc); + + if (!proto) { + throw CompileError("Unable to generate literal matcher proto."); + } + + return ue2::make_unique(move(proto), mp.accel_lits); +} + +unique_ptr +buildDelayRebuildMatcherProto(const RoseBuildImpl &build, + const vector &fragments, + size_t longLitLengthThreshold) { + DEBUG_PRINTF("Delay literal matcher\n"); if (!build.cc.streaming) { DEBUG_PRINTF("not streaming\n"); return nullptr; @@ -942,20 +958,20 @@ buildDelayRebuildMatcher(const RoseBuildImpl &build, } dumpMatcherLiterals(mp.lits, "delay_rebuild", build.cc.grey); - auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); - if (!hwlm) { - throw CompileError("Unable to generate bytecode."); + + auto proto = hwlmBuildProto(mp.lits, false, build.cc); + + if (!proto) { + throw CompileError("Unable to generate literal matcher proto."); } - buildAccel(build, mp, *hwlm); - - DEBUG_PRINTF("built delay rebuild 
table size %zu bytes\n", hwlm.size()); - return hwlm; + return ue2::make_unique(move(proto), mp.accel_lits); } -bytecode_ptr -buildSmallBlockMatcher(const RoseBuildImpl &build, - const vector &fragments) { +unique_ptr +buildSmallBlockMatcherProto(const RoseBuildImpl &build, + const vector &fragments) { + DEBUG_PRINTF("Small block literal matcher\n"); if (build.cc.streaming) { DEBUG_PRINTF("streaming mode\n"); return nullptr; @@ -1000,21 +1016,19 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups()); - if (!hwlm) { - throw CompileError("Unable to generate bytecode."); + auto proto = hwlmBuildProto(mp.lits, false, build.cc); + + if (!proto) { + throw CompileError("Unable to generate literal matcher proto."); } - buildAccel(build, mp, *hwlm); - - DEBUG_PRINTF("built small block literal table size %zu bytes\n", - hwlm.size()); - return hwlm; + return ue2::make_unique(move(proto), mp.accel_lits); } -bytecode_ptr -buildEodAnchoredMatcher(const RoseBuildImpl &build, - const vector &fragments) { +unique_ptr +buildEodAnchoredMatcherProto(const RoseBuildImpl &build, + const vector &fragments) { + DEBUG_PRINTF("Eod anchored literal matcher\n"); auto mp = makeMatcherProto(build, fragments, ROSE_EOD_ANCHORED, false, build.ematcher_region_size); @@ -1027,16 +1041,13 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build, assert(build.ematcher_region_size); - auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups()); - if (!hwlm) { - throw CompileError("Unable to generate bytecode."); + auto proto = hwlmBuildProto(mp.lits, false, build.cc); + + if (!proto) { + throw CompileError("Unable to generate literal matcher proto."); } - buildAccel(build, mp, *hwlm); - - DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", - hwlm.size()); - return hwlm; + return ue2::make_unique(move(proto), mp.accel_lits); } } // namespace ue2 diff --git a/src/rose/rose_build_matchers.h 
b/src/rose/rose_build_matchers.h index 2b1afc8c..9668ebc9 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -35,7 +35,10 @@ #define ROSE_BUILD_MATCHERS_H #include "rose_build_impl.h" +#include "rose_build_lit_accel.h" +#include "hwlm/hwlm_build.h" #include "util/bytecode_ptr.h" +#include "util/ue2string.h" #include @@ -44,38 +47,80 @@ struct HWLM; namespace ue2 { +static constexpr u32 INVALID_FRAG_ID = ~0U; + struct LitFragment { - LitFragment(u32 fragment_id_in, rose_group groups_in, u32 lit_id) - : fragment_id(fragment_id_in), groups(groups_in), lit_ids({lit_id}) {} - LitFragment(u32 fragment_id_in, rose_group groups_in, - std::vector lit_ids_in) - : fragment_id(fragment_id_in), groups(groups_in), - lit_ids(std::move(lit_ids_in)) {} + LitFragment(u32 fragment_id_in, ue2_literal s_in, + rose_group groups_in, u32 lit_id) + : fragment_id(fragment_id_in), s(s_in), groups(groups_in), + lit_ids({lit_id}) {} + LitFragment(u32 fragment_id_in, ue2_literal s_in, + rose_group groups_in, std::vector lit_ids_in) + : fragment_id(fragment_id_in), s(s_in), groups(groups_in), + lit_ids(std::move(lit_ids_in)) {} u32 fragment_id; + + /** + * \brief literal fragment. + */ + ue2_literal s; + + /** + * \brief FDR confirm squash mask for included literals. + */ + u8 squash; + + /** + * \brief FDR confirm squash mask for included literals (Delayed + * literals only). + */ + u8 delay_squash; + + /** + * \brief Fragment id of included literal. + */ + u32 included_frag_id = INVALID_FRAG_ID; + + /** + * \brief Fragment Id of included literal (Delayed literals only). 
+ */ + u32 included_delay_frag_id = INVALID_FRAG_ID; rose_group groups; std::vector lit_ids; u32 lit_program_offset = ROSE_INVALID_PROG_OFFSET; u32 delay_program_offset = ROSE_INVALID_PROG_OFFSET; }; -bytecode_ptr -buildFloatingMatcher(const RoseBuildImpl &build, - const std::vector &fragments, - size_t longLitLengthThreshold, rose_group *fgroups, - size_t *historyRequired); +struct LitProto { + LitProto(std::unique_ptr hwlmProto_in, + std::vector &accel_lits_in) + : hwlmProto(std::move(hwlmProto_in)), accel_lits(accel_lits_in) {} + + std::unique_ptr hwlmProto; + std::vector accel_lits; +}; bytecode_ptr -buildDelayRebuildMatcher(const RoseBuildImpl &build, - const std::vector &fragments, - size_t longLitLengthThreshold); +buildHWLMMatcher(const RoseBuildImpl &build, LitProto *proto); -bytecode_ptr -buildSmallBlockMatcher(const RoseBuildImpl &build, - const std::vector &fragments); +std::unique_ptr +buildFloatingMatcherProto(const RoseBuildImpl &build, + const std::vector &fragments, + size_t longLitLengthThreshold, + rose_group *fgroups, + size_t *historyRequired); -bytecode_ptr -buildEodAnchoredMatcher(const RoseBuildImpl &build, - const std::vector &fragments); +std::unique_ptr +buildDelayRebuildMatcherProto(const RoseBuildImpl &build, + const std::vector &fragments, + size_t longLitLengthThreshold); +std::unique_ptr +buildSmallBlockMatcherProto(const RoseBuildImpl &build, + const std::vector &fragments); + +std::unique_ptr +buildEodAnchoredMatcherProto(const RoseBuildImpl &build, + const std::vector &fragments); void findMoreLiteralMasks(RoseBuildImpl &build); diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 562ddb20..01bd7c54 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -2164,6 +2164,14 @@ RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, return prog; } +void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, + u8 squash) { + RoseProgram block; + 
block.add_before_end(make_unique(child_offset, + squash)); + program.add_block(move(block)); +} + static void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, RoseProgram &program) { diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 8758ef64..afbaa36e 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -282,6 +282,7 @@ void recordLongLiterals(std::vector &longLiterals, void recordResources(RoseResources &resources, const RoseProgram &program); +void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, u8 squash); } // namespace ue2 #endif // ROSE_BUILD_PROGRAM_H diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 78b123d5..eeebfed1 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -178,7 +178,12 @@ enum RoseInstructionCode { */ ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64, - LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64 //!< Sentinel. + /** + * \brief Jump to the program of included literal. + */ + ROSE_INSTR_INCLUDED_JUMP, + + LAST_ROSE_INSTRUCTION = ROSE_INSTR_INCLUDED_JUMP //!< Sentinel. }; struct ROSE_STRUCT_END { @@ -625,4 +630,10 @@ struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 { s32 last_start; //!< The latest start offset among 8 paths. u32 fail_jump; //!< Jump forward this many bytes on failure. }; + +struct ROSE_STRUCT_INCLUDED_JUMP { + u8 code; //!< From enum RoseInstructionCode. + u8 squash; //!< FDR confirm squash mask for included literal. + u32 child_offset; //!< Program offset of included literal. 
+}; #endif // ROSE_ROSE_PROGRAM_H diff --git a/src/scratch.c b/src/scratch.c index 84d23ced..8e082c77 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -136,6 +136,7 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { s->in_use = 0; s->scratchSize = alloc_size; s->scratch_alloc = (char *)s_tmp; + s->fdr_conf = NULL; // each of these is at an offset from the previous char *current = (char *)s + sizeof(*s); diff --git a/src/scratch.h b/src/scratch.h index 1d4b849e..fa998e84 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -200,6 +200,9 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u32 delay_fatbit_size; /**< size of each delay fatbit in bytes */ u32 scratchSize; char *scratch_alloc; /* user allocated scratch object */ + u64a *fdr_conf; /**< FDR confirm value */ + u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches + * in buffer */ }; /* array of fatbit ptr; TODO: why not an array of fatbits? */ diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index 399147e2..87ab0974 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -36,6 +36,7 @@ #include "fdr/fdr_engine_description.h" #include "fdr/teddy_compile.h" #include "fdr/teddy_engine_description.h" +#include "hwlm/hwlm_internal.h" #include "util/alloc.h" #include "database.h" @@ -135,6 +136,31 @@ vector getValidFdrEngines() { return ret; } + +static +bytecode_ptr buildFDREngineHinted(std::vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey) { + auto proto = fdrBuildProtoHinted(HWLM_ENGINE_FDR, lits, make_small, hint, + target, grey); + if (!proto) { + return nullptr; + } + return fdrBuildTable(*proto, grey); +} + +static +bytecode_ptr buildFDREngine(std::vector &lits, + bool make_small, const target_t &target, + const Grey &grey) { + auto proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small, target, grey); + if (!proto) { + return nullptr; + } + return fdrBuildTable(*proto, grey); +} + class FDRp : public TestWithParam { 
}; @@ -147,10 +173,12 @@ TEST_P(FDRp, Simple) { vector lits; lits.push_back(hwlmLiteral("mnopqr", 0, 0)); - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); struct hs_scratch scratch; + scratch.fdr_conf = NULL; fdrExec(fdr.get(), (const u8 *)data, sizeof(data), 0, decentCallback, &scratch, HWLM_ALL_GROUPS); @@ -170,10 +198,12 @@ TEST_P(FDRp, SimpleSingle) { vector lits; lits.push_back(hwlmLiteral("m", 0, 0)); - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); struct hs_scratch scratch; + scratch.fdr_conf = NULL; fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0, decentCallback, &scratch, HWLM_ALL_GROUPS); @@ -192,7 +222,8 @@ TEST_P(FDRp, MultiLocation) { vector lits; lits.push_back(hwlmLiteral("abc", 0, 1)); - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); const u32 testSize = 128; @@ -200,6 +231,7 @@ TEST_P(FDRp, MultiLocation) { vector data(testSize, 0); struct hs_scratch scratch; + scratch.fdr_conf = NULL; for (u32 i = 0; i < testSize - 3; i++) { memcpy(data.data() + i, "abc", 3); fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &scratch, @@ -220,10 +252,12 @@ TEST_P(FDRp, NoRepeat1) { vector lits = { hwlmLiteral("m", 0, 1, 0, HWLM_ALL_GROUPS, {}, {}) }; - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); struct hs_scratch scratch; + scratch.fdr_conf = NULL; fdrExec(fdr.get(), (const u8 *)data, sizeof(data) 
- 1 /* skip nul */, 0, decentCallback, &scratch, HWLM_ALL_GROUPS); @@ -242,10 +276,12 @@ TEST_P(FDRp, NoRepeat2) { = { hwlmLiteral("m", 0, 1, 0, HWLM_ALL_GROUPS, {}, {}), hwlmLiteral("A", 0, 42) }; - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); struct hs_scratch scratch; + scratch.fdr_conf = NULL; fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0, decentCallback, &scratch, HWLM_ALL_GROUPS); @@ -265,10 +301,12 @@ TEST_P(FDRp, NoRepeat3) { = { hwlmLiteral("90m", 0, 1, 0, HWLM_ALL_GROUPS, {}, {}), hwlmLiteral("zA", 0, 1, 0, HWLM_ALL_GROUPS, {}, {}) }; - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); struct hs_scratch scratch; + scratch.fdr_conf = NULL; fdrExec(fdr.get(), (const u8 *)data, sizeof(data) - 1 /* skip nul */, 0, decentCallback, &scratch, HWLM_ALL_GROUPS); @@ -293,6 +331,7 @@ hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen, hbuf = new_hbuf; } struct hs_scratch scratch; + scratch.fdr_conf = NULL; return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, &scratch, groups); } @@ -304,7 +343,8 @@ TEST_P(FDRp, SmallStreaming) { vector lits = {hwlmLiteral("a", 1, 1), hwlmLiteral("aardvark", 0, 10)}; - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); vector expected; @@ -342,7 +382,8 @@ TEST_P(FDRp, SmallStreaming2) { hwlmLiteral("kk", 1, 2), hwlmLiteral("aardvark", 0, 10)}; - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); 
CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); vector expected; @@ -373,7 +414,8 @@ TEST_P(FDRp, moveByteStream) { vector lits; lits.push_back(hwlmLiteral("mnopqr", 0, 0)); - auto fdrTable0 = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdrTable0 = buildFDREngineHinted(lits, false, hint, + get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdrTable0, hint); size_t size = fdrSize(fdrTable0.get()); @@ -390,6 +432,7 @@ TEST_P(FDRp, moveByteStream) { // check matches struct hs_scratch scratch; + scratch.fdr_conf = NULL; hwlm_error_t fdrStatus = fdrExec(fdrTable.get(), (const u8 *)data, data_len, 0, decentCallback, &scratch, @@ -414,7 +457,8 @@ TEST_P(FDRp, Stream1) { lits.push_back(hwlmLiteral("f", 0, 0)); lits.push_back(hwlmLiteral("literal", 0, 1)); - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); // check matches @@ -470,12 +514,13 @@ TEST_P(FDRpp, AlignAndTooEarly) { vector lits; struct hs_scratch scratch; + scratch.fdr_conf = NULL; for (size_t litLen = 1; litLen <= patLen; litLen++) { // building literal from pattern substring of variable length 1-patLen lits.push_back(hwlmLiteral(string(pattern, 0, litLen), 0, 0)); - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), - Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); // check with buffer offset from aligned start from 0 to 31 @@ -592,6 +637,7 @@ TEST_P(FDRpa, ShortWritings) { // run the literal matching through all generated literals struct hs_scratch scratch; + scratch.fdr_conf = NULL; for (size_t patIdx = 0; patIdx < pats.size();) { // group them in the sets of 32 vector testSigs; @@ -599,8 +645,8 @@ TEST_P(FDRpa, ShortWritings) { testSigs.push_back(hwlmLiteral(pats[patIdx], false, patIdx)); } - auto fdr = 
fdrBuildTableHinted(testSigs, false, hint, - get_current_target(), Grey()); + auto fdr = buildFDREngineHinted(testSigs, false, hint, + get_current_target(), Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); @@ -659,7 +705,7 @@ TEST(FDR, FDRTermS) { lits.push_back(hwlmLiteral("f", 0, 0)); lits.push_back(hwlmLiteral("ff", 0, 1)); - auto fdr = fdrBuildTable(lits, false, get_current_target(), Grey()); + auto fdr = buildFDREngine(lits, false, get_current_target(), Grey()); ASSERT_TRUE(fdr != nullptr); // check matches @@ -682,11 +728,12 @@ TEST(FDR, FDRTermB) { lits.push_back(hwlmLiteral("f", 0, 0)); lits.push_back(hwlmLiteral("ff", 0, 1)); - auto fdr = fdrBuildTable(lits, false, get_current_target(), Grey()); + auto fdr = buildFDREngine(lits, false, get_current_target(), Grey()); ASSERT_TRUE(fdr != nullptr); // check matches struct hs_scratch scratch; + scratch.fdr_conf = NULL; fdrStatus = fdrExec(fdr.get(), (const u8 *)data1, data_len1, 0, decentCallbackT, &scratch, HWLM_ALL_GROUPS); diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index 8bdd0763..81afbeaa 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -36,6 +36,7 @@ #include "fdr/fdr_engine_description.h" #include "fdr/teddy_compile.h" #include "fdr/teddy_engine_description.h" +#include "hwlm/hwlm_internal.h" #include "scratch.h" #include "util/alloc.h" #include "util/bitutils.h" @@ -131,6 +132,16 @@ static vector getValidFdrEngines() { return ret; } +static +bytecode_ptr buildFDREngineHinted(std::vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey) { + auto proto = fdrBuildProtoHinted(HWLM_ENGINE_FDR, lits, make_small, hint, + target, grey); + return fdrBuildTable(*proto, grey); +} + class FDRFloodp : public TestWithParam { }; @@ -142,6 +153,7 @@ TEST_P(FDRFloodp, NoMask) { u8 c = 0; struct hs_scratch scratch; + scratch.fdr_conf = NULL; while (1) { SCOPED_TRACE((unsigned int)c); u8 bit = 1 << (c & 0x7); @@ -169,8 +181,8 @@ 
TEST_P(FDRFloodp, NoMask) { lits.push_back(hwlmLiteral(sAlt, false, i * 8 + 7)); } - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), - Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize, @@ -235,6 +247,7 @@ TEST_P(FDRFloodp, WithMask) { u8 c = '\0'; struct hs_scratch scratch; + scratch.fdr_conf = NULL; while (1) { u8 bit = 1 << (c & 0x7); u8 cAlt = c ^ bit; @@ -305,8 +318,8 @@ TEST_P(FDRFloodp, WithMask) { HWLM_ALL_GROUPS, msk, cmp)); } } - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), - Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize, @@ -400,6 +413,7 @@ TEST_P(FDRFloodp, StreamingMask) { u8 c = '\0'; struct hs_scratch scratch; + scratch.fdr_conf = NULL; while (1) { u8 bit = 1 << (c & 0x7); u8 cAlt = c ^ bit; @@ -470,8 +484,8 @@ TEST_P(FDRFloodp, StreamingMask) { HWLM_ALL_GROUPS, msk, cmp)); } } - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), - Grey()); + auto fdr = buildFDREngineHinted(lits, false, hint, get_current_target(), + Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); hwlm_error_t fdrStatus; From 252eb820c4214b54dd84fd0a7faaa9ef6416370b Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Thu, 20 Jul 2017 16:40:54 -0400 Subject: [PATCH 098/190] ue-3145: make parents of included literals exclusive --- src/fdr/fdr_compile.cpp | 58 +++++++++++++++++++----------------- src/fdr/teddy_compile.h | 2 +- src/rose/program_runtime.h | 5 ++-- src/rose/rose_build_dump.cpp | 2 +- 4 files changed, 36 insertions(+), 31 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 181f9512..dc91010e 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -609,16 +609,18 @@ 
bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) { } /* - * if lit2 is an included literal of both lit1 and lit0, and lit1 is an - * exceptional literal of lit0 - lit1 sometimes matches when lit0 matches, - * then we give up squashing for lit1. e.g. lit0:AAA(no case), lit1:aa, - * lit2:A(no case). We can have duplicate matches for input "aaa" if lit0 - * and lit1 both squash lit2. + * if lit2 is an included literal of both lit0 and lit1, then lit0 and lit1 + * shouldn't match at the same offset, otherwise we give up squashing for lit1. + * e.g. lit0:AAA(no case), lit1:aa, lit2:A(no case). We can have duplicate + * matches for input "aaa" if lit0 and lit1 both squash lit2. */ static bool checkParentLit( - u32 pos1, const unordered_set &parent_map, + const vector &lits, u32 pos1, + const unordered_set &parent_map, const unordered_map> &exception_map) { + assert(pos1 < lits.size()); + const auto &lit1 = lits[pos1]; for (const auto pos2 : parent_map) { if (contains(exception_map, pos2)) { const auto &exception_pos = exception_map.at(pos2); @@ -626,6 +628,16 @@ bool checkParentLit( return false; } } + + /* if lit1 isn't an exception of lit2, then we have to do further + * exclusive check. + * TODO: More mask checks. Note if two literals are group exclusive, + * it is possible that they match at the same offset. 
*/ + assert(pos2 < lits.size()); + const auto &lit2 = lits[pos2]; + if (isSuffix(lit2, lit1)) { + return false; + } } return true; @@ -652,30 +664,26 @@ void buildSquashMask(vector &lits, u32 id1, u32 bucket1, // check if lit2 is a suffix of lit1 if (isSuffix(lit1, lit2)) { /* if we have a included literal in the same bucket, - * quit and let the included literal to do possible squashing - */ + * quit and let the included literal to do possible squashing */ if (bucket1 == bucket2) { DEBUG_PRINTF("same bucket\n"); return; } - /* - * if lit2 is a suffix but doesn't pass included checks for - * extra info, we give up sqaushing - */ + /* if lit2 is a suffix but doesn't pass included checks for + * extra info, we give up sqaushing */ if (includedCheck(lit1, lit2)) { DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id); exception_map[id1].insert(id2); exception = true; - } else if (checkParentLit(id1, parent_map[id2], exception_map)) { + } else if (checkParentLit(lits, id1, parent_map[id2], + exception_map)) { if (lit1.included_id == INVALID_LIT_ID) { DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n", lit1.id, lit2.id); lit1.included_id = lit2.id; } else { - /* - * if we have multiple included literals in one bucket, - * give up squashing. - */ + /* if we have multiple included literals in one bucket, + * give up squashing. */ DEBUG_PRINTF("multiple included literals\n"); lit1.included_id = INVALID_LIT_ID; return; @@ -690,10 +698,8 @@ void buildSquashMask(vector &lits, u32 id1, u32 bucket1, if (bucket2 != nextBucket) { if (included) { if (exception) { - /* - * give up if we have exception literals - * in the same bucket as the included literal - */ + /* give up if we have exception literals + * in the same bucket as the included literal. 
*/ lit1.included_id = INVALID_LIT_ID; } else { parent_map[child_id].insert(id1); @@ -714,14 +720,12 @@ static constexpr u32 INCLUDED_LIMIT = 1000; static void findIncludedLits(vector &lits, const vector>> &lastCharMap) { - /** Map for finding the positions of literal which includes a literal - * in FDR hwlm literal vector. - */ + /* Map for finding the positions of literal which includes a literal + * in FDR hwlm literal vector. */ unordered_map> parent_map; - /** Map for finding the positions of exception literals which could - * sometimes match if a literal matches in FDR hwlm literal vector. - */ + /* Map for finding the positions of exception literals which could + * sometimes match if a literal matches in FDR hwlm literal vector. */ unordered_map> exception_map; for (const auto &group : lastCharMap) { size_t cnt = group.size(); diff --git a/src/fdr/teddy_compile.h b/src/fdr/teddy_compile.h index ec251310..a2b4a13c 100644 --- a/src/fdr/teddy_compile.h +++ b/src/fdr/teddy_compile.h @@ -44,10 +44,10 @@ struct FDR; namespace ue2 { +class TeddyEngineDescription; struct Grey; struct hwlmLiteral; struct target_t; -struct TeddyEngineDescription; bytecode_ptr teddyBuildTable(const HWLMProto &proto, const Grey &grey); diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index ab0934de..e6ce9bdb 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -2581,8 +2581,9 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, pc = getByOffset(t, ri->child_offset); pc_base = pc; programOffset = (const u8 *)pc_base -(const u8 *)t; - DEBUG_PRINTF("pc_base %p pc %p child_offset %u\n", - pc_base, pc, ri->child_offset); + DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n", + pc_base, pc, ri->child_offset, ri->squash); + work_done = 0; continue; } } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index e98308ac..5ab9fc99 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1465,7 
+1465,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(INCLUDED_JUMP) { os << " child_offset " << ri->child_offset << endl; - os << " squash " << ri->squash << endl; + os << " squash " << (u32)ri->squash << endl; } PROGRAM_NEXT_INSTRUCTION From a425bb9b7c6e6f53cf6cadfa80ceca4161732be5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 14 Jul 2017 14:51:53 +1000 Subject: [PATCH 099/190] ue2_graph: move descriptors out of graph struct --- src/util/ue2_graph.h | 150 ++++++++++++++++++++++--------------------- 1 file changed, 77 insertions(+), 73 deletions(-) diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h index 138d7467..1409e091 100644 --- a/src/util/ue2_graph.h +++ b/src/util/ue2_graph.h @@ -168,7 +168,78 @@ struct default_vertex_property { size_t index; }; -} +template +class vertex_descriptor : totally_ordered> { + using vertex_node = typename Graph::vertex_node; +public: + vertex_descriptor() : p(nullptr), serial(0) {} + explicit vertex_descriptor(vertex_node *pp) : p(pp), serial(pp->serial) {} + + operator bool() const { return p; } + bool operator<(const vertex_descriptor b) const { + if (p && b.p) { + /* no vertices in the same graph can have the same serial */ + assert(p == b.p || serial != b.serial); + return serial < b.serial; + } else { + return p < b.p; + } + } + bool operator==(const vertex_descriptor b) const { return p == b.p; } + + friend size_t hash_value(vertex_descriptor v) { + using boost::hash_value; + return hash_value(v.serial); + } + +private: + vertex_node *raw(void) { return p; } + vertex_node *p; + u64a serial; + friend Graph; +}; + +template +class edge_descriptor : totally_ordered> { + using edge_node = typename Graph::edge_node; +public: + edge_descriptor() : p(nullptr), serial(0) {} + explicit edge_descriptor(edge_node *pp) : p(pp), serial(pp->serial) {} + + /* Convenience ctor to allow us to directly get an edge_descriptor from + * edge() and add_edge(). 
As we have null_edges and we always allow + * parallel edges, the bool component of the return from these functions is + * not required. */ + edge_descriptor(const std::pair &tup) + : p(tup.first.p), serial(tup.first.serial) { + assert(tup.second == (bool)tup.first); + } + + operator bool() const { return p; } + bool operator<(const edge_descriptor b) const { + if (p && b.p) { + /* no edges in the same graph can have the same serial */ + assert(p == b.p || serial != b.serial); + return serial < b.serial; + } else { + return p < b.p; + } + } + bool operator==(const edge_descriptor b) const { return p == b.p; } + + friend size_t hash_value(edge_descriptor e) { + using boost::hash_value; + return hash_value(e.serial); + } + +private: + edge_node *raw(void) { return p; } + edge_node *p; + u64a serial; + friend Graph; +}; + +} // namespace graph_detail template; + using edge_descriptor = graph_detail::edge_descriptor; + friend vertex_descriptor; + friend edge_descriptor; + using vertices_size_type = typename vertices_list_type::size_type; using degree_size_type = typename vertex_edge_list::size_type; @@ -293,78 +369,6 @@ public: using vertex_bundled = VertexPropertyType; using edge_bundled = EdgePropertyType; - class vertex_descriptor : totally_ordered { - public: - vertex_descriptor() : p(nullptr), serial(0) { } - explicit vertex_descriptor(vertex_node *pp) - : p(pp), serial(pp->serial) { } - - operator bool() const { return p; } - bool operator<(const vertex_descriptor b) const { - if (p && b.p) { - /* no vertices in the same graph can have the same serial */ - assert(p == b.p || serial != b.serial); - return serial < b.serial; - } else { - return p < b.p; - } - } - bool operator==(const vertex_descriptor b) const { - return p == b.p; - } - - friend size_t hash_value(vertex_descriptor v) { - using boost::hash_value; - return hash_value(v.serial); - } - - private: - vertex_node *raw(void) { return p; } - vertex_node *p; - u64a serial; - friend ue2_graph; - }; - - class 
edge_descriptor : totally_ordered { - public: - edge_descriptor() : p(nullptr), serial(0) { } - explicit edge_descriptor(edge_node *pp) : p(pp), serial(pp->serial) { } - - /* Convenice ctor to allow us to directly get an edge_descriptor from - * edge() and add_edge(). As we have null_edges and we always allow - * parallel edges, the bool component of the return from these functions - * is not required. */ - edge_descriptor(const std::pair &tup) - : p(tup.first.p), serial(tup.first.serial) { - assert(tup.second == (bool)tup.first); - } - - operator bool() const { return p; } - bool operator<(const edge_descriptor b) const { - if (p && b.p) { - /* no edges in the same graph can have the same serial */ - assert(p == b.p || serial != b.serial); - return serial < b.serial; - } else { - return p < b.p; - } - } - bool operator==(const edge_descriptor b) const { - return p == b.p; - } - - friend size_t hash_value(edge_descriptor e) { - using boost::hash_value; - return hash_value(e.serial); - } - - private: - edge_node *raw(void) { return p; } - edge_node *p; - u64a serial; - friend ue2_graph; - }; - private: /* Note: apparently, nested class templates cannot be fully specialised but * they can be partially specialised. Sigh, ... */ From 9cf66b6ac9cdd524d82d1aa68df4d2f1c28dae98 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 14 Jul 2017 14:59:52 +1000 Subject: [PATCH 100/190] util: switch from Boost to std::unordered set/map This commit replaces the ue2::unordered_{set,map} types with their STL versions, with some new hashing utilities in util/hash.h. The new types ue2_unordered_set and ue2_unordered_map default to using the ue2_hasher. The header util/ue2_containers.h has been removed, and the flat_set/map containers moved to util/flat_containers.h. 
--- CMakeLists.txt | 3 +- src/fdr/fdr_compile.cpp | 9 +- src/fdr/fdr_engine_description.h | 1 - src/fdr/teddy_compile.cpp | 1 + src/nfa/accel_dfa_build_strat.cpp | 3 +- src/nfa/accel_dump.cpp | 2 + src/nfa/accelcompile.h | 2 +- src/nfa/castlecompile.cpp | 4 +- src/nfa/castlecompile.h | 7 +- src/nfa/dfa_min.cpp | 2 +- src/nfa/goughcompile.cpp | 2 +- src/nfa/goughcompile.h | 2 +- src/nfa/goughcompile_internal.h | 2 +- src/nfa/goughcompile_reg.cpp | 6 +- src/nfa/limex_compile.cpp | 78 ++++----- src/nfa/limex_compile.h | 15 +- src/nfa/mcclellancompile.cpp | 2 +- src/nfa/mcclellancompile.h | 1 - src/nfa/mcclellancompile_util.cpp | 13 +- src/nfa/mcsheng_compile.cpp | 10 +- src/nfa/rdfa.h | 4 +- src/nfa/rdfa_merge.cpp | 5 +- src/nfa/shengcompile.h | 5 +- src/nfa/shufticompile.cpp | 4 +- src/nfa/shufticompile.h | 4 +- src/nfa/trufflecompile.cpp | 7 +- src/nfagraph/ng.h | 1 - src/nfagraph/ng_calc_components.cpp | 6 +- src/nfagraph/ng_cyclic_redundancy.cpp | 2 +- src/nfagraph/ng_dominators.cpp | 1 - src/nfagraph/ng_dominators.h | 11 +- src/nfagraph/ng_dump.cpp | 8 +- src/nfagraph/ng_dump.h | 7 +- src/nfagraph/ng_edge_redundancy.cpp | 2 +- src/nfagraph/ng_equivalence.cpp | 18 +-- src/nfagraph/ng_execute.h | 4 +- src/nfagraph/ng_haig.cpp | 7 +- src/nfagraph/ng_holder.h | 4 +- src/nfagraph/ng_is_equal.cpp | 14 +- src/nfagraph/ng_limex.cpp | 20 +-- src/nfagraph/ng_limex_accel.h | 2 +- src/nfagraph/ng_literal_analysis.cpp | 2 +- src/nfagraph/ng_literal_component.cpp | 4 +- src/nfagraph/ng_mcclellan.cpp | 5 +- src/nfagraph/ng_mcclellan_internal.h | 4 +- src/nfagraph/ng_misc_opt.cpp | 2 +- src/nfagraph/ng_prefilter.cpp | 11 +- src/nfagraph/ng_prune.cpp | 2 +- src/nfagraph/ng_redundancy.cpp | 9 +- src/nfagraph/ng_region.cpp | 2 +- src/nfagraph/ng_region.h | 20 +-- src/nfagraph/ng_region_redundancy.cpp | 10 +- src/nfagraph/ng_repeat.cpp | 130 +++++++-------- src/nfagraph/ng_repeat.h | 6 +- src/nfagraph/ng_restructuring.cpp | 6 +- src/nfagraph/ng_restructuring.h | 12 +- 
src/nfagraph/ng_revacc.cpp | 4 +- src/nfagraph/ng_som.cpp | 67 ++++---- src/nfagraph/ng_som_util.cpp | 12 +- src/nfagraph/ng_som_util.h | 8 +- src/nfagraph/ng_split.cpp | 28 ++-- src/nfagraph/ng_split.h | 20 +-- src/nfagraph/ng_squash.cpp | 19 ++- src/nfagraph/ng_squash.h | 7 +- src/nfagraph/ng_undirected.h | 8 +- src/nfagraph/ng_util.cpp | 17 +- src/nfagraph/ng_util.h | 19 +-- src/nfagraph/ng_violet.cpp | 12 +- src/parser/Parser.rl | 2 +- src/parser/buildstate.cpp | 8 +- src/parser/check_refs.cpp | 4 +- src/parser/check_refs.h | 16 +- src/rose/rose_build.h | 12 +- src/rose/rose_build_add.cpp | 2 +- src/rose/rose_build_anchored.cpp | 22 ++- src/rose/rose_build_bytecode.cpp | 22 +-- src/rose/rose_build_castle.cpp | 9 +- src/rose/rose_build_compile.cpp | 4 +- src/rose/rose_build_convert.cpp | 5 +- src/rose/rose_build_engine_blob.h | 14 +- src/rose/rose_build_exclusive.cpp | 26 +-- src/rose/rose_build_groups.h | 7 +- src/rose/rose_build_impl.h | 51 +++--- src/rose/rose_build_infix.cpp | 18 ++- src/rose/rose_build_instructions.h | 149 +++++++++--------- src/rose/rose_build_lookaround.cpp | 10 +- src/rose/rose_build_lookaround.h | 20 ++- src/rose/rose_build_merge.cpp | 28 ++-- src/rose/rose_build_misc.cpp | 28 +--- src/rose/rose_build_program.cpp | 11 +- src/rose/rose_build_program.h | 20 +-- src/rose/rose_build_role_aliasing.cpp | 24 +-- src/rose/rose_graph.h | 2 +- src/rose/rose_in_graph.h | 2 +- src/rose/rose_in_util.cpp | 3 +- src/som/slot_manager.cpp | 18 +-- src/som/slot_manager.h | 4 +- src/som/slot_manager_internal.h | 15 +- src/util/accel_scheme.h | 6 +- src/util/bitfield.h | 23 +-- src/util/charreach.h | 19 ++- src/util/clique.cpp | 3 +- src/util/depth.h | 26 ++- .../{ue2_containers.h => flat_containers.h} | 42 ++--- src/util/graph.h | 10 +- src/util/hash.h | 140 ++++++++++++++-- src/util/hash_dynamic_bitset.h | 5 +- src/util/multibit_build.h | 14 +- src/util/partitioned_set.h | 2 +- src/util/report.h | 22 ++- src/util/report_manager.h | 10 +- 
src/util/ue2_graph.h | 41 +++-- src/util/ue2string.cpp | 7 +- src/util/ue2string.h | 26 ++- src/util/unordered.h | 53 +++++++ unit/internal/bitfield.cpp | 9 +- unit/internal/depth.cpp | 6 +- unit/internal/flat_map.cpp | 9 +- unit/internal/flat_set.cpp | 9 +- unit/internal/nfagraph_util.cpp | 18 +-- unit/internal/rose_build_merge.cpp | 10 +- util/ng_corpus_generator.cpp | 4 +- util/ng_find_matches.cpp | 3 +- 123 files changed, 1048 insertions(+), 772 deletions(-) rename src/util/{ue2_containers.h => flat_containers.h} (96%) create mode 100644 src/util/unordered.h diff --git a/CMakeLists.txt b/CMakeLists.txt index c51d6133..9aa30819 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -997,6 +997,7 @@ SET (hs_SRCS src/util/dump_mask.h src/util/fatbit_build.cpp src/util/fatbit_build.h + src/util/flat_containers.h src/util/graph.h src/util/graph_range.h src/util/graph_small_color_map.h @@ -1019,7 +1020,6 @@ SET (hs_SRCS src/util/small_vector.h src/util/target_info.cpp src/util/target_info.h - src/util/ue2_containers.h src/util/ue2_graph.h src/util/ue2string.cpp src/util/ue2string.h @@ -1027,6 +1027,7 @@ SET (hs_SRCS src/util/unicode_def.h src/util/unicode_set.h src/util/uniform_ops.h + src/util/unordered.h src/util/verify_types.h ) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index dc91010e..210729a7 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -48,7 +48,6 @@ #include "util/math.h" #include "util/noncopyable.h" #include "util/target_info.h" -#include "util/ue2_containers.h" #include "util/ue2string.h" #include "util/verify_types.h" @@ -64,6 +63,8 @@ #include #include #include +#include +#include #include #include @@ -459,7 +460,7 @@ bool getMultiEntriesAtPosition(const FDREngineDescription &eng, const vector &vl, const vector &lits, SuffixPositionInString pos, - std::map > &m2) { + map> &m2) { assert(eng.bits < 32); u32 distance = 0; @@ -530,7 +531,7 @@ void FDRCompiler::setupTab() { SuffixPositionInString pLimit = 
eng.getBucketWidth(b); for (SuffixPositionInString pos = 0; pos < pLimit; pos++) { u32 bit = eng.getSchemeBit(b, pos); - map> m2; + map> m2; bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2); if (done) { clearbit(&defaultMask[0], bit); @@ -538,7 +539,7 @@ void FDRCompiler::setupTab() { } for (const auto &elem : m2) { u32 dc = elem.first; - const ue2::unordered_set &mskSet = elem.second; + const unordered_set &mskSet = elem.second; u32 v = ~dc; do { u32 b2 = v & dc; diff --git a/src/fdr/fdr_engine_description.h b/src/fdr/fdr_engine_description.h index 09c5ce86..1c464fe3 100644 --- a/src/fdr/fdr_engine_description.h +++ b/src/fdr/fdr_engine_description.h @@ -30,7 +30,6 @@ #define FDR_ENGINE_DESCRIPTION_H #include "engine_description.h" -#include "util/ue2_containers.h" #include #include diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index bb02f759..98736134 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -49,6 +49,7 @@ #include "util/make_unique.h" #include "util/noncopyable.h" #include "util/popcount.h" +#include "util/small_vector.h" #include "util/target_info.h" #include "util/verify_types.h" diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 7c56ba72..928e078e 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -41,6 +41,7 @@ #include "util/verify_types.h" #include +#include #include #define PATHS_LIMIT 500 @@ -254,7 +255,7 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { u16 top_remap = raw.alpha_remap[TOP]; - ue2::unordered_set seen; + std::unordered_set seen; while (true) { seen.insert(s); DEBUG_PRINTF("basis %hu\n", s); diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index 0d19fa8c..4c33b351 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -44,6 +44,8 @@ #include "util/simd_types.h" #include +#include +#include #include #ifndef DUMP_SUPPORT diff --git a/src/nfa/accelcompile.h 
b/src/nfa/accelcompile.h index 9bd4ff18..d0b3cdc7 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -31,7 +31,7 @@ #include "ue2common.h" #include "util/charreach.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" union AccelAux; diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 40fbc18c..3505e08a 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -48,11 +48,11 @@ #include "util/compile_context.h" #include "util/container.h" #include "util/dump_charclass.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/make_unique.h" #include "util/multibit_build.h" #include "util/report_manager.h" -#include "util/ue2_containers.h" #include "util/verify_types.h" #include "grey.h" @@ -153,7 +153,7 @@ static void getNeighborInfo(const CliqueGraph &g, vector &neighbor, const CliqueVertex &cv, const set &group) { u32 id = g[cv].stateId; - ue2::unordered_set neighborId; + unordered_set neighborId; // find neighbors for cv for (const auto &v : adjacent_vertices_range(cv, g)) { diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index 9f44692d..aa4ed354 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -39,11 +39,12 @@ #include "nfagraph/ng_repeat.h" #include "util/bytecode_ptr.h" #include "util/depth.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include #include #include +#include #include struct NFA; @@ -89,7 +90,7 @@ struct CastleProto { std::map repeats; /** \brief Mapping from report to associated tops. */ - ue2::unordered_map> report_map; + std::unordered_map> report_map; /** * \brief Next top id to use. Repeats may be removed without top remapping, @@ -155,7 +156,7 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2); * of the reports in the given set. 
*/ bool requiresDedupe(const CastleProto &proto, - const ue2::flat_set &reports); + const flat_set &reports); /** * \brief Build an NGHolder from a CastleProto. diff --git a/src/nfa/dfa_min.cpp b/src/nfa/dfa_min.cpp index c97ca5fb..1a07e8a7 100644 --- a/src/nfa/dfa_min.cpp +++ b/src/nfa/dfa_min.cpp @@ -63,9 +63,9 @@ #include "rdfa.h" #include "ue2common.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/noncopyable.h" #include "util/partitioned_set.h" -#include "util/ue2_containers.h" #include #include diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 58b05d3d..ba7f2718 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -37,11 +37,11 @@ #include "nfa_internal.h" #include "util/compile_context.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/order_check.h" #include "util/report_manager.h" -#include "util/ue2_containers.h" #include "util/verify_types.h" #include "ue2common.h" diff --git a/src/nfa/goughcompile.h b/src/nfa/goughcompile.h index 72469f3c..00da1891 100644 --- a/src/nfa/goughcompile.h +++ b/src/nfa/goughcompile.h @@ -33,7 +33,7 @@ #include "nfa_kind.h" #include "ue2common.h" #include "util/bytecode_ptr.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "util/order_check.h" #include diff --git a/src/nfa/goughcompile_internal.h b/src/nfa/goughcompile_internal.h index a6ba0d1b..9de88c77 100644 --- a/src/nfa/goughcompile_internal.h +++ b/src/nfa/goughcompile_internal.h @@ -33,9 +33,9 @@ #include "mcclellancompile.h" #include "ue2common.h" #include "util/charreach.h" +#include "util/flat_containers.h" #include "util/noncopyable.h" #include "util/order_check.h" -#include "util/ue2_containers.h" #include #include diff --git a/src/nfa/goughcompile_reg.cpp b/src/nfa/goughcompile_reg.cpp index a9370450..48e515b9 100644 --- a/src/nfa/goughcompile_reg.cpp +++ 
b/src/nfa/goughcompile_reg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,10 +32,10 @@ #include "gough_internal.h" #include "grey.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" #include "util/order_check.h" -#include "util/ue2_containers.h" #include "ue2common.h" @@ -235,7 +235,7 @@ void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g, if (contains(aux.containing_v, def)) { def_v = aux.containing_v.at(def); } - ue2::unordered_set done; + unordered_set done; while (!pending_vertex.empty()) { GoughVertex current = *pending_vertex.begin(); pending_vertex.erase(current); diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 5e18b800..94d9961b 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -53,12 +53,13 @@ #include "util/charreach.h" #include "util/compile_context.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" #include "util/graph_small_color_map.h" #include "util/order_check.h" +#include "util/unordered.h" #include "util/verify_types.h" -#include "util/ue2_containers.h" #include #include @@ -97,16 +98,16 @@ struct precalcAccel { }; struct limex_accel_info { - ue2::unordered_set accelerable; + unordered_set accelerable; map precalc; - ue2::unordered_map> friends; - ue2::unordered_map accel_map; + unordered_map> friends; + unordered_map accel_map; }; static map reindexByStateId(const map &in, const NGHolder &g, - const ue2::unordered_map &state_ids, + const unordered_map &state_ids, const u32 num_states) { map out; @@ -138,7 +139,7 @@ reindexByStateId(const map &in, const NGHolder &g, struct build_info { build_info(NGHolder &hi, - const 
ue2::unordered_map &states_in, + const unordered_map &states_in, const vector &ri, const map &rsmi, const map &smi, @@ -161,7 +162,7 @@ struct build_info { } NGHolder &h; - const ue2::unordered_map &state_ids; + const unordered_map &state_ids; const vector &repeats; // Squash maps; state sets are indexed by state_id. @@ -169,7 +170,7 @@ struct build_info { map squashMap; const map> &tops; - ue2::unordered_set tugs; + unordered_set tugs; map br_cyclic; const set &zombies; bool do_accel; @@ -479,7 +480,7 @@ bool allow_wide_accel(const vector &vv, const NGHolder &g, static void nfaFindAccelSchemes(const NGHolder &g, const map &br_cyclic, - ue2::unordered_map *out) { + unordered_map *out) { vector refined_cr = reduced_cr(g, br_cyclic); NFAVertex sds_or_proxy = get_sds_or_proxy(g); @@ -504,8 +505,8 @@ void nfaFindAccelSchemes(const NGHolder &g, } struct fas_visitor : public boost::default_bfs_visitor { - fas_visitor(const ue2::unordered_map &am_in, - ue2::unordered_map *out_in) + fas_visitor(const unordered_map &am_in, + unordered_map *out_in) : accel_map(am_in), out(out_in) {} void discover_vertex(NFAVertex v, const NGHolder &) { @@ -516,13 +517,13 @@ struct fas_visitor : public boost::default_bfs_visitor { throw this; /* done */ } } - const ue2::unordered_map &accel_map; - ue2::unordered_map *out; + const unordered_map &accel_map; + unordered_map *out; }; static void filterAccelStates(NGHolder &g, const map> &tops, - ue2::unordered_map *accel_map) { + unordered_map *accel_map) { /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything * else should be ditched. We use a simple BFS to choose accel states near * the start. */ @@ -542,7 +543,7 @@ void filterAccelStates(NGHolder &g, const map> &tops, tempEdges.push_back(e); // Remove edge later. 
} - ue2::unordered_map out; + unordered_map out; try { boost::breadth_first_search(g, g.start, @@ -982,16 +983,18 @@ u32 addSquashMask(const build_info &args, const NFAVertex &v, return idx; } +using ReportListCache = ue2_unordered_map, u32>; + static u32 addReports(const flat_set &r, vector &reports, - unordered_map, u32> &reportListCache) { + ReportListCache &reports_cache) { assert(!r.empty()); vector my_reports(begin(r), end(r)); my_reports.push_back(MO_INVALID_IDX); // sentinel - auto cache_it = reportListCache.find(my_reports); - if (cache_it != end(reportListCache)) { + auto cache_it = reports_cache.find(my_reports); + if (cache_it != end(reports_cache)) { u32 offset = cache_it->second; DEBUG_PRINTF("reusing cached report list at %u\n", offset); return offset; @@ -1007,13 +1010,12 @@ u32 addReports(const flat_set &r, vector &reports, u32 offset = verify_u32(reports.size()); insert(&reports, reports.end(), my_reports); - reportListCache.emplace(move(my_reports), offset); + reports_cache.emplace(move(my_reports), offset); return offset; } static -void buildAcceptsList(const build_info &args, - unordered_map, u32> &reports_cache, +void buildAcceptsList(const build_info &args, ReportListCache &reports_cache, vector &verts, vector &accepts, vector &reports, vector &squash) { if (verts.empty()) { @@ -1051,8 +1053,7 @@ void buildAcceptsList(const build_info &args, } static -void buildAccepts(const build_info &args, - unordered_map, u32> &reports_cache, +void buildAccepts(const build_info &args, ReportListCache &reports_cache, NFAStateSet &acceptMask, NFAStateSet &acceptEodMask, vector &accepts, vector &acceptsEod, vector &reports, vector &squash) { @@ -1119,7 +1120,7 @@ u32 uncompressedStateSize(u32 num_states) { static u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates, - const ue2::unordered_map &state_ids) { + const unordered_map &state_ids) { // Shrink state requirement to enough to fit the compressed largest reach. 
vector allreach(N_CHARS, 0); @@ -1190,7 +1191,7 @@ bool hasSquashableInitDs(const build_info &args) { static bool hasInitDsStates(const NGHolder &h, - const ue2::unordered_map &state_ids) { + const unordered_map &state_ids) { if (state_ids.at(h.startDs) != NO_STATE) { return true; } @@ -1358,17 +1359,16 @@ struct ExceptionProto { }; static -u32 buildExceptionMap(const build_info &args, - unordered_map, u32> &reports_cache, - const ue2::unordered_set &exceptional, +u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache, + const unordered_set &exceptional, map> &exceptionMap, vector &reportList) { const NGHolder &h = args.h; const u32 num_states = args.num_states; u32 exceptionCount = 0; - ue2::unordered_map pos_trigger; - ue2::unordered_map tug_trigger; + unordered_map pos_trigger; + unordered_map tug_trigger; for (u32 i = 0; i < args.repeats.size(); i++) { const BoundedRepeatData &br = args.repeats[i]; @@ -1893,7 +1893,7 @@ struct Factory { static void findExceptionalTransitions(const build_info &args, - ue2::unordered_set &exceptional, + unordered_set &exceptional, u32 maxShift) { const NGHolder &h = args.h; @@ -2168,9 +2168,9 @@ struct Factory { // We track report lists that have already been written into the global // list in case we can reuse them. - unordered_map, u32> reports_cache; + ReportListCache reports_cache; - ue2::unordered_set exceptional; + unordered_set exceptional; u32 shiftCount = findBestNumOfVarShifts(args); assert(shiftCount); u32 maxShift = findMaxVarShift(args, shiftCount); @@ -2374,10 +2374,10 @@ MAKE_LIMEX_TRAITS(512) // Some sanity tests, called by an assertion in generate(). 
static UNUSED bool isSane(const NGHolder &h, const map> &tops, - const ue2::unordered_map &state_ids, + const unordered_map &state_ids, u32 num_states) { - ue2::unordered_set seen; - ue2::unordered_set top_starts; + unordered_set seen; + unordered_set top_starts; for (const auto &vv : tops | map_values) { insert(&top_starts, vv); } @@ -2424,7 +2424,7 @@ bool isSane(const NGHolder &h, const map> &tops, #endif // NDEBUG static -u32 max_state(const ue2::unordered_map &state_ids) { +u32 max_state(const unordered_map &state_ids) { u32 rv = 0; for (const auto &m : state_ids) { DEBUG_PRINTF("state %u\n", m.second); @@ -2437,7 +2437,7 @@ u32 max_state(const ue2::unordered_map &state_ids) { } bytecode_ptr generate(NGHolder &h, - const ue2::unordered_map &states, + const unordered_map &states, const vector &repeats, const map &reportSquashMap, const map &squashMap, @@ -2507,7 +2507,7 @@ bytecode_ptr generate(NGHolder &h, } u32 countAccelStates(NGHolder &h, - const ue2::unordered_map &states, + const unordered_map &states, const vector &repeats, const map &reportSquashMap, const map &squashMap, diff --git a/src/nfa/limex_compile.h b/src/nfa/limex_compile.h index a12ae9f6..3b819739 100644 --- a/src/nfa/limex_compile.h +++ b/src/nfa/limex_compile.h @@ -34,15 +34,16 @@ #ifndef LIMEX_COMPILE_H #define LIMEX_COMPILE_H -#include -#include -#include - #include "nfagraph/ng_holder.h" #include "nfagraph/ng_squash.h" // for NFAStateSet #include "ue2common.h" #include "util/bytecode_ptr.h" -#include "util/ue2_containers.h" + +#include +#include +#include +#include +#include struct NFA; @@ -69,7 +70,7 @@ struct CompileContext; * graph. */ bytecode_ptr generate(NGHolder &g, - const ue2::unordered_map &states, + const std::unordered_map &states, const std::vector &repeats, const std::map &reportSquashMap, const std::map &squashMap, @@ -87,7 +88,7 @@ bytecode_ptr generate(NGHolder &g, * implementable. 
*/ u32 countAccelStates(NGHolder &h, - const ue2::unordered_map &states, + const std::unordered_map &states, const std::vector &repeats, const std::map &reportSquashMap, const std::map &squashMap, diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 93746777..8f73d077 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -46,7 +46,7 @@ #include "util/make_unique.h" #include "util/order_check.h" #include "util/report_manager.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "util/unaligned.h" #include "util/verify_types.h" diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index baf72d9c..ce63fbbf 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -33,7 +33,6 @@ #include "rdfa.h" #include "ue2common.h" #include "util/bytecode_ptr.h" -#include "util/ue2_containers.h" #include #include diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index 317c5889..977cf3d5 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -30,12 +30,11 @@ #include "rdfa.h" #include "util/container.h" -#include "util/ue2_containers.h" +#include "util/hash.h" #include "ue2common.h" #include - -#include +#include using namespace std; @@ -232,22 +231,18 @@ bool has_non_eod_accepts(const raw_dfa &rdfa) { } size_t hash_dfa_no_reports(const raw_dfa &rdfa) { - using boost::hash_combine; - using boost::hash_range; - size_t v = 0; hash_combine(v, rdfa.alpha_size); - hash_combine(v, hash_range(begin(rdfa.alpha_remap), end(rdfa.alpha_remap))); + hash_combine(v, rdfa.alpha_remap); for (const auto &ds : rdfa.states) { - hash_combine(v, hash_range(begin(ds.next), end(ds.next))); + hash_combine(v, ds.next); } return v; } size_t hash_dfa(const raw_dfa &rdfa) { - using boost::hash_combine; size_t v = 0; hash_combine(v, hash_dfa_no_reports(rdfa)); hash_combine(v, all_reports(rdfa)); diff --git 
a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index 2049fee0..728f03be 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -45,13 +45,14 @@ #include "util/compare.h" #include "util/compile_context.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/order_check.h" #include "util/report_manager.h" -#include "util/ue2_containers.h" #include "util/unaligned.h" +#include "util/unordered.h" #include "util/verify_types.h" #include @@ -383,6 +384,8 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) { #define MAX_SHENG_STATES 16 #define MAX_SHENG_LEAKINESS 0.05 +using LeakinessCache = ue2_unordered_map, double>; + /** * Returns the proportion of strings of length 'depth' which will leave the * sheng region when starting at state 'u'. @@ -390,8 +393,7 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) { static double leakiness(const RdfaGraph &g, dfa_info &info, const flat_set &sheng_states, RdfaVertex u, - u32 depth, - unordered_map, double> &cache) { + u32 depth, LeakinessCache &cache) { double rv = 0; if (contains(cache, make_pair(u, depth))) { return cache[make_pair(u, depth)]; @@ -426,7 +428,7 @@ double leakiness(const RdfaGraph &g, dfa_info &info, static double leakiness(const RdfaGraph &g, dfa_info &info, const flat_set &sheng_states, RdfaVertex u) { - unordered_map, double> cache; + LeakinessCache cache; double rv = leakiness(g, info, sheng_states, u, 8, cache); return rv; } diff --git a/src/nfa/rdfa.h b/src/nfa/rdfa.h index fc60f177..0936fb15 100644 --- a/src/nfa/rdfa.h +++ b/src/nfa/rdfa.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 
@@ #include "nfa_kind.h" #include "ue2common.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include #include diff --git a/src/nfa/rdfa_merge.cpp b/src/nfa/rdfa_merge.cpp index 0905dc08..2ad87123 100644 --- a/src/nfa/rdfa_merge.cpp +++ b/src/nfa/rdfa_merge.cpp @@ -36,9 +36,10 @@ #include "nfagraph/ng_mcclellan_internal.h" #include "util/container.h" #include "util/determinise.h" +#include "util/flat_containers.h" #include "util/make_unique.h" #include "util/report_manager.h" -#include "util/ue2_containers.h" +#include "util/unordered.h" #include #include @@ -54,7 +55,7 @@ namespace { class Automaton_Merge { public: using StateSet = vector; - using StateMap = unordered_map; + using StateMap = ue2_unordered_map; Automaton_Merge(const raw_dfa *rdfa1, const raw_dfa *rdfa2, const ReportManager *rm_in, const Grey &grey_in) diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h index 9885cd16..2fe1e356 100644 --- a/src/nfa/shengcompile.h +++ b/src/nfa/shengcompile.h @@ -33,7 +33,10 @@ #include "rdfa.h" #include "util/bytecode_ptr.h" #include "util/charreach.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" + +#include +#include struct NFA; diff --git a/src/nfa/shufticompile.cpp b/src/nfa/shufticompile.cpp index 12a94b7b..f712ef94 100644 --- a/src/nfa/shufticompile.cpp +++ b/src/nfa/shufticompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,7 +33,7 @@ #include "ue2common.h" #include "util/charreach.h" #include "util/container.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include #include diff --git a/src/nfa/shufticompile.h b/src/nfa/shufticompile.h index a72904e0..59b9c38d 100644 --- a/src/nfa/shufticompile.h +++ b/src/nfa/shufticompile.h @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ #include "ue2common.h" #include "util/charreach.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include diff --git a/src/nfa/trufflecompile.cpp b/src/nfa/trufflecompile.cpp index 9442d046..f19de0ee 100644 --- a/src/nfa/trufflecompile.cpp +++ b/src/nfa/trufflecompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,12 +32,15 @@ * truffle is always able to represent an entire character class, providing a * backstop to other acceleration engines. */ + #include "trufflecompile.h" + #include "ue2common.h" #include "util/charreach.h" +#include "util/dump_mask.h" #include "util/simd_types.h" -#include "util/dump_mask.h" +#include using namespace std; diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index a5a5c235..a1304583 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -44,7 +44,6 @@ #include "util/graph.h" #include "util/noncopyable.h" #include "util/report_manager.h" -#include "util/ue2_containers.h" #include #include diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index 7ac57dab..65574b50 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -310,11 +310,11 @@ void splitIntoComponents(unique_ptr g, return; } - ue2::unordered_map old2new; + unordered_map old2new; auto ug = createUnGraph(*g, true, true, old2new); // Construct reverse mapping. 
- ue2::unordered_map new2old; + unordered_map new2old; for (const auto &m : old2new) { new2old.emplace(m.second, m.first); } @@ -356,7 +356,7 @@ void splitIntoComponents(unique_ptr g, DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c); } - ue2::unordered_map v_map; // temp map for fillHolder + unordered_map v_map; // temp map for fillHolder for (auto &vv : verts) { // Shells are in every component. vv.insert(vv.end(), begin(head_shell), end(head_shell)); diff --git a/src/nfagraph/ng_cyclic_redundancy.cpp b/src/nfagraph/ng_cyclic_redundancy.cpp index 80980a66..c8d34687 100644 --- a/src/nfagraph/ng_cyclic_redundancy.cpp +++ b/src/nfagraph/ng_cyclic_redundancy.cpp @@ -62,9 +62,9 @@ #include "ng_prune.h" #include "ng_util.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/graph_small_color_map.h" -#include "util/ue2_containers.h" #include #include diff --git a/src/nfagraph/ng_dominators.cpp b/src/nfagraph/ng_dominators.cpp index 50536b76..d6a064d1 100644 --- a/src/nfagraph/ng_dominators.cpp +++ b/src/nfagraph/ng_dominators.cpp @@ -36,7 +36,6 @@ #include "ue2common.h" #include "ng_holder.h" #include "ng_util.h" -#include "util/ue2_containers.h" #include // locally patched version #include diff --git a/src/nfagraph/ng_dominators.h b/src/nfagraph/ng_dominators.h index 81b7e037..f505b7e4 100644 --- a/src/nfagraph/ng_dominators.h +++ b/src/nfagraph/ng_dominators.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,15 +36,14 @@ #define NG_DOMINATORS_H #include "ng_holder.h" -#include "util/ue2_containers.h" + +#include namespace ue2 { -class NGHolder; +std::unordered_map findDominators(const NGHolder &g); -ue2::unordered_map findDominators(const NGHolder &g); - -ue2::unordered_map 
findPostDominators(const NGHolder &g); +std::unordered_map findPostDominators(const NGHolder &g); } // namespace ue2 diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp index 9624f762..8777a750 100644 --- a/src/nfagraph/ng_dump.cpp +++ b/src/nfagraph/ng_dump.cpp @@ -176,7 +176,7 @@ public: : g(g_in), rm(&rm_in) {} NFAWriter(const GraphT &g_in, - const ue2::unordered_map ®ion_map_in) + const unordered_map ®ion_map_in) : g(g_in), region_map(®ion_map_in) {} void operator()(ostream& os, const VertexT& v) const { @@ -254,7 +254,7 @@ public: private: const GraphT &g; const ReportManager *rm = nullptr; - const ue2::unordered_map *region_map = nullptr; + const unordered_map *region_map = nullptr; }; } @@ -278,7 +278,7 @@ void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm) { template void dumpGraphImpl(const char *name, const GraphT &g, - const ue2::unordered_map ®ion_map) { + const unordered_map ®ion_map) { typedef typename boost::graph_traits::vertex_descriptor VertexT; typedef typename boost::graph_traits::edge_descriptor EdgeT; ofstream os(name); @@ -332,7 +332,7 @@ void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber, } void dumpHolderImpl(const NGHolder &h, - const ue2::unordered_map ®ion_map, + const unordered_map ®ion_map, unsigned int stageNumber, const char *stageName, const Grey &grey) { if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) { diff --git a/src/nfagraph/ng_dump.h b/src/nfagraph/ng_dump.h index 077f07ce..3e12d1d2 100644 --- a/src/nfagraph/ng_dump.h +++ b/src/nfagraph/ng_dump.h @@ -36,7 +36,8 @@ #include "grey.h" #include "ng_holder.h" // for graph types #include "ue2common.h" -#include "util/ue2_containers.h" + +#include #ifdef DUMP_SUPPORT #include @@ -75,7 +76,7 @@ void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber, // Variant that takes a region map as well. 
void dumpHolderImpl(const NGHolder &h, - const ue2::unordered_map ®ion_map, + const std::unordered_map ®ion_map, unsigned int stageNumber, const char *stageName, const Grey &grey); @@ -123,7 +124,7 @@ void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber, UNUSED static inline void dumpHolder(UNUSED const NGHolder &h, - UNUSED const ue2::unordered_map ®ion_map, + UNUSED const std::unordered_map ®ion_map, UNUSED unsigned int stageNumber, UNUSED const char *name, UNUSED const Grey &grey) { #ifdef DUMP_SUPPORT diff --git a/src/nfagraph/ng_edge_redundancy.cpp b/src/nfagraph/ng_edge_redundancy.cpp index 1578d2e4..b8354bd4 100644 --- a/src/nfagraph/ng_edge_redundancy.cpp +++ b/src/nfagraph/ng_edge_redundancy.cpp @@ -38,8 +38,8 @@ #include "parser/position.h" #include "util/compile_context.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph_range.h" -#include "util/ue2_containers.h" #include #include diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index 438e5ea8..a42a0ac7 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -37,9 +37,10 @@ #include "ng_holder.h" #include "ng_util.h" #include "util/compile_context.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/make_unique.h" -#include "util/ue2_containers.h" +#include "util/unordered.h" #include #include @@ -121,16 +122,9 @@ public: vertex_flags == b.vertex_flags && rs == b.rs; } - friend size_t hash_value(const ClassInfo &c) { - size_t val = 0; - boost::hash_combine(val, c.rs); - boost::hash_combine(val, c.vertex_flags); - boost::hash_combine(val, c.cr); - boost::hash_combine(val, c.adjacent_cr); - boost::hash_combine(val, c.node_type); - boost::hash_combine(val, c.depth.d1); - boost::hash_combine(val, c.depth.d2); - return val; + size_t hash() const { + return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1, + depth.d2); } private: @@ -319,7 +313,7 @@ 
vector partitionGraph(vector> &infos, const size_t num_verts = infos.size(); vector classes; - unordered_map classinfomap; + ue2_unordered_map classinfomap; // assume we will have lots of classes, so we don't waste time resizing // these structures. diff --git a/src/nfagraph/ng_execute.h b/src/nfagraph/ng_execute.h index bdcfecfd..32f5520d 100644 --- a/src/nfagraph/ng_execute.h +++ b/src/nfagraph/ng_execute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ #define NG_EXECUTE_H #include "ng_holder.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index 9582a1e8..992faf7c 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -40,11 +40,12 @@ #include "util/bitfield.h" #include "util/container.h" #include "util/determinise.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" #include "util/hash_dynamic_bitset.h" #include "util/make_unique.h" -#include "util/ue2_containers.h" +#include "util/unordered.h" #include #include @@ -258,7 +259,7 @@ public: struct Graph_Traits { using StateSet = bitfield; - using StateMap = ue2::unordered_map; + using StateMap = unordered_map; static StateSet init_states(UNUSED u32 num) { assert(num <= NFA_STATE_LIMIT); @@ -286,7 +287,7 @@ public: class Automaton_Haig_Merge { public: using StateSet = vector; - using StateMap = unordered_map; + using StateMap = ue2_unordered_map; explicit Automaton_Haig_Merge(const vector &in) : nfas(in.begin(), in.end()), dead(in.size()) { diff --git a/src/nfagraph/ng_holder.h b/src/nfagraph/ng_holder.h index fbb6ac52..f61c476a 100644 --- a/src/nfagraph/ng_holder.h +++ b/src/nfagraph/ng_holder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,7 +40,7 @@ #include "ue2common.h" #include "nfa/nfa_kind.h" #include "util/charreach.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "util/ue2_graph.h" namespace ue2 { diff --git a/src/nfagraph/ng_is_equal.cpp b/src/nfagraph/ng_is_equal.cpp index 2df79f50..35a09d0e 100644 --- a/src/nfagraph/ng_is_equal.cpp +++ b/src/nfagraph/ng_is_equal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,13 +39,9 @@ #include "ng_util.h" #include "ue2common.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/make_unique.h" -#include "util/ue2_containers.h" - -#include - -#include using namespace std; @@ -200,11 +196,11 @@ u64a hash_holder(const NGHolder &g) { size_t rv = 0; for (auto v : vertices_range(g)) { - boost::hash_combine(rv, g[v].index); - boost::hash_combine(rv, g[v].char_reach); + hash_combine(rv, g[v].index); + hash_combine(rv, g[v].char_reach); for (auto w : adjacent_vertices_range(v, g)) { - boost::hash_combine(rv, g[w].index); + hash_combine(rv, g[w].index); } } diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 283bba22..1daec578 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -53,11 +53,13 @@ #include "util/container.h" #include "util/graph_range.h" #include "util/report_manager.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "util/verify_types.h" #include #include +#include +#include #include #include @@ -73,8 +75,8 @@ namespace ue2 { // Only used in 
assertions. static bool sanityCheckGraph(const NGHolder &g, - const ue2::unordered_map &state_ids) { - ue2::unordered_set seen_states; + const unordered_map &state_ids) { + unordered_set seen_states; for (auto v : vertices_range(g)) { // Non-specials should have non-empty reachability. @@ -468,7 +470,7 @@ void makeTopStates(NGHolder &g, map> &tops_out, static set findZombies(const NGHolder &h, const map &br_cyclic, - const ue2::unordered_map &state_ids, + const unordered_map &state_ids, const CompileContext &cc) { set zombies; if (!cc.grey.allowZombies) { @@ -516,7 +518,7 @@ set findZombies(const NGHolder &h, } static -void reverseStateOrdering(ue2::unordered_map &state_ids) { +void reverseStateOrdering(unordered_map &state_ids) { vector ordering; for (auto &e : state_ids) { if (e.second == NO_STATE) { @@ -569,7 +571,7 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, const map &fixed_depth_tops, const map>> &triggers, bool impl_test_only, const CompileContext &cc, - ue2::unordered_map &state_ids, + unordered_map &state_ids, vector &repeats, map> &tops) { assert(is_triggered(h_in) || fixed_depth_tops.empty()); @@ -637,7 +639,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, assert(rm); } - ue2::unordered_map state_ids; + unordered_map state_ids; vector repeats; map> tops; unique_ptr h @@ -785,7 +787,7 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can * implement it as an NFA. 
*/ - ue2::unordered_map state_ids; + unordered_map state_ids; vector repeats; map> tops; unique_ptr h @@ -832,7 +834,7 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, const map fixed_depth_tops; // empty const map>> triggers; // empty - ue2::unordered_map state_ids; + unordered_map state_ids; vector repeats; map> tops; unique_ptr h diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index f0c98db2..4c3d2b91 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -39,8 +39,8 @@ #include "nfa/accelcompile.h" #include "util/accel_scheme.h" #include "util/charreach.h" +#include "util/flat_containers.h" #include "util/order_check.h" -#include "util/ue2_containers.h" #include #include diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index 87c4e79e..ea0def02 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -811,7 +811,7 @@ bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, } assert(u != g.startDs); - ue2::unordered_map rhs_map; + unordered_map rhs_map; vector pivots = make_vector_from(adjacent_vertices(u, g)); splitRHS(g, pivots, rhs, &rhs_map); diff --git a/src/nfagraph/ng_literal_component.cpp b/src/nfagraph/ng_literal_component.cpp index de05e490..4d3965df 100644 --- a/src/nfagraph/ng_literal_component.cpp +++ b/src/nfagraph/ng_literal_component.cpp @@ -45,6 +45,8 @@ #include "util/graph_range.h" #include "util/ue2string.h" +#include + using namespace std; namespace ue2 { @@ -196,7 +198,7 @@ bool splitOffLiterals(NG &ng, NGHolder &g) { bool changed = false; set dead; - ue2::unordered_set unanchored; // for faster lookup. + unordered_set unanchored; // for faster lookup. insert(&unanchored, adjacent_vertices(g.startDs, g)); // Anchored literals. 
diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index ec8ae223..091b89b8 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -41,17 +41,18 @@ #include "ue2common.h" #include "util/bitfield.h" #include "util/determinise.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/hash.h" #include "util/hash_dynamic_bitset.h" #include "util/make_unique.h" #include "util/report_manager.h" -#include "util/ue2_containers.h" #include #include #include #include +#include #include #include @@ -483,7 +484,7 @@ public: struct Graph_Traits { using StateSet = bitfield; - using StateMap = ue2::unordered_map; + using StateMap = unordered_map; static StateSet init_states(UNUSED u32 num) { assert(num <= NFA_STATE_LIMIT); diff --git a/src/nfagraph/ng_mcclellan_internal.h b/src/nfagraph/ng_mcclellan_internal.h index b78dac3b..f069d733 100644 --- a/src/nfagraph/ng_mcclellan_internal.h +++ b/src/nfagraph/ng_mcclellan_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,7 +38,7 @@ #include "nfagraph/ng_holder.h" #include "util/charreach.h" #include "util/graph_range.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index c8dfcbab..8aaaf99f 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -70,7 +70,7 @@ #include "util/container.h" #include "util/graph_range.h" #include "util/graph_small_color_map.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "ue2common.h" #include diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index 64d4cf2f..04611872 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ 
b/src/nfagraph/ng_prefilter.cpp @@ -55,10 +55,11 @@ #include "util/compile_context.h" #include "util/container.h" #include "util/dump_charclass.h" -#include "util/ue2_containers.h" #include "util/graph_range.h" #include +#include +#include #include @@ -127,10 +128,10 @@ struct RegionInfoQueueComp { static void findWidths(const NGHolder &g, - const ue2::unordered_map ®ion_map, + const unordered_map ®ion_map, RegionInfo &ri) { NGHolder rg; - ue2::unordered_map mapping; + unordered_map mapping; fillHolder(&rg, g, ri.vertices, &mapping); // Wire our entries to start and our exits to accept. @@ -155,7 +156,7 @@ void findWidths(const NGHolder &g, // acc can be either h.accept or h.acceptEod. static void markBoundaryRegions(const NGHolder &h, - const ue2::unordered_map ®ion_map, + const unordered_map ®ion_map, map ®ions, NFAVertex acc) { for (auto v : inv_adjacent_vertices_range(acc, h)) { if (is_special(v, h)) { @@ -174,7 +175,7 @@ void markBoundaryRegions(const NGHolder &h, static map findRegionInfo(const NGHolder &h, - const ue2::unordered_map ®ion_map) { + const unordered_map ®ion_map) { map regions; for (auto v : vertices_range(h)) { if (is_special(v, h)) { diff --git a/src/nfagraph/ng_prune.cpp b/src/nfagraph/ng_prune.cpp index 72d017ae..adda7031 100644 --- a/src/nfagraph/ng_prune.cpp +++ b/src/nfagraph/ng_prune.cpp @@ -223,7 +223,7 @@ void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) { static bool isDominatedByReporter(const NGHolder &g, - const ue2::unordered_map &dom, + const unordered_map &dom, NFAVertex v, ReportID report_id) { for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) { NFAVertex u = it->second; diff --git a/src/nfagraph/ng_redundancy.cpp b/src/nfagraph/ng_redundancy.cpp index 76bc93da..06b9daee 100644 --- a/src/nfagraph/ng_redundancy.cpp +++ b/src/nfagraph/ng_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source 
and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -78,8 +78,8 @@ #include "ng_util.h" #include "ue2common.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph_range.h" -#include "util/ue2_containers.h" #include #include @@ -747,7 +747,7 @@ u32 findCyclic(const NGHolder &g, vector &cyclic) { static void findCyclicDom(NGHolder &g, vector &cyclic, set &dead, som_type som) { - ue2::unordered_map dominators = findDominators(g); + auto dominators = findDominators(g); for (auto v : vertices_range(g)) { if (is_special(v, g)) { @@ -791,8 +791,7 @@ void findCyclicDom(NGHolder &g, vector &cyclic, static void findCyclicPostDom(NGHolder &g, vector &cyclic, set &dead) { - ue2::unordered_map postdominators = - findPostDominators(g); + auto postdominators = findPostDominators(g); for (auto v : vertices_range(g)) { if (is_special(v, g)) { diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 6463a281..2675be64 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -56,7 +56,7 @@ #include "ng_util.h" #include "ue2common.h" #include "util/container.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/graph_small_color_map.h" diff --git a/src/nfagraph/ng_region.h b/src/nfagraph/ng_region.h index a56933dc..a4708a58 100644 --- a/src/nfagraph/ng_region.h +++ b/src/nfagraph/ng_region.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,19 +36,19 @@ #include "ng_holder.h" #include "util/container.h" #include "util/graph_range.h" -#include "util/ue2_containers.h" +#include #include namespace ue2 { /** \brief Assign a region ID to every vertex in the graph. 
*/ -ue2::unordered_map assignRegions(const NGHolder &g); +std::unordered_map assignRegions(const NGHolder &g); /** \brief True if vertices \p a and \p b are in the same region. */ template bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b, - const ue2::unordered_map ®ion_map) { + const std::unordered_map ®ion_map) { assert(contains(region_map, a) && contains(region_map, b)); return region_map.at(a) == region_map.at(b) && @@ -58,7 +58,7 @@ bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b, /** \brief True if vertex \p b is in a later region than vertex \p a. */ template bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b, - const ue2::unordered_map ®ion_map) { + const std::unordered_map ®ion_map) { assert(contains(region_map, a) && contains(region_map, b)); u32 aa = g[a].index; @@ -85,7 +85,7 @@ bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b, /** \brief True if vertex \p b is in an earlier region than vertex \p a. */ template bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b, - const ue2::unordered_map ®ion_map) { + const std::unordered_map ®ion_map) { assert(contains(region_map, a) && contains(region_map, b)); u32 aa = g[a].index; @@ -112,7 +112,7 @@ bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b, /** \brief True if vertex \p v is an entry vertex for its region. */ template bool isRegionEntry(const Graph &g, NFAVertex v, - const ue2::unordered_map ®ion_map) { + const std::unordered_map ®ion_map) { // Note that some graph types do not have inv_adjacent_vertices, so we must // use in_edges here. for (const auto &e : in_edges_range(v, g)) { @@ -127,7 +127,7 @@ bool isRegionEntry(const Graph &g, NFAVertex v, /** \brief True if vertex \p v is an exit vertex for its region. 
*/ template bool isRegionExit(const Graph &g, NFAVertex v, - const ue2::unordered_map ®ion_map) { + const std::unordered_map ®ion_map) { for (auto w : adjacent_vertices_range(v, g)) { if (!inSameRegion(g, v, w, region_map)) { return true; @@ -140,7 +140,7 @@ bool isRegionExit(const Graph &g, NFAVertex v, /** \brief True if vertex \p v is in a region all on its own. */ template bool isSingletonRegion(const Graph &g, NFAVertex v, - const ue2::unordered_map ®ion_map) { + const std::unordered_map ®ion_map) { for (const auto &e : in_edges_range(v, g)) { auto u = source(e, g); if (u != v && inSameRegion(g, v, u, region_map)) { @@ -178,7 +178,7 @@ bool isSingletonRegion(const Graph &g, NFAVertex v, */ template bool isOptionalRegion(const Graph &g, NFAVertex v, - const ue2::unordered_map ®ion_map) { + const std::unordered_map ®ion_map) { assert(isRegionEntry(g, v, region_map)); DEBUG_PRINTF("check if r%u is optional (inspecting v%zu)\n", diff --git a/src/nfagraph/ng_region_redundancy.cpp b/src/nfagraph/ng_region_redundancy.cpp index 264e4312..1126d4d6 100644 --- a/src/nfagraph/ng_region_redundancy.cpp +++ b/src/nfagraph/ng_region_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -60,7 +60,7 @@ struct RegionInfo { static bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region, const flat_set &expected_reports, - const ue2::unordered_map ®ion_map) { + const unordered_map ®ion_map) { /* TODO: only check vertices connected to accept/acceptEOD */ for (auto v : vertices_range(g)) { if (region != region_map.at(v)) { @@ -84,7 +84,7 @@ bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region, static void processCyclicStateForward(NGHolder &h, NFAVertex cyc, const map &info, - const ue2::unordered_map ®ion_map, + const unordered_map 
®ion_map, set &deadRegions) { u32 region = region_map.at(cyc); CharReach cr = h[cyc].char_reach; @@ -130,7 +130,7 @@ void processCyclicStateForward(NGHolder &h, NFAVertex cyc, static void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, const map &info, - const ue2::unordered_map ®ion_map, + const unordered_map ®ion_map, set &deadRegions) { u32 region = region_map.at(cyc); CharReach cr = h[cyc].char_reach; @@ -179,7 +179,7 @@ void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, static map buildRegionInfoMap(const NGHolder &g, - const ue2::unordered_map ®ion_map) { + const unordered_map ®ion_map) { map info; for (auto v : vertices_range(g)) { diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index 4487376a..da42b36d 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -49,10 +49,13 @@ #include "util/graph_range.h" #include "util/graph_small_color_map.h" #include "util/report_manager.h" +#include "util/unordered.h" #include #include #include +#include +#include #include #include @@ -64,6 +67,7 @@ using namespace std; using boost::depth_first_search; using boost::depth_first_visit; +using boost::make_assoc_property_map; namespace ue2 { @@ -118,7 +122,7 @@ struct ReachSubgraph { static void findInitDepths(const NGHolder &g, - ue2::unordered_map &depths) { + unordered_map &depths) { auto d = calcDepths(g); for (auto v : vertices_range(g)) { @@ -133,12 +137,12 @@ vector buildTopoOrder(const RepeatGraph &g) { /* Note: RepeatGraph is a filtered version of NGHolder and still has * NFAVertex as its vertex descriptor */ - typedef ue2::unordered_set EdgeSet; + typedef unordered_set EdgeSet; EdgeSet deadEdges; // We don't have indices spanning [0,N] on our filtered graph, so we // provide a colour map. - ue2::unordered_map colours; + unordered_map colours; depth_first_search(g, visitor(BackEdges(deadEdges)). 
color_map(make_assoc_property_map(colours))); @@ -155,22 +159,22 @@ vector buildTopoOrder(const RepeatGraph &g) { static void proper_pred(const NGHolder &g, NFAVertex v, - ue2::unordered_set &p) { + unordered_set &p) { pred(g, v, &p); p.erase(v); // self-loops } static void proper_succ(const NGHolder &g, NFAVertex v, - ue2::unordered_set &s) { + unordered_set &s) { succ(g, v, &s); s.erase(v); // self-loops } static bool roguePredecessor(const NGHolder &g, NFAVertex v, - const ue2::unordered_set &involved, - const ue2::unordered_set &pred) { + const unordered_set &involved, + const unordered_set &pred) { u32 seen = 0; for (auto u : inv_adjacent_vertices_range(v, g)) { @@ -195,8 +199,8 @@ bool roguePredecessor(const NGHolder &g, NFAVertex v, static bool rogueSuccessor(const NGHolder &g, NFAVertex v, - const ue2::unordered_set &involved, - const ue2::unordered_set &succ) { + const unordered_set &involved, + const unordered_set &succ) { u32 seen = 0; for (auto w : adjacent_vertices_range(v, g)) { if (contains(involved, w)) { @@ -245,10 +249,10 @@ bool hasDifferentTops(const NGHolder &g, const vector &verts) { static bool vertexIsBad(const NGHolder &g, NFAVertex v, - const ue2::unordered_set &involved, - const ue2::unordered_set &tail, - const ue2::unordered_set &pred, - const ue2::unordered_set &succ, + const unordered_set &involved, + const unordered_set &tail, + const unordered_set &pred, + const unordered_set &succ, const flat_set &reports) { DEBUG_PRINTF("check vertex %zu\n", g[v].index); @@ -293,13 +297,13 @@ void splitSubgraph(const NGHolder &g, const deque &verts, // We construct a copy of the graph using just the vertices we want, rather // than using a filtered_graph -- this way is faster. 
NGHolder verts_g; - ue2::unordered_map verts_map; // in g -> in verts_g + unordered_map verts_map; // in g -> in verts_g fillHolder(&verts_g, g, verts, &verts_map); - ue2::unordered_map old2new; + unordered_map old2new; auto ug = createUnGraph(verts_g, true, true, old2new); - ue2::unordered_map repeatMap; + unordered_map repeatMap; size_t num = connected_components(ug, make_assoc_property_map(repeatMap)); DEBUG_PRINTF("found %zu connected repeat components\n", num); @@ -377,10 +381,10 @@ void checkReachSubgraphs(const NGHolder &g, vector &rs, continue; } - ue2::unordered_set involved(rsi.vertices.begin(), - rsi.vertices.end()); - ue2::unordered_set tail(involved); // to look for back-edges. - ue2::unordered_set pred, succ; + unordered_set involved(rsi.vertices.begin(), + rsi.vertices.end()); + unordered_set tail(involved); // to look for back-edges. + unordered_set pred, succ; proper_pred(g, rsi.vertices.front(), pred); proper_succ(g, rsi.vertices.back(), succ); @@ -514,7 +518,7 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, NFAVertex first = rsi.vertices.front(); NFAVertex last = rsi.vertices.back(); - typedef ue2::unordered_map DistanceMap; + typedef unordered_map DistanceMap; DistanceMap dist; // Initial distance sets. @@ -608,7 +612,7 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, static bool allPredsInSubgraph(NFAVertex v, const NGHolder &g, - const ue2::unordered_set &involved) { + const unordered_set &involved) { for (auto u : inv_adjacent_vertices_range(v, g)) { if (!contains(involved, u)) { return false; @@ -619,8 +623,8 @@ bool allPredsInSubgraph(NFAVertex v, const NGHolder &g, static void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v, - const ue2::unordered_set &involved, - ue2::unordered_map &depths, + const unordered_set &involved, + unordered_map &depths, vector &tugs) { if (allPredsInSubgraph(v, g, involved)) { // We can transform this vertex into a tug trigger in-place. 
@@ -699,7 +703,7 @@ u32 unpeelAmount(const NGHolder &g, const ReachSubgraph &rsi) { static void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi, - ue2::unordered_map &depths, + unordered_map &depths, vector *succs) { u32 unpeel = unpeelAmount(g, rsi); DEBUG_PRINTF("unpeeling %u vertices\n", unpeel); @@ -759,8 +763,8 @@ void getSuccessors(const NGHolder &g, const ReachSubgraph &rsi, static void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, vector *repeats, - ue2::unordered_map &depths, - ue2::unordered_set &created) { + unordered_map &depths, + unordered_set &created) { assert(!rsi.bad); assert(rsi.repeatMin > depth(0)); assert(rsi.repeatMax >= rsi.repeatMin); @@ -768,7 +772,7 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, DEBUG_PRINTF("entry\n"); - const ue2::unordered_set involved(rsi.vertices.begin(), + const unordered_set involved(rsi.vertices.begin(), rsi.vertices.end()); vector succs; getSuccessors(g, rsi, &succs); @@ -829,16 +833,16 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, static void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi, vector *repeats, - ue2::unordered_map &depths, - ue2::unordered_set &created) { + unordered_map &depths, + unordered_set &created) { assert(!rsi.bad); assert(rsi.repeatMin); assert(rsi.repeatMax >= rsi.repeatMin); DEBUG_PRINTF("entry\n"); - const ue2::unordered_set involved(rsi.vertices.begin(), - rsi.vertices.end()); + const unordered_set involved(rsi.vertices.begin(), + rsi.vertices.end()); vector succs; getSuccessors(g, rsi, &succs); @@ -932,7 +936,7 @@ void reprocessSubgraph(const NGHolder &h, const Grey &grey, * involved in other repeats as a result of earlier repeat transformations. 
*/ static bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, - const ue2::unordered_set &created) { + const unordered_set &created) { assert(!rsi.bad); if (created.empty()) { @@ -994,8 +998,8 @@ bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi, * idea to extend to cyclic states, too. */ static void peelStartDotStar(const NGHolder &g, - const ue2::unordered_map &depths, - const Grey &grey, ReachSubgraph &rsi) { + const unordered_map &depths, + const Grey &grey, ReachSubgraph &rsi) { if (rsi.vertices.size() < 1) { return; } @@ -1073,8 +1077,8 @@ bool hasSkipEdges(const NGHolder &g, const ReachSubgraph &rsi) { /* depth info is valid as calculated at entry */ static bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g, - const ue2::unordered_map &depths, - const ue2::unordered_set &reached_by_fixed_tops) { + const unordered_map &depths, + const unordered_set &reached_by_fixed_tops) { DEBUG_PRINTF("|reached_by_fixed_tops| %zu\n", reached_by_fixed_tops.size()); if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) { @@ -1200,12 +1204,12 @@ CharReach predReach(const NGHolder &g, NFAVertex v) { */ static void filterMap(const NGHolder &subg, - ue2::unordered_map &vmap) { + unordered_map &vmap) { NGHolder::vertex_iterator vi, ve; tie(vi, ve) = vertices(subg); - const ue2::unordered_set remaining_verts(vi, ve); + const unordered_set remaining_verts(vi, ve); - ue2::unordered_map fmap; // filtered map + unordered_map fmap; // filtered map for (const auto &m : vmap) { if (contains(remaining_verts, m.second)) { @@ -1220,7 +1224,7 @@ void filterMap(const NGHolder &subg, * the bounded repeat. 
*/ static void buildRepeatGraph(NGHolder &rg, - ue2::unordered_map &rg_map, + unordered_map &rg_map, const NGHolder &g, const ReachSubgraph &rsi, const map>> &triggers) { cloneHolder(rg, g, &rg_map); @@ -1231,7 +1235,7 @@ void buildRepeatGraph(NGHolder &rg, add_edge(rg.accept, rg.acceptEod, rg); // Find the set of vertices in rg involved in the repeat. - ue2::unordered_set rg_involved; + unordered_set rg_involved; for (const auto &v : rsi.vertices) { assert(contains(rg_map, v)); rg_involved.insert(rg_map.at(v)); @@ -1273,7 +1277,7 @@ void buildRepeatGraph(NGHolder &rg, */ static void buildInputGraph(NGHolder &lhs, - ue2::unordered_map &lhs_map, + unordered_map &lhs_map, const NGHolder &g, const NFAVertex first, const map>> &triggers) { DEBUG_PRINTF("building lhs with first=%zu\n", g[first].index); @@ -1327,8 +1331,8 @@ static const size_t MAX_SOLE_ENTRY_VERTICES = 10000; * single offset at runtime. See UE-1361. */ static bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, - const ue2::unordered_map &depths, - const ue2::unordered_set &reached_by_fixed_tops, + const unordered_map &depths, + const unordered_set &reached_by_fixed_tops, const map>> &triggers) { DEBUG_PRINTF("checking repeat {%s,%s}\n", rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str()); @@ -1358,12 +1362,12 @@ bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, } NGHolder rg; - ue2::unordered_map rg_map; + unordered_map rg_map; buildRepeatGraph(rg, rg_map, g, rsi, triggers); assert(rg.kind == g.kind); NGHolder lhs; - ue2::unordered_map lhs_map; + unordered_map lhs_map; buildInputGraph(lhs, lhs_map, g, first, triggers); assert(lhs.kind == g.kind); @@ -1377,7 +1381,7 @@ bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi, // are in one region, vertices in the bounded repeat are in another. 
const u32 lhs_region = 1; const u32 repeat_region = 2; - ue2::unordered_map region_map; + unordered_map region_map; for (const auto &v : rsi.vertices) { assert(!is_special(v, g)); // no specials in repeats @@ -1473,7 +1477,7 @@ struct StrawWalker { NFAVertex walk(NFAVertex v, vector &straw) const { DEBUG_PRINTF("walk from %zu\n", g[v].index); - ue2::unordered_set visited; + unordered_set visited; straw.clear(); while (!is_special(v, g)) { @@ -1695,7 +1699,7 @@ vector> getRepeatTriggers(const NGHolder &g, assert(!done.empty()); // Convert our path list into a set of unique triggers. - ue2::unordered_set> unique_triggers; + ue2_unordered_set> unique_triggers; for (const auto &path : done) { vector reach_path; for (auto jt = path.rbegin(), jte = path.rend(); jt != jte; ++jt) { @@ -1743,8 +1747,8 @@ static void selectHistoryScheme(const NGHolder &g, const ReportManager *rm, ReachSubgraph &rsi, - const ue2::unordered_map &depths, - const ue2::unordered_set &reached_by_fixed_tops, + const unordered_map &depths, + const unordered_set &reached_by_fixed_tops, const map>> &triggers, const vector &all_repeats, const bool simple_model_selection) { @@ -1812,7 +1816,7 @@ selectHistoryScheme(const NGHolder &g, const ReportManager *rm, static void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, - ue2::unordered_set &created, + unordered_set &created, const vector &straw) { if (!g[rd.cyclic].char_reach.all()) { // Create another cyclic feeder state with flipped reach. 
It has an @@ -1859,7 +1863,7 @@ void buildFeeder(NGHolder &g, const BoundedRepeatData &rd, */ static bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd, - ue2::unordered_set &created, + unordered_set &created, const vector &all_repeats) { assert(edge(g.startDs, g.startDs, g).second); @@ -1963,7 +1967,7 @@ vector makeOwnStraw(NGHolder &g, BoundedRepeatData &rd, */ static bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd, - ue2::unordered_set &created, + unordered_set &created, const vector &all_repeats) { assert(g.kind == NFA_OUTFIX); @@ -2061,7 +2065,7 @@ bool endsInAcceptEod(const NGHolder &g, const ReachSubgraph &rsi) { namespace { class pfti_visitor : public boost::default_dfs_visitor { public: - pfti_visitor(ue2::unordered_map &top_depths_in, + pfti_visitor(unordered_map &top_depths_in, const depth &our_depth_in) : top_depths(top_depths_in), our_depth(our_depth_in) {} @@ -2077,7 +2081,7 @@ public: top_depths[v] = our_depth; } } - ue2::unordered_map &top_depths; + unordered_map &top_depths; const depth &our_depth; }; } // namespace @@ -2091,7 +2095,7 @@ void populateFixedTopInfo(const map &fixed_depth_tops, } assert(!proper_out_degree(g.startDs, g)); - ue2::unordered_map top_depths; + unordered_map top_depths; auto colours = make_small_color_map(g); for (const auto &e : out_edges_range(g.start, g)) { @@ -2142,7 +2146,7 @@ void populateFixedTopInfo(const map &fixed_depth_tops, static bool hasOverlappingRepeats(UNUSED const NGHolder &g, const vector &repeats) { - ue2::unordered_set involved; + unordered_set involved; for (const auto &br : repeats) { if (contains(involved, br.cyclic)) { @@ -2177,7 +2181,7 @@ bool hasOverlappingRepeats(UNUSED const NGHolder &g, */ static bool repeatIsNasty(const NGHolder &g, const ReachSubgraph &rsi, - const ue2::unordered_map &depths) { + const unordered_map &depths) { if (num_vertices(g) > NFA_MAX_STATES) { // We may have no choice but to implement this repeat to get the graph // down to a tractable 
number of vertices. @@ -2236,7 +2240,7 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // Later on, we're (a little bit) dependent on depth information for // unpeeling and so forth. Note that these depths MUST be maintained when // new vertices are added. - ue2::unordered_map depths; + unordered_map depths; findInitDepths(g, depths); // Construct our list of subgraphs with the same reach using BGL magic. @@ -2293,13 +2297,13 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, // could make this unnecessary? const unique_ptr orig_g(cloneHolder(g)); - ue2::unordered_set reached_by_fixed_tops; + unordered_set reached_by_fixed_tops; if (is_triggered(g)) { populateFixedTopInfo(fixed_depth_tops, g, &reached_by_fixed_tops); } // Go to town on the remaining acceptable subgraphs. - ue2::unordered_set created; + unordered_set created; for (auto &rsi : rs) { DEBUG_PRINTF("subgraph (beginning vertex %zu) is a {%s,%s} repeat\n", g[rsi.vertices.front()].index, diff --git a/src/nfagraph/ng_repeat.h b/src/nfagraph/ng_repeat.h index 2f14cb0c..cfd804b7 100644 --- a/src/nfagraph/ng_repeat.h +++ b/src/nfagraph/ng_repeat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,7 @@ #include "ue2common.h" #include "nfa/repeat_internal.h" #include "util/depth.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include #include @@ -122,7 +122,7 @@ void findRepeats(const NGHolder &h, u32 minRepeatVertices, struct PureRepeat { CharReach reach; DepthMinMax bounds; - ue2::flat_set reports; + flat_set reports; bool operator==(const PureRepeat &a) const { return reach == a.reach && bounds == a.bounds && reports == a.reports; diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index 32cdac23..704697e5 100644 
--- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -131,9 +131,9 @@ void getStateOrdering(NGHolder &g, const flat_set &tops, // Returns the number of states. static -ue2::unordered_map +unordered_map getStateIndices(const NGHolder &h, const vector &ordering) { - ue2::unordered_map states; + unordered_map states; for (const auto &v : vertices_range(h)) { states[v] = NO_STATE; } diff --git a/src/nfagraph/ng_restructuring.h b/src/nfagraph/ng_restructuring.h index bbd478d5..75d19c62 100644 --- a/src/nfagraph/ng_restructuring.h +++ b/src/nfagraph/ng_restructuring.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,14 +28,16 @@ /** \file * \brief State numbering and late graph restructuring code. - */ + #ifndef NG_RESTRUCTURING_H #define NG_RESTRUCTURING_H #include "ng_holder.h" #include "ue2common.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" + +#include namespace ue2 { @@ -48,14 +50,14 @@ static constexpr u32 NO_STATE = ~0; /** * \brief Gives each participating vertex in the graph a unique state index. */ -unordered_map +std::unordered_map numberStates(NGHolder &h, const flat_set &tops); /** * \brief Counts the number of states (vertices with state indices) in the * graph. 
*/ -u32 countStates(const unordered_map &state_ids); +u32 countStates(const std::unordered_map &state_ids); } // namespace ue2 diff --git a/src/nfagraph/ng_revacc.cpp b/src/nfagraph/ng_revacc.cpp index dc86dd44..0f932668 100644 --- a/src/nfagraph/ng_revacc.cpp +++ b/src/nfagraph/ng_revacc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,8 @@ #include "util/charreach.h" #include "util/graph_range.h" +#include + using namespace std; namespace ue2 { diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 67438103..6520a590 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -69,6 +69,8 @@ #include #include +#include +#include #include using namespace std; @@ -103,7 +105,7 @@ struct som_plan { static bool regionCanEstablishSom(const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const u32 region, const vector &r_exits, const vector &depths) { if (region == regions.at(g.accept) || @@ -149,7 +151,7 @@ struct region_info { static void buildRegionMapping(const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, map &info, bool include_region_0 = false) { for (auto v : vertices_range(g)) { @@ -228,7 +230,7 @@ void buildRegionMapping(const NGHolder &g, static bool validateXSL(const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const u32 region, const CharReach &escapes, u32 *bad_region) { /* need to check that the escapes escape all of the graph past region */ u32 first_bad_region = ~0U; @@ -251,7 +253,7 @@ bool validateXSL(const NGHolder &g, static bool validateEXSL(const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const u32 region, const CharReach &escapes, const NGHolder &prefix, u32 *bad_region) { /* EXSL: 
To be a valid EXSL with escapes e, we require that all states @@ -353,7 +355,7 @@ bool isPossibleLock(const NGHolder &g, static unique_ptr -makePrefix(const NGHolder &g, const ue2::unordered_map ®ions, +makePrefix(const NGHolder &g, const unordered_map ®ions, const region_info &curr, const region_info &next, bool renumber = true) { const vector &curr_exits = curr.exits; @@ -368,12 +370,12 @@ makePrefix(const NGHolder &g, const ue2::unordered_map ®ions, deque lhs_verts; insert(&lhs_verts, lhs_verts.end(), vertices(g)); - ue2::unordered_map lhs_map; // g -> prefix + unordered_map lhs_map; // g -> prefix fillHolder(&prefix, g, lhs_verts, &lhs_map); prefix.kind = NFA_OUTFIX; // We need a reverse mapping to track regions. - ue2::unordered_map rev_map; // prefix -> g + unordered_map rev_map; // prefix -> g for (const auto &e : lhs_map) { rev_map.emplace(e.second, e.first); } @@ -541,7 +543,7 @@ void setMidfixReports(ReportManager &rm, const som_plan &item, static bool finalRegion(const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, NFAVertex v) { u32 region = regions.at(v); for (auto w : adjacent_vertices_range(v, g)) { @@ -771,7 +773,7 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, static void fillRoughMidfix(NGHolder *out, const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const map &info, map::const_iterator picked) { /* as we are not the first prefix, we are probably not acyclic. We need to @@ -941,7 +943,7 @@ bool isMandRegionBetween(map::const_iterator a, // (woot!); updates picked, plan and bad_region. static bool advancePlan(const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const NGHolder &prefix, bool stuck, map::const_iterator &picked, const map::const_iterator furthest, @@ -1051,13 +1053,12 @@ void addReporterVertices(const region_info &r, const NGHolder &g, // Fetches the mappings of all preds of {accept, acceptEod} in this region. 
static void addMappedReporterVertices(const region_info &r, const NGHolder &g, - const ue2::unordered_map &mapping, + const unordered_map &mapping, vector &reporters) { for (auto v : r.exits) { if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { DEBUG_PRINTF("adding v=%zu\n", g[v].index); - ue2::unordered_map::const_iterator it = - mapping.find(v); + auto it = mapping.find(v); assert(it != mapping.end()); reporters.push_back(it->second); } @@ -1068,9 +1069,9 @@ void addMappedReporterVertices(const region_info &r, const NGHolder &g, // from earlier regions. static void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, NFAVertex entry, const vector &enters, - ue2::unordered_map &orig_to_copy) { + unordered_map &orig_to_copy) { orig_to_copy.clear(); cloneHolder(out, g, &orig_to_copy); @@ -1095,7 +1096,7 @@ void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g, } static -void expandGraph(NGHolder &g, ue2::unordered_map ®ions, +void expandGraph(NGHolder &g, unordered_map ®ions, vector &enters) { assert(!enters.empty()); const u32 split_region = regions.at(enters.front()); @@ -1178,11 +1179,11 @@ void expandGraph(NGHolder &g, ue2::unordered_map ®ions, static bool doTreePlanningIntl(NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const map &info, map::const_iterator picked, u32 bad_region, u32 parent_plan, - const ue2::unordered_map ©_to_orig, + const unordered_map ©_to_orig, vector &plan, const Grey &grey) { assert(picked != info.end()); @@ -1341,7 +1342,7 @@ bool doTreePlanning(NGHolder &g, // regions. 
NGHolder g_path; - ue2::unordered_map orig_to_copy; + unordered_map orig_to_copy; cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy); auto regions = assignRegions(g_path); dumpHolder(g_path, regions, 14, "som_treepath", grey); @@ -1375,7 +1376,7 @@ bool doTreePlanning(NGHolder &g, } // Construct reverse mapping from vertices in g_path to g. - ue2::unordered_map copy_to_orig; + unordered_map copy_to_orig; for (const auto &m : orig_to_copy) { copy_to_orig.insert(make_pair(m.second, m.first)); } @@ -1398,7 +1399,7 @@ enum dsp_behaviour { static bool doSomPlanning(NGHolder &g, bool stuck_in, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const map &info, map::const_iterator picked, vector &plan, @@ -1940,7 +1941,7 @@ map::const_iterator findLaterLiteral(const NGHolder &g, static bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const map &info, map::const_iterator picked, const Grey &grey, @@ -2014,7 +2015,7 @@ void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) { static bool tryHaig(RoseBuild &rose, NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, som_type som, u32 somPrecision, map::const_iterator picked, shared_ptr *haig, shared_ptr *haig_prefix, @@ -2062,7 +2063,7 @@ void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr &prefix, static sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, som_type som, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const map &info, map::const_iterator lower_bound) { DEBUG_PRINTF("entry\n"); @@ -2343,7 +2344,7 @@ bool splitOffLeadingLiterals(const NGHolder &g, set *lit_out, } } - ue2::unordered_map rhs_map; + unordered_map rhs_map; vector pivots; insert(&pivots, pivots.end(), adj_term1); splitRHS(g, pivots, rhs, &rhs_map); @@ -2354,7 +2355,7 @@ bool splitOffLeadingLiterals(const NGHolder &g, set *lit_out, static void findBestLiteral(const 
NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, ue2_literal *lit_out, NFAVertex *v, const CompileContext &cc) { map info; @@ -2394,7 +2395,7 @@ void findBestLiteral(const NGHolder &g, static bool splitOffBestLiteral(const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs, const CompileContext &cc) { NFAVertex v = NGHolder::null_vertex(); @@ -2406,8 +2407,8 @@ bool splitOffBestLiteral(const NGHolder &g, DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str()); - ue2::unordered_map lhs_map; - ue2::unordered_map rhs_map; + unordered_map lhs_map; + unordered_map rhs_map; splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map); @@ -2498,7 +2499,7 @@ bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { static bool doHaigLitHaigSom(NG &ng, NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, som_type som) { if (!ng.cc.grey.allowLitHaig) { return false; @@ -2732,7 +2733,7 @@ bool trySombe(NG &ng, NGHolder &g, som_type som) { static map::const_iterator pickInitialSomCut(const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const map &info, const vector &depths) { map::const_iterator picked = info.end(); @@ -2757,7 +2758,7 @@ map::const_iterator pickInitialSomCut(const NGHolder &g, static map::const_iterator tryForLaterRevNfaCut(const NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const map &info, const vector &depths, const map::const_iterator &orig, @@ -2849,7 +2850,7 @@ map::const_iterator tryForLaterRevNfaCut(const NGHolder &g, static unique_ptr makePrefixForChain(NGHolder &g, - const ue2::unordered_map ®ions, + const unordered_map ®ions, const map &info, const map::const_iterator &picked, vector *depths, bool prefix_by_rev, diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index a3b6ee5f..1e7a41bb 100644 --- a/src/nfagraph/ng_som_util.cpp +++ 
b/src/nfagraph/ng_som_util.cpp @@ -54,7 +54,7 @@ vector getDistancesFromSOM(const NGHolder &g_orig) { // We operate on a temporary copy of the original graph here, so we don't // have to mutate the original. NGHolder g; - ue2::unordered_map vmap; // vertex in g_orig to vertex in g + unordered_map vmap; // vertex in g_orig to vertex in g cloneHolder(g, g_orig, &vmap); vector vstarts; @@ -136,7 +136,7 @@ bool firstMatchIsFirst(const NGHolder &p) { return false; } - ue2::flat_set states; + flat_set states; /* turn on all states (except starts - avoid suffix matches) */ /* If we were doing (1) we would also except states leading to accepts - avoid prefix matches */ @@ -166,7 +166,7 @@ bool firstMatchIsFirst(const NGHolder &p) { } bool somMayGoBackwards(NFAVertex u, const NGHolder &g, - const ue2::unordered_map ®ion_map, + const unordered_map ®ion_map, smgb_cache &cache) { /* Need to ensure all matches of the graph g up to u contain no infixes * which are also matches of the graph to u. @@ -215,7 +215,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, } } - ue2::unordered_map orig_to_copy; + unordered_map orig_to_copy; NGHolder c_g; cloneHolder(c_g, g, &orig_to_copy); @@ -287,7 +287,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, } bool sentClearsTail(const NGHolder &g, - const ue2::unordered_map ®ion_map, + const unordered_map ®ion_map, const NGHolder &sent, u32 last_head_region, u32 *bad_region) { /* if a subsequent match from the prefix clears the rest of the pattern @@ -312,7 +312,7 @@ bool sentClearsTail(const NGHolder &g, */ u32 first_bad_region = ~0U; - ue2::flat_set states; + flat_set states; /* turn on all states */ DEBUG_PRINTF("region %u is cutover\n", last_head_region); for (auto v : vertices_range(g)) { diff --git a/src/nfagraph/ng_som_util.h b/src/nfagraph/ng_som_util.h index 793dd2c3..e2d38642 100644 --- a/src/nfagraph/ng_som_util.h +++ b/src/nfagraph/ng_som_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * 
Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,9 +35,9 @@ #include "ng_util.h" #include "util/depth.h" -#include "util/ue2_containers.h" #include +#include #include namespace ue2 { @@ -61,7 +61,7 @@ struct smgb_cache : public mbsb_cache { }; bool somMayGoBackwards(NFAVertex u, const NGHolder &g, - const ue2::unordered_map ®ion_map, + const std::unordered_map ®ion_map, smgb_cache &cache); /** @@ -75,7 +75,7 @@ bool somMayGoBackwards(NFAVertex u, const NGHolder &g, * region ID associated with a tail state that is still on. */ bool sentClearsTail(const NGHolder &g, - const ue2::unordered_map ®ion_map, + const std::unordered_map ®ion_map, const NGHolder &sent, u32 last_head_region, u32 *bad_region); diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp index 3c2baee4..91a099fc 100644 --- a/src/nfagraph/ng_split.cpp +++ b/src/nfagraph/ng_split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,6 @@ #include "util/container.h" #include "util/graph.h" #include "util/graph_range.h" -#include "util/ue2_containers.h" #include #include @@ -63,12 +62,13 @@ void clearAccepts(NGHolder &g) { } static -void filterSplitMap(const NGHolder &g, ue2::unordered_map *out_map) { - ue2::unordered_set verts; +void filterSplitMap(const NGHolder &g, + unordered_map *out_map) { + unordered_set verts; insert(&verts, vertices(g)); - ue2::unordered_map::iterator it = out_map->begin(); + auto it = out_map->begin(); while (it != out_map->end()) { - ue2::unordered_map::iterator jt = it; + auto jt = it; ++it; if (!contains(verts, jt->second)) { out_map->erase(jt); @@ -78,8 +78,8 @@ void filterSplitMap(const 
NGHolder &g, ue2::unordered_map static void splitLHS(const NGHolder &base, const vector &pivots, - const vector &rhs_pivots, - NGHolder *lhs, ue2::unordered_map *lhs_map) { + const vector &rhs_pivots, NGHolder *lhs, + unordered_map *lhs_map) { assert(lhs && lhs_map); cloneHolder(*lhs, base, lhs_map); @@ -131,7 +131,7 @@ void splitLHS(const NGHolder &base, const vector &pivots, } void splitLHS(const NGHolder &base, NFAVertex pivot, - NGHolder *lhs, ue2::unordered_map *lhs_map) { + NGHolder *lhs, unordered_map *lhs_map) { vector pivots(1, pivot); vector rhs_pivots; insert(&rhs_pivots, rhs_pivots.end(), adjacent_vertices(pivot, base)); @@ -139,7 +139,7 @@ void splitLHS(const NGHolder &base, NFAVertex pivot, } void splitRHS(const NGHolder &base, const vector &pivots, - NGHolder *rhs, ue2::unordered_map *rhs_map) { + NGHolder *rhs, unordered_map *rhs_map) { assert(rhs && rhs_map); cloneHolder(*rhs, base, rhs_map); @@ -211,8 +211,8 @@ void findCommonSuccessors(const NGHolder &g, const vector &pivots, } void splitGraph(const NGHolder &base, const vector &pivots, - NGHolder *lhs, ue2::unordered_map *lhs_map, - NGHolder *rhs, ue2::unordered_map *rhs_map) { + NGHolder *lhs, unordered_map *lhs_map, + NGHolder *rhs, unordered_map *rhs_map) { DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size()); assert(!has_parallel_edge(base)); @@ -235,8 +235,8 @@ void splitGraph(const NGHolder &base, const vector &pivots, } void splitGraph(const NGHolder &base, NFAVertex pivot, - NGHolder *lhs, ue2::unordered_map *lhs_map, - NGHolder *rhs, ue2::unordered_map *rhs_map) { + NGHolder *lhs, unordered_map *lhs_map, + NGHolder *rhs, unordered_map *rhs_map) { vector pivots(1, pivot); splitGraph(base, pivots, lhs, lhs_map, rhs, rhs_map); } diff --git a/src/nfagraph/ng_split.h b/src/nfagraph/ng_split.h index 31c1cf35..9ddc0332 100644 --- a/src/nfagraph/ng_split.h +++ b/src/nfagraph/ng_split.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, 
Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,10 +33,10 @@ #ifndef NG_SPLIT_H #define NG_SPLIT_H -#include - #include "ng_holder.h" -#include "util/ue2_containers.h" + +#include +#include namespace ue2 { @@ -55,21 +55,21 @@ class NGHolder; * vertices which have an edge to every pivot */ void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, - ue2::unordered_map *lhs_map, + std::unordered_map *lhs_map, NGHolder *rhs, - ue2::unordered_map *rhs_map); + std::unordered_map *rhs_map); void splitGraph(const NGHolder &base, const std::vector &pivots, NGHolder *lhs, - ue2::unordered_map *lhs_map, + std::unordered_map *lhs_map, NGHolder *rhs, - ue2::unordered_map *rhs_map); + std::unordered_map *rhs_map); void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs, - ue2::unordered_map *lhs_map); + std::unordered_map *lhs_map); void splitRHS(const NGHolder &base, const std::vector &pivots, - NGHolder *rhs, ue2::unordered_map *rhs_map); + NGHolder *rhs, std::unordered_map *rhs_map); } // namespace ue2 diff --git a/src/nfagraph/ng_squash.cpp b/src/nfagraph/ng_squash.cpp index ebec3a4a..df77668e 100644 --- a/src/nfagraph/ng_squash.cpp +++ b/src/nfagraph/ng_squash.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -104,7 +104,6 @@ #include "ng_region.h" #include "ng_som_util.h" #include "ng_util.h" -#include "ng_util.h" #include "util/container.h" #include "util/graph_range.h" #include "util/report_manager.h" @@ -112,6 +111,8 @@ #include #include +#include +#include #include #include @@ -120,13 +121,11 @@ using namespace std; namespace ue2 { -typedef ue2::unordered_map > PostDomTree; +typedef unordered_map> 
PostDomTree; static void buildPDomTree(const NGHolder &g, PostDomTree &tree) { - ue2::unordered_map postdominators = - findPostDominators(g); + auto postdominators = findPostDominators(g); for (auto v : vertices_range(g)) { if (is_special(v, g)) { @@ -150,7 +149,7 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, const CharReach &cr, const NFAStateSet &init, const vector &vByIndex, const PostDomTree &tree, som_type som, const vector &som_depths, - const ue2::unordered_map ®ion_map, + const unordered_map ®ion_map, smgb_cache &cache) { DEBUG_PRINTF("build base squash mask for vertex %zu)\n", g[v].index); @@ -274,7 +273,7 @@ void findDerivedSquashers(const NGHolder &g, const vector &vByIndex, const PostDomTree &pdom_tree, const NFAStateSet &init, map *squash, som_type som, const vector &som_depths, - const ue2::unordered_map ®ion_map, + const unordered_map ®ion_map, smgb_cache &cache) { deque remaining; for (const auto &m : *squash) { @@ -619,7 +618,7 @@ static vector findUnreachable(const NGHolder &g) { const boost::reverse_graph revg(g); - ue2::unordered_map colours; + unordered_map colours; colours.reserve(num_vertices(g)); depth_first_visit(revg, g.acceptEod, @@ -661,7 +660,7 @@ findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { // cutting the appropriate out-edges to accept and seeing which // vertices become unreachable. 
- ue2::unordered_map orig_to_copy; + unordered_map orig_to_copy; NGHolder h; cloneHolder(h, g, &orig_to_copy); removeEdgesToAccept(h, orig_to_copy[v]); diff --git a/src/nfagraph/ng_squash.h b/src/nfagraph/ng_squash.h index 66621a7d..51ce245a 100644 --- a/src/nfagraph/ng_squash.h +++ b/src/nfagraph/ng_squash.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,6 @@ #include "ng_holder.h" #include "som/som.h" #include "ue2common.h" -#include "util/ue2_containers.h" #include #include @@ -45,7 +44,9 @@ namespace ue2 { class NGHolder; class ReportManager; -/** Dynamically-sized bitset, as an NFA can have an arbitrary number of states. */ +/** + * Dynamically-sized bitset, as an NFA can have an arbitrary number of states. + */ typedef boost::dynamic_bitset<> NFAStateSet; /** diff --git a/src/nfagraph/ng_undirected.h b/src/nfagraph/ng_undirected.h index 1e27ad79..036adcbf 100644 --- a/src/nfagraph/ng_undirected.h +++ b/src/nfagraph/ng_undirected.h @@ -37,7 +37,7 @@ #include "ng_util.h" #include "ue2common.h" #include "util/graph_range.h" -#include "util/ue2_containers.h" +#include "util/unordered.h" #include @@ -71,8 +71,8 @@ template NFAUndirectedGraph createUnGraph(const Graph &g, bool excludeStarts, bool excludeAccepts, - unordered_map &old2new) { + std::unordered_map &old2new) { NFAUndirectedGraph ug; size_t idx = 0; @@ -97,7 +97,7 @@ NFAUndirectedGraph createUnGraph(const Graph &g, // Track seen edges so that we don't insert parallel edges. 
using Vertex = typename Graph::vertex_descriptor; - unordered_set> seen; + ue2_unordered_set> seen; seen.reserve(num_edges(g)); auto make_ordered_edge = [](Vertex a, Vertex b) { return std::make_pair(std::min(a, b), std::max(a, b)); diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 14082407..83b29257 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -48,6 +48,9 @@ #include #include #include +#include +#include + #include #include #include @@ -353,7 +356,7 @@ vector getTopoOrdering(const NGHolder &g) { // having to reallocate it, etc. auto colors = make_small_color_map(g); - using EdgeSet = ue2::unordered_set; + using EdgeSet = unordered_set; EdgeSet backEdges; BackEdges be(backEdges); @@ -467,7 +470,7 @@ void setTops(NGHolder &h, u32 top) { void clearReports(NGHolder &g) { DEBUG_PRINTF("clearing reports without an accept edge\n"); - ue2::unordered_set allow; + unordered_set allow; insert(&allow, inv_adjacent_vertices(g.accept, g)); insert(&allow, inv_adjacent_vertices(g.acceptEod, g)); allow.erase(g.accept); // due to stylised edge. 
@@ -491,7 +494,7 @@ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) { static void fillHolderOutEdges(NGHolder &out, const NGHolder &in, - const ue2::unordered_map &v_map, + const unordered_map &v_map, NFAVertex u) { NFAVertex u_new = v_map.at(u); @@ -513,9 +516,9 @@ void fillHolderOutEdges(NGHolder &out, const NGHolder &in, } void fillHolder(NGHolder *outp, const NGHolder &in, const deque &vv, - ue2::unordered_map *v_map_out) { + unordered_map *v_map_out) { NGHolder &out = *outp; - ue2::unordered_map &v_map = *v_map_out; + unordered_map &v_map = *v_map_out; out.kind = in.kind; @@ -597,7 +600,7 @@ void cloneHolder(NGHolder &out, const NGHolder &in) { } void cloneHolder(NGHolder &out, const NGHolder &in, - ue2::unordered_map *mapping) { + unordered_map *mapping) { cloneHolder(out, in); vector out_verts(num_vertices(in)); for (auto v : vertices_range(out)) { @@ -620,7 +623,7 @@ unique_ptr cloneHolder(const NGHolder &in) { void reverseHolder(const NGHolder &g_in, NGHolder &g) { // Make the BGL do the grunt work. - ue2::unordered_map vertexMap; + unordered_map vertexMap; boost::transpose_graph(g_in, g, orig_to_copy(boost::make_assoc_property_map(vertexMap))); diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 4c529a83..3cc9c7c3 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -32,16 +32,17 @@ #ifndef NG_UTIL_H #define NG_UTIL_H -#include -#include +#include "ng_holder.h" +#include "ue2common.h" +#include "util/flat_containers.h" +#include "util/graph.h" +#include "util/graph_range.h" #include // for default_dfs_visitor -#include "ng_holder.h" -#include "ue2common.h" -#include "util/graph.h" -#include "util/graph_range.h" -#include "util/ue2_containers.h" +#include +#include +#include namespace ue2 { @@ -272,12 +273,12 @@ void appendLiteral(NGHolder &h, const ue2_literal &s); * \a in). A vertex mapping is returned in \a v_map_out. 
*/ void fillHolder(NGHolder *outp, const NGHolder &in, const std::deque &vv, - unordered_map *v_map_out); + std::unordered_map *v_map_out); /** \brief Clone the graph in \a in into graph \a out, returning a vertex * mapping in \a v_map_out. */ void cloneHolder(NGHolder &out, const NGHolder &in, - unordered_map *v_map_out); + std::unordered_map *v_map_out); /** \brief Clone the graph in \a in into graph \a out. */ void cloneHolder(NGHolder &out, const NGHolder &in); diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index d3303985..6742fec9 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -57,13 +57,13 @@ #include "util/compare.h" #include "util/compile_context.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/order_check.h" #include "util/target_info.h" #include "util/ue2string.h" -#include "util/ue2_containers.h" #include #include @@ -559,7 +559,7 @@ void filterCandPivots(const NGHolder &g, const set &cand_raw, static void getCandidatePivots(const NGHolder &g, set *cand, set *cand_raw) { - ue2::unordered_map dominators = findDominators(g); + auto dominators = findDominators(g); set accepts; @@ -1023,8 +1023,8 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, shared_ptr lhs = make_shared(); shared_ptr rhs = make_shared(); - ue2::unordered_map lhs_map; - ue2::unordered_map rhs_map; + unordered_map lhs_map; + unordered_map rhs_map; splitGraph(base_graph, splitters, lhs.get(), &lhs_map, rhs.get(), &rhs_map); DEBUG_PRINTF("split %s:%zu into %s:%zu + %s:%zu\n", @@ -1217,7 +1217,7 @@ void splitEdgesByCut(NGHolder &h, RoseInGraph &vg, NFAVertex pivot = target(e, h); DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); - ue2::unordered_map temp_map; + unordered_map temp_map; shared_ptr new_lhs = make_shared(); splitLHS(h, pivot, new_lhs.get(), &temp_map); @@ -1298,7 +1298,7 @@ void 
splitEdgesByCut(NGHolder &h, RoseInGraph &vg, effort */ if (!contains(done_rhs, adj)) { - ue2::unordered_map temp_map; + unordered_map temp_map; shared_ptr new_rhs = make_shared(); splitRHS(h, adj, new_rhs.get(), &temp_map); remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index ce9ca865..43dfc760 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -53,8 +53,8 @@ #include "parser/Parser.h" #include "ue2common.h" #include "util/compare.h" +#include "util/flat_containers.h" #include "util/make_unique.h" -#include "util/ue2_containers.h" #include "util/unicode_def.h" #include "util/verify_types.h" diff --git a/src/parser/buildstate.cpp b/src/parser/buildstate.cpp index eb25550b..75cfbb7b 100644 --- a/src/parser/buildstate.cpp +++ b/src/parser/buildstate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,8 +39,10 @@ #include "nfagraph/ng_builder.h" #include "util/charreach.h" #include "util/container.h" +#include "util/flat_containers.h" +#include "util/hash.h" #include "util/make_unique.h" -#include "util/ue2_containers.h" +#include "util/unordered.h" #include #include @@ -449,7 +451,7 @@ unique_ptr makeGlushkovBuildState(NFABuilder &b, * Scans through a list of positions and retains only the highest priority * version of a given (position, flags) entry. */ void cleanupPositions(vector &a) { - ue2::unordered_set> seen; // track dupes + ue2_unordered_set> seen; vector out; out.reserve(a.size()); // output should be close to input in size. 
diff --git a/src/parser/check_refs.cpp b/src/parser/check_refs.cpp index fae68f74..0badc780 100644 --- a/src/parser/check_refs.cpp +++ b/src/parser/check_refs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,7 @@ #include "ConstComponentVisitor.h" #include "parse_error.h" #include "util/container.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include diff --git a/src/parser/check_refs.h b/src/parser/check_refs.h index ede44896..26912fb8 100644 --- a/src/parser/check_refs.h +++ b/src/parser/check_refs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,14 +26,16 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Component tree analysis that checks that references (such as * back-refs, conditionals) have valid referents. 
*/ -#ifndef PARSER_CHECK_REFS_H_ -#define PARSER_CHECK_REFS_H_ -#include "util/ue2_containers.h" +#ifndef PARSER_CHECK_REFS_H +#define PARSER_CHECK_REFS_H + +#include "util/flat_containers.h" #include @@ -43,8 +45,8 @@ class Component; class ComponentSequence; void checkReferences(const Component &root, unsigned int groupIndices, - const ue2::flat_set &groupNames); + const flat_set &groupNames); } // namespace ue2 -#endif // PARSER_CHECK_REFS_H_ +#endif // PARSER_CHECK_REFS_H diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index cbb925f7..2219f12e 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -42,8 +42,8 @@ #include "rose_in_graph.h" #include "util/bytecode_ptr.h" #include "util/charreach.h" +#include "util/flat_containers.h" #include "util/noncopyable.h" -#include "util/ue2_containers.h" #include "util/ue2string.h" #include @@ -73,7 +73,7 @@ public: /** \brief True if we can not establish that at most a single callback will * be generated at a given offset from this set of reports. */ - virtual bool requiresDedupeSupport(const ue2::flat_set &reports) + virtual bool requiresDedupeSupport(const flat_set &reports) const = 0; }; @@ -85,7 +85,7 @@ public: /** \brief Adds a single literal. */ virtual void add(bool anchored, bool eod, const ue2_literal &lit, - const ue2::flat_set &ids) = 0; + const flat_set &ids) = 0; virtual bool addRose(const RoseInGraph &ig, bool prefilter) = 0; virtual bool addSombeRose(const RoseInGraph &ig) = 0; @@ -99,17 +99,17 @@ public: /** \brief Returns true if we were able to add it as a mask. */ virtual bool add(bool anchored, const std::vector &mask, - const ue2::flat_set &reports) = 0; + const flat_set &reports) = 0; /** \brief Attempts to add the graph to the anchored acyclic table. Returns * true on success. 
*/ virtual bool addAnchoredAcyclic(const NGHolder &graph) = 0; virtual bool validateMask(const std::vector &mask, - const ue2::flat_set &reports, + const flat_set &reports, bool anchored, bool eod) const = 0; virtual void addMask(const std::vector &mask, - const ue2::flat_set &reports, bool anchored, + const flat_set &reports, bool anchored, bool eod) = 0; /** \brief Construct a runtime implementation. */ diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 4c895caf..f36fa576 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -85,7 +85,7 @@ struct RoseBuildData : noncopyable { /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which * require ANCH history to prevent overlap. */ - ue2::unordered_set anch_history_edges; + unordered_set anch_history_edges; /** True if we're tracking Start of Match. */ bool som; diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 74626a82..8ea07c95 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -49,11 +49,12 @@ #include "util/compile_error.h" #include "util/container.h" #include "util/determinise.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/order_check.h" -#include "util/ue2_containers.h" #include "util/ue2string.h" +#include "util/unordered.h" #include "util/verify_types.h" #include @@ -285,19 +286,16 @@ struct Holder_StateSet { bool operator==(const Holder_StateSet &b) const { return wdelay == b.wdelay && wrap_state == b.wrap_state; } -}; -size_t hash_value(const Holder_StateSet &s) { - size_t val = 0; - boost::hash_combine(val, s.wrap_state); - boost::hash_combine(val, s.wdelay); - return val; -} + size_t hash() const { + return hash_all(wrap_state, wdelay); + } +}; class Automaton_Holder { public: using StateSet = Holder_StateSet; - using StateMap = unordered_map; + using StateMap = ue2_unordered_map; explicit 
Automaton_Holder(const NGHolder &g_in) : g(g_in) { for (auto v : vertices_range(g)) { @@ -416,7 +414,7 @@ public: private: const NGHolder &g; - ue2::unordered_map vertexToIndex; + unordered_map vertexToIndex; vector indexToVertex; vector cr_by_index; StateSet init; @@ -712,7 +710,7 @@ int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { static void setReports(NGHolder &h, const map> &reportMap, - const ue2::unordered_map &orig_to_copy) { + const unordered_map &orig_to_copy) { for (const auto &m : reportMap) { NFAVertex t = orig_to_copy.at(m.first); assert(!m.second.empty()); @@ -724,7 +722,7 @@ void setReports(NGHolder &h, const map> &reportMap, int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, const map> &reportMap) { NGHolder h; - ue2::unordered_map orig_to_copy; + unordered_map orig_to_copy; cloneHolder(h, wrapper, &orig_to_copy); clear_in_edges(h.accept, h); clear_in_edges(h.acceptEod, h); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a41f0322..e7fd6271 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -145,8 +145,8 @@ struct build_context : noncopyable { /** \brief Simple cache of programs written to engine blob, used for * deduplication. */ - ue2::unordered_map program_cache; + unordered_map program_cache; /** \brief State indices, for those roles that have them. * Each vertex present has a unique state index in the range @@ -155,7 +155,7 @@ struct build_context : noncopyable { /** \brief Mapping from queue index to bytecode offset for built engines * that have already been pushed into the engine_blob. */ - ue2::unordered_map engineOffsets; + unordered_map engineOffsets; /** \brief List of long literals (ones with CHECK_LONG_LIT instructions) * that need hash table support. 
*/ @@ -1470,7 +1470,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, map > infixTriggers; vector order; - unordered_map > succs; + unordered_map> succs; findInfixTriggers(tbi, &infixTriggers); if (cc.grey.allowTamarama && cc.streaming && !do_prefix) { @@ -2269,9 +2269,9 @@ bool hasMpvTrigger(const set &reports, const ReportManager &rm) { } static -bool anyEndfixMpvTriggers(const RoseBuildImpl &tbi) { - const RoseGraph &g = tbi.g; - ue2::unordered_set done; +bool anyEndfixMpvTriggers(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + unordered_set done; /* suffixes */ for (auto v : vertices_range(g)) { @@ -2283,14 +2283,14 @@ bool anyEndfixMpvTriggers(const RoseBuildImpl &tbi) { } done.insert(g[v].suffix); - if (hasMpvTrigger(all_reports(g[v].suffix), tbi.rm)) { + if (hasMpvTrigger(all_reports(g[v].suffix), build.rm)) { return true; } } /* outfixes */ - for (const auto &out : tbi.outfixes) { - if (hasMpvTrigger(all_reports(out), tbi.rm)) { + for (const auto &out : build.outfixes) { + if (hasMpvTrigger(all_reports(out), build.rm)) { return true; } } @@ -2588,7 +2588,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, const RoseGraph &g = tbi.g; const CompileContext &cc = tbi.cc; - ue2::unordered_set done_core; + unordered_set done_core; leftTable.resize(leftfixCount); diff --git a/src/rose/rose_build_castle.cpp b/src/rose/rose_build_castle.cpp index a85a784f..59bab3b1 100644 --- a/src/rose/rose_build_castle.cpp +++ b/src/rose/rose_build_castle.cpp @@ -38,7 +38,6 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" -#include "util/ue2_containers.h" #include "util/ue2string.h" #include @@ -55,7 +54,7 @@ namespace ue2 { static void makeCastle(LeftEngInfo &left, - unordered_map> &cache) { + unordered_map> &cache) { if (left.dfa || left.haig || left.castle) { return; } @@ -85,7 +84,7 @@ void makeCastle(LeftEngInfo &left, static void makeCastleSuffix(RoseBuildImpl &tbi, RoseVertex v, 
- ue2::unordered_map > &cache) { + unordered_map> &cache) { RoseSuffixInfo &suffix = tbi.g[v].suffix; if (!suffix.graph) { return; @@ -298,8 +297,8 @@ bool unmakeCastles(RoseBuildImpl &tbi) { } void remapCastleTops(RoseBuildImpl &tbi) { - ue2::unordered_map > rose_castles; - ue2::unordered_map > suffix_castles; + unordered_map> rose_castles; + unordered_map> suffix_castles; RoseGraph &g = tbi.g; for (auto v : vertices_range(g)) { diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 96241e39..e24d0b2e 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -61,10 +61,10 @@ #include "util/compile_context.h" #include "util/container.h" #include "util/dump_charclass.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/order_check.h" #include "util/report_manager.h" -#include "util/ue2_containers.h" #include "util/ue2string.h" #include "util/verify_types.h" @@ -1639,7 +1639,7 @@ static bool danglingVertexRef(RoseBuildImpl &tbi) { RoseGraph::vertex_iterator vi, ve; tie(vi, ve) = vertices(tbi.g); - const ue2::unordered_set valid_vertices(vi, ve); + const unordered_set valid_vertices(vi, ve); if (!contains(valid_vertices, tbi.anchored_root)) { DEBUG_PRINTF("anchored root vertex %zu not in graph\n", diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index f80e25cb..33351099 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -58,8 +58,9 @@ #include #include #include -#include +#include #include +#include #include @@ -561,7 +562,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, DEBUG_PRINTF("woot?\n"); shared_ptr h_new = make_shared(); - ue2::unordered_map rhs_map; + unordered_map rhs_map; vector exits_vec; insert(&exits_vec, exits_vec.end(), exits); splitRHS(h, exits_vec, h_new.get(), &rhs_map); diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h index 
3aa501b4..da4e355d 100644 --- a/src/rose/rose_build_engine_blob.h +++ b/src/rose/rose_build_engine_blob.h @@ -36,13 +36,14 @@ #include "util/bytecode_ptr.h" #include "util/charreach.h" #include "util/container.h" +#include "util/hash.h" #include "util/multibit_build.h" #include "util/noncopyable.h" -#include "util/ue2_containers.h" #include "util/verify_types.h" +#include "util/unordered.h" -#include #include +#include namespace ue2 { @@ -56,9 +57,10 @@ struct lookaround_info : noncopyable { u32 get_offset_of(const std::vector &look, RoseEngineBlob &blob); private: - unordered_map>, u32> multi_cache; - unordered_map, u32> lcache; - unordered_map, u32> rcache; + using Path = std::vector; + ue2_unordered_map, u32> multi_cache; + ue2_unordered_map, u32> lcache; + ue2_unordered_map rcache; }; class RoseEngineBlob : noncopyable { @@ -160,7 +162,7 @@ private: } /** \brief Cache of previously-written sparse iterators. */ - unordered_map, u32> cached_iters; + ue2_unordered_map, u32> cached_iters; /** * \brief Contents of the Rose bytecode immediately following the diff --git a/src/rose/rose_build_exclusive.cpp b/src/rose/rose_build_exclusive.cpp index e91cc297..25585ec0 100644 --- a/src/rose/rose_build_exclusive.cpp +++ b/src/rose/rose_build_exclusive.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,7 +87,7 @@ vector> divideIntoChunks(const RoseBuildImpl &build, /* add prefix literals to engine graph */ static -bool addPrefixLiterals(NGHolder &h, ue2::unordered_set &tailId, +bool addPrefixLiterals(NGHolder &h, unordered_set &tailId, const vector> &triggers) { DEBUG_PRINTF("add literals to graph\n"); @@ -196,8 +196,8 @@ vector findStartPos(const CharReach &cr1, template static bool isExclusive(const NGHolder &h, - const u32 num, ue2::unordered_set &tailId, - 
map> &skipList, + const u32 num, unordered_set &tailId, + map> &skipList, const RoleInfo &role1, const RoleInfo &role2) { const u32 id1 = role1.id; @@ -253,12 +253,12 @@ bool isExclusive(const NGHolder &h, template static -ue2::unordered_set checkExclusivity(const NGHolder &h, - const u32 num, ue2::unordered_set &tailId, - map> &skipList, - const RoleInfo &role1, - const RoleChunk &roleChunk) { - ue2::unordered_set info; +unordered_set checkExclusivity(const NGHolder &h, + const u32 num, unordered_set &tailId, + map> &skipList, + const RoleInfo &role1, + const RoleChunk &roleChunk) { + unordered_set info; const u32 id1 = role1.id; for (const auto &role2 : roleChunk.roles) { const u32 id2 = role2.id; @@ -316,7 +316,7 @@ void findCliques(const map> &exclusiveGroups, static map> findExclusiveGroups(const RoseBuildImpl &build, - const map> &exclusiveInfo, + const map> &exclusiveInfo, const map> &vertex_map, const bool is_infix) { map> exclusiveGroups; @@ -396,10 +396,10 @@ void exclusiveAnalysis(const RoseBuildImpl &build, vector> &exclusive_roles, const bool is_infix) { const auto &chunks = divideIntoChunks(build, roleInfoSet); DEBUG_PRINTF("Exclusivity analysis entry\n"); - map> exclusiveInfo; + map> exclusiveInfo; for (const auto &roleChunk : chunks) { - map> skipList; + map> skipList; for (const auto &role1 : roleChunk.roles) { const u32 id1 = role1.id; const role_id &s1 = role1.role; diff --git a/src/rose/rose_build_groups.h b/src/rose/rose_build_groups.h index 3ab5eb78..ada64b80 100644 --- a/src/rose/rose_build_groups.h +++ b/src/rose/rose_build_groups.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,11 +35,12 @@ #define ROSE_BUILD_GROUPS_H #include "rose_build_impl.h" -#include "util/ue2_containers.h" + +#include namespace ue2 { -unordered_map 
+std::unordered_map getVertexGroupMap(const RoseBuildImpl &build); rose_group getSquashableGroups(const RoseBuildImpl &build); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 13f1cfc9..42ae054a 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -39,11 +39,12 @@ #include "nfagraph/ng_holder.h" #include "nfagraph/ng_revacc.h" #include "util/bytecode_ptr.h" +#include "util/flat_containers.h" #include "util/hash.h" #include "util/order_check.h" #include "util/queue_index_factory.h" -#include "util/ue2_containers.h" #include "util/ue2string.h" +#include "util/unordered.h" #include "util/verify_types.h" #include @@ -177,7 +178,6 @@ depth findMinWidth(const suffix_id &s); depth findMaxWidth(const suffix_id &s); depth findMinWidth(const suffix_id &s, u32 top); depth findMaxWidth(const suffix_id &s, u32 top); -size_t hash_value(const suffix_id &s); /** \brief represents an engine to the left of a rose role */ struct left_id { @@ -258,11 +258,10 @@ bool isAnchored(const left_id &r); depth findMinWidth(const left_id &r); depth findMaxWidth(const left_id &r); u32 num_tops(const left_id &r); -size_t hash_value(const left_id &r); struct rose_literal_info { - ue2::flat_set delayed_ids; - ue2::flat_set vertices; + flat_set delayed_ids; + flat_set vertices; rose_group group_mask = 0; u32 undelayed_id = MO_INVALID_IDX; bool squash_group = false; @@ -306,6 +305,10 @@ struct rose_literal_id { return s == b.s && msk == b.msk && cmp == b.cmp && table == b.table && delay == b.delay && distinctiveness == b.distinctiveness; } + + size_t hash() const { + return hash_all(s, msk, cmp, table, delay, distinctiveness); + } }; static inline @@ -319,12 +322,6 @@ bool operator<(const rose_literal_id &a, const rose_literal_id &b) { return 0; } -inline -size_t hash_value(const rose_literal_id &lit) { - return hash_all(lit.s, lit.msk, lit.cmp, lit.table, lit.delay, - lit.distinctiveness); -} - class RoseLiteralMap { /** * \brief Main storage for 
literals. @@ -336,7 +333,7 @@ class RoseLiteralMap { std::deque lits; /** \brief Quick-lookup index from literal -> index in lits. */ - unordered_map lits_index; + ue2_unordered_map lits_index; public: std::pair insert(const rose_literal_id &lit) { @@ -504,7 +501,7 @@ public: // Adds a single literal. void add(bool anchored, bool eod, const ue2_literal &lit, - const ue2::flat_set &ids) override; + const flat_set &ids) override; bool addRose(const RoseInGraph &ig, bool prefilter) override; bool addSombeRose(const RoseInGraph &ig) override; @@ -517,15 +514,15 @@ public: // Returns true if we were able to add it as a mask bool add(bool anchored, const std::vector &mask, - const ue2::flat_set &reports) override; + const flat_set &reports) override; bool addAnchoredAcyclic(const NGHolder &graph) override; bool validateMask(const std::vector &mask, - const ue2::flat_set &reports, bool anchored, + const flat_set &reports, bool anchored, bool eod) const override; void addMask(const std::vector &mask, - const ue2::flat_set &reports, bool anchored, + const flat_set &reports, bool anchored, bool eod) override; // Construct a runtime implementation. @@ -627,8 +624,8 @@ public: * overlap calculation in history assignment. 
*/ std::map anchoredLitSuffix; - unordered_set transient; - unordered_map rose_squash_masks; + ue2_unordered_set transient; + ue2_unordered_map rose_squash_masks; std::vector outfixes; @@ -689,4 +686,22 @@ bool canImplementGraphs(const RoseBuildImpl &tbi); } // namespace ue2 +namespace std { + +template<> +struct hash { + size_t operator()(const ue2::left_id &l) const { + return l.hash(); + } +}; + +template<> +struct hash { + size_t operator()(const ue2::suffix_id &s) const { + return s.hash(); + } +}; + +} // namespace std + #endif /* ROSE_BUILD_IMPL_H */ diff --git a/src/rose/rose_build_infix.cpp b/src/rose/rose_build_infix.cpp index 4bbb3525..80e12542 100644 --- a/src/rose/rose_build_infix.cpp +++ b/src/rose/rose_build_infix.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,10 +36,12 @@ #include "rose/rose_build_impl.h" #include "util/container.h" #include "util/dump_charclass.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/graph.h" -#include "util/ue2_containers.h" +#include "util/hash.h" #include "util/ue2string.h" +#include "util/unordered.h" #include #include @@ -51,7 +53,7 @@ namespace ue2 { static bool couldEndLiteral(const ue2_literal &s, NFAVertex initial, const NGHolder &h) { - ue2::flat_set curr, next; + flat_set curr, next; curr.insert(initial); for (auto it = s.rbegin(), ite = s.rend(); it != ite; ++it) { @@ -82,9 +84,10 @@ bool couldEndLiteral(const ue2_literal &s, NFAVertex initial, return true; } +using EdgeCache = ue2_unordered_set>; + static -void contractVertex(NGHolder &g, NFAVertex v, - ue2::unordered_set> &all_edges) { +void contractVertex(NGHolder &g, NFAVertex v, EdgeCache &all_edges) { for (auto u : inv_adjacent_vertices_range(v, g)) { if (u == v) { continue; // self-edge @@ -144,8 
+147,9 @@ u32 findMaxLiteralMatches(const NGHolder &h, const set &lits) { cloneHolder(g, h); vector dead; - // The set of all edges in the graph is used for existence checks in contractVertex. - ue2::unordered_set> all_edges; + // The set of all edges in the graph is used for existence checks in + // contractVertex. + EdgeCache all_edges; for (const auto &e : edges_range(g)) { all_edges.emplace(source(e, g), target(e, g)); } diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h index 3bc3266b..d3ede29b 100644 --- a/src/rose/rose_build_instructions.h +++ b/src/rose/rose_build_instructions.h @@ -39,6 +39,7 @@ #include "rose_build_lookaround.h" #include "rose_build_program.h" +#include "util/hash.h" #include "util/verify_types.h" namespace ue2 { @@ -65,7 +66,7 @@ public: /** \brief Length of the bytecode instruction in bytes. */ virtual size_t byte_length() const = 0; - using OffsetMap = unordered_map; + using OffsetMap = std::unordered_map; /** * \brief Writes a concrete implementation of this instruction. @@ -149,6 +150,10 @@ private: } }; +template +constexpr RoseInstructionCode + RoseInstrBase::opcode; + /** * \brief Refinement of RoseInstrBase to use for instructions that have * just a single target member, called "target". 
@@ -190,7 +195,7 @@ public: virtual bool operator==(const RoseInstrType &) const { return true; } size_t hash() const override { - return boost::hash_value(static_cast(Opcode)); + return hash_all(Opcode); } bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &, @@ -222,7 +227,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), groups, anch_id); + return hash_all(opcode, groups, anch_id); } void write(void *dest, RoseEngineBlob &blob, @@ -251,7 +256,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), min_offset); + return hash_all(opcode, min_offset); } void write(void *dest, RoseEngineBlob &blob, @@ -278,7 +283,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), groups); + return hash_all(opcode, groups); } void write(void *dest, RoseEngineBlob &blob, @@ -305,7 +310,7 @@ public: } size_t hash() const override { - return boost::hash_value(static_cast(opcode)); + return hash_all(opcode); } void write(void *dest, RoseEngineBlob &blob, @@ -335,7 +340,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), min_bound, max_bound); + return hash_all(opcode, min_bound, max_bound); } void write(void *dest, RoseEngineBlob &blob, @@ -364,7 +369,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), key); + return hash_all(opcode, key); } void write(void *dest, RoseEngineBlob &blob, @@ -395,7 +400,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), offset, reach); + return hash_all(opcode, offset, reach); } void write(void *dest, RoseEngineBlob &blob, @@ -426,7 +431,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), look); + return hash_all(opcode, look); } void write(void *dest, RoseEngineBlob &blob, @@ -462,8 +467,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, - offset); 
+ return hash_all(opcode, and_mask, cmp_mask, neg_mask, offset); } void write(void *dest, RoseEngineBlob &blob, @@ -501,8 +505,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, - offset); + return hash_all(opcode, and_mask, cmp_mask, neg_mask, offset); } void write(void *dest, RoseEngineBlob &blob, @@ -539,8 +542,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), and_mask, cmp_mask, negation, - offset); + return hash_all(opcode, and_mask, cmp_mask, negation, offset); } void write(void *dest, RoseEngineBlob &blob, @@ -581,8 +583,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), nib_mask, - bucket_select_mask, neg_mask, offset); + return hash_all(opcode, nib_mask, bucket_select_mask, neg_mask, offset); } void write(void *dest, RoseEngineBlob &blob, @@ -626,8 +627,8 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, neg_mask, offset); + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, neg_mask, + offset); } void write(void *dest, RoseEngineBlob &blob, @@ -671,8 +672,8 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, neg_mask, offset); + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, neg_mask, + offset); } void write(void *dest, RoseEngineBlob &blob, @@ -720,9 +721,8 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask_hi, bucket_select_mask_lo, - neg_mask, offset); + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask_hi, + bucket_select_mask_lo, neg_mask, offset); } void write(void *dest, RoseEngineBlob &blob, @@ -758,7 +758,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), queue, lag, report); + return hash_all(opcode, queue, lag, report); } void 
write(void *dest, RoseEngineBlob &blob, @@ -791,7 +791,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), queue, lag, report); + return hash_all(opcode, queue, lag, report); } void write(void *dest, RoseEngineBlob &blob, @@ -820,7 +820,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), delay, index); + return hash_all(opcode, delay, index); } void write(void *dest, RoseEngineBlob &blob, @@ -861,7 +861,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), distance); + return hash_all(opcode, distance); } void write(void *dest, RoseEngineBlob &blob, @@ -889,7 +889,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), queue, lag); + return hash_all(opcode, queue, lag); } void write(void *dest, RoseEngineBlob &blob, @@ -917,7 +917,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), som.type, som.onmatch); + return hash_all(opcode, som.type, som.onmatch); } void write(void *dest, RoseEngineBlob &blob, @@ -953,7 +953,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), cancel, queue, event); + return hash_all(opcode, cancel, queue, event); } void write(void *dest, RoseEngineBlob &blob, @@ -981,7 +981,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), queue, event); + return hash_all(opcode, queue, event); } void write(void *dest, RoseEngineBlob &blob, @@ -1013,8 +1013,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), quash_som, dkey, - offset_adjust); + return hash_all(opcode, quash_som, dkey, offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1049,8 +1048,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), quash_som, dkey, - offset_adjust); + return hash_all(opcode, quash_som, dkey, offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1081,7 +1079,7 
@@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), event, top_squash_distance); + return hash_all(opcode, event, top_squash_distance); } void write(void *dest, RoseEngineBlob &blob, @@ -1110,7 +1108,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), som.type, som.onmatch); + return hash_all(opcode, som.type, som.onmatch); } void write(void *dest, RoseEngineBlob &blob, @@ -1138,7 +1136,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), som.type, som.onmatch); + return hash_all(opcode, som.type, som.onmatch); } void write(void *dest, RoseEngineBlob &blob, @@ -1165,7 +1163,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust); + return hash_all(opcode, onmatch, offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1196,7 +1194,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust, ekey); + return hash_all(opcode, onmatch, offset_adjust, ekey); } void write(void *dest, RoseEngineBlob &blob, @@ -1225,7 +1223,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust); + return hash_all(opcode, onmatch, offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1256,7 +1254,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust, ekey); + return hash_all(opcode, onmatch, offset_adjust, ekey); } void write(void *dest, RoseEngineBlob &blob, @@ -1293,8 +1291,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), quash_som, dkey, onmatch, - offset_adjust); + return hash_all(opcode, quash_som, dkey, onmatch, offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1324,7 +1321,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust); + return hash_all(opcode, 
onmatch, offset_adjust); } void write(void *dest, RoseEngineBlob &blob, @@ -1352,7 +1349,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), ekey); + return hash_all(opcode, ekey); } void write(void *dest, RoseEngineBlob &blob, @@ -1384,7 +1381,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), end_adj, min_length); + return hash_all(opcode, end_adj, min_length); } void write(void *dest, RoseEngineBlob &blob, @@ -1410,7 +1407,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), index); + return hash_all(opcode, index); } void write(void *dest, RoseEngineBlob &blob, @@ -1436,7 +1433,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), groups); + return hash_all(opcode, groups); } void write(void *dest, RoseEngineBlob &blob, @@ -1462,7 +1459,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), groups); + return hash_all(opcode, groups); } void write(void *dest, RoseEngineBlob &blob, @@ -1490,7 +1487,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), index); + return hash_all(opcode, index); } void write(void *dest, RoseEngineBlob &blob, @@ -1522,9 +1519,9 @@ public: } size_t hash() const override { - size_t v = hash_all(static_cast(opcode), num_keys); + size_t v = hash_all(opcode, num_keys); for (const u32 &key : jump_table | boost::adaptors::map_keys) { - boost::hash_combine(v, key); + hash_combine(v, key); } return v; } @@ -1594,7 +1591,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), state); + return hash_all(opcode, state); } void write(void *dest, RoseEngineBlob &blob, @@ -1638,7 +1635,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), num_keys, keys); + return hash_all(opcode, num_keys, keys); } void write(void *dest, RoseEngineBlob &blob, @@ -1665,7 +1662,7 @@ public: } size_t hash() const override 
{ - return hash_all(static_cast(opcode), iter_offset); + return hash_all(opcode, iter_offset); } void write(void *dest, RoseEngineBlob &blob, @@ -1709,7 +1706,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), literal); + return hash_all(opcode, literal); } void write(void *dest, RoseEngineBlob &blob, @@ -1741,7 +1738,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), literal); + return hash_all(opcode, literal); } void write(void *dest, RoseEngineBlob &blob, @@ -1772,7 +1769,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), literal); + return hash_all(opcode, literal); } void write(void *dest, RoseEngineBlob &blob, @@ -1804,7 +1801,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), literal); + return hash_all(opcode, literal); } void write(void *dest, RoseEngineBlob &blob, @@ -1849,8 +1846,7 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), multi_look, last_start, - start_mask); + return hash_all(opcode, multi_look, last_start, start_mask); } void write(void *dest, RoseEngineBlob &blob, @@ -1905,9 +1901,9 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), nib_mask, - bucket_select_mask, data_select_mask, hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start); + return hash_all(opcode, nib_mask, bucket_select_mask, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start); } void write(void *dest, RoseEngineBlob &blob, @@ -1968,9 +1964,9 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, data_select_mask, hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start); + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start); } void write(void *dest, RoseEngineBlob 
&blob, @@ -2035,10 +2031,9 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask_hi, bucket_select_mask_lo, - data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, - base_offset, last_start); + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask_hi, + bucket_select_mask_lo, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start); } void write(void *dest, RoseEngineBlob &blob, @@ -2100,9 +2095,9 @@ public: } size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, data_select_mask, hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start); + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start); } void write(void *dest, RoseEngineBlob &blob, diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index a46a1aeb..dd495fd3 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -40,7 +40,7 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "util/verify_types.h" #include @@ -79,7 +79,7 @@ string dump(const map &look) { static void getForwardReach(const NGHolder &g, u32 top, map &look) { - ue2::flat_set curr, next; + flat_set curr, next; // Consider only successors of start with the required top. 
for (const auto &e : out_edges_range(g.start, g)) { @@ -116,7 +116,7 @@ void getForwardReach(const NGHolder &g, u32 top, map &look) { static void getBackwardReach(const NGHolder &g, ReportID report, u32 lag, map &look) { - ue2::flat_set curr, next; + flat_set curr, next; for (auto v : inv_adjacent_vertices_range(g.accept, g)) { if (contains(g[v].reports, report)) { @@ -187,7 +187,7 @@ void getForwardReach(const raw_dfa &rdfa, map &look) { return; } - ue2::flat_set curr, next; + flat_set curr, next; curr.insert(rdfa.start_anchored); for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) { @@ -849,7 +849,7 @@ void mergeLookaround(vector &lookaround, } // Don't merge lookarounds at offsets we already have entries for. - ue2::flat_set offsets; + flat_set offsets; for (const auto &e : lookaround) { offsets.insert(e.offset); } diff --git a/src/rose/rose_build_lookaround.h b/src/rose/rose_build_lookaround.h index aea87ccf..70d4217c 100644 --- a/src/rose/rose_build_lookaround.h +++ b/src/rose/rose_build_lookaround.h @@ -33,6 +33,7 @@ #define ROSE_ROSE_BUILD_LOOKAROUND_H #include "rose_graph.h" +#include "util/hash.h" #include @@ -58,14 +59,6 @@ struct LookEntry { } }; -static inline -size_t hash_value(const LookEntry &l) { - size_t val = 0; - boost::hash_combine(val, l.offset); - boost::hash_combine(val, l.reach); - return val; -} - void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, std::vector &look_more); @@ -83,4 +76,15 @@ void mergeLookaround(std::vector &lookaround, } // namespace ue2 +namespace std { + +template<> +struct hash { + size_t operator()(const ue2::LookEntry &l) const { + return ue2::hash_all(l.offset, l.reach); + } +}; + +} // namespace std + #endif // ROSE_ROSE_BUILD_LOOKAROUND_H diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index d638e589..15a1ae78 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -63,9 +63,11 @@ #include "util/container.h" #include "util/dump_charclass.h" 
#include "util/graph_range.h" +#include "util/hash.h" #include "util/order_check.h" #include "util/report_manager.h" #include "util/ue2string.h" +#include "util/unordered.h" #include #include @@ -77,12 +79,10 @@ #include #include -#include #include using namespace std; using boost::adaptors::map_values; -using boost::hash_combine; namespace ue2 { @@ -336,7 +336,7 @@ void findUncalcLeavesCandidates(RoseBuildImpl &tbi, const RoseGraph &g = tbi.g; vector suffix_vertices; // vertices with suffix graphs - ue2::unordered_map fcount; // ref count per graph + unordered_map fcount; // ref count per graph for (auto v : vertices_range(g)) { if (g[v].suffix) { @@ -566,7 +566,7 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { for (deque &verts : roses | map_values) { DEBUG_PRINTF("group has %zu vertices\n", verts.size()); - ue2::unordered_set seen; + unordered_set seen; for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) { RoseVertex v = *jt; @@ -636,7 +636,7 @@ bool is_equal(const suffix_id &s1, const suffix_id &s2) { void dedupeSuffixes(RoseBuildImpl &tbi) { DEBUG_PRINTF("deduping suffixes\n"); - ue2::unordered_map> suffix_map; + unordered_map> suffix_map; map>, vector> part; // Collect suffixes into groups. @@ -703,7 +703,7 @@ template class Bouquet { private: list ordering; // Unique list in insert order. - typedef ue2::unordered_map > BouquetMap; + using BouquetMap = ue2_unordered_map>; BouquetMap bouquet; public: void insert(const EngineRef &h, RoseVertex v) { @@ -1331,7 +1331,7 @@ bool mergeRosePair(RoseBuildImpl &tbi, left_id &r1, left_id &r2, static void processMergeQueue(RoseBuildImpl &tbi, RoseBouquet &roses, priority_queue &pq) { - ue2::unordered_set dead; + unordered_set dead; DEBUG_PRINTF("merge queue has %zu entries\n", pq.size()); @@ -1862,7 +1862,7 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) { // We track the number of accelerable states for each graph in a map and // only recompute them when the graph is modified. 
- ue2::unordered_map accel_count; + unordered_map accel_count; for (const auto &rose : roses) { assert(rose.graph()->kind == NFA_INFIX); accel_count[rose] = estimatedAccelStates(tbi, *rose.graph()); @@ -2157,7 +2157,7 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, // If this isn't an acyclic case, we track the number of accelerable states // for each graph in a map and only recompute them when the graph is // modified. - ue2::unordered_map accel_count; + unordered_map accel_count; if (!acyclic) { for (const auto &suffix : suffixes) { assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX); @@ -2499,7 +2499,7 @@ private: template static void pairwiseDfaMerge(vector &dfas, - ue2::unordered_map &dfa_mapping, + unordered_map &dfa_mapping, vector &outfixes, MergeFunctor merge_func) { DEBUG_PRINTF("merging group of size %zu\n", dfas.size()); @@ -2541,7 +2541,7 @@ void pairwiseDfaMerge(vector &dfas, template static void chunkedDfaMerge(vector &dfas, - ue2::unordered_map &dfa_mapping, + unordered_map &dfa_mapping, vector &outfixes, MergeFunctor merge_func) { DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size()); @@ -2575,7 +2575,7 @@ void mergeOutfixDfas(RoseBuildImpl &tbi, vector &dfas) { /* key is index into outfix array as iterators, etc may be invalidated by * element addition. */ - ue2::unordered_map dfa_mapping; + unordered_map dfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { auto *rdfa = outfixes[i].rdfa(); if (rdfa) { @@ -2619,7 +2619,7 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, /* key is index into outfix array as iterators, etc may be invalidated by * element addition. 
*/ size_t new_dfas = 0; - ue2::unordered_map dfa_mapping; + unordered_map dfa_mapping; vector dfas; for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { @@ -2670,7 +2670,7 @@ void mergeOutfixHaigs(RoseBuildImpl &tbi, vector &dfas, vector &outfixes = tbi.outfixes; - ue2::unordered_map dfa_mapping; + unordered_map dfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { auto *haig = outfixes[i].haig(); if (haig) { diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 01be11ef..839fd478 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -56,11 +56,9 @@ #include "ue2common.h" #include "grey.h" -#include #include using namespace std; -using boost::hash_combine; namespace ue2 { @@ -691,16 +689,7 @@ set all_tops(const suffix_id &s) { } size_t suffix_id::hash() const { - size_t val = 0; - hash_combine(val, g); - hash_combine(val, c); - hash_combine(val, d); - hash_combine(val, h); - return val; -} - -size_t hash_value(const suffix_id &s) { - return s.hash(); + return hash_all(g, c, d, h); } bool isAnchored(const left_id &r) { @@ -761,16 +750,7 @@ u32 num_tops(const left_id &r) { } size_t left_id::hash() const { - size_t val = 0; - hash_combine(val, g); - hash_combine(val, c); - hash_combine(val, d); - hash_combine(val, h); - return val; -} - -size_t hash_value(const left_id &r) { - return r.hash(); + return hash_all(g, c, d, h); } u64a findMaxOffset(const set &reports, const ReportManager &rm) { @@ -997,8 +977,8 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { bool hasOrphanedTops(const RoseBuildImpl &build) { const RoseGraph &g = build.g; - ue2::unordered_map > roses; - ue2::unordered_map > suffixes; + unordered_map> roses; + unordered_map> suffixes; for (auto v : vertices_range(g)) { if (g[v].left) { diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 01bd7c54..8f350e29 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ 
-41,6 +41,7 @@ #include "util/compile_context.h" #include "util/compile_error.h" #include "util/report_manager.h" +#include "util/unordered.h" #include "util/verify_types.h" #include @@ -226,7 +227,7 @@ size_t RoseProgramHash::operator()(const RoseProgram &program) const { size_t v = 0; for (const auto &ri : program) { assert(ri); - boost::hash_combine(v, ri->hash()); + hash_combine(v, ri->hash()); } return v; } @@ -1934,14 +1935,14 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id, namespace { struct ProgKey { - ProgKey(const RoseProgram &p) : prog(&p) { } + ProgKey(const RoseProgram &p) : prog(&p) {} bool operator==(const ProgKey &b) const { return RoseProgramEquivalence()(*prog, *b.prog); } - friend size_t hash_value(const ProgKey &a) { - return RoseProgramHash()(*a.prog); + size_t hash() const { + return RoseProgramHash()(*prog); } private: const RoseProgram *prog; @@ -1954,7 +1955,7 @@ RoseProgram assembleProgramBlocks(vector &&blocks_in) { vector blocks; blocks.reserve(blocks_in.size()); /* to ensure stable reference for seen */ - unordered_set seen; + ue2_unordered_set seen; for (auto &block : blocks_in) { if (contains(seen, block)) { continue; diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index afbaa36e..cc59303f 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -34,8 +34,8 @@ #include "util/bytecode_ptr.h" #include "util/hash.h" #include "util/make_unique.h" -#include "util/ue2_containers.h" +#include #include #include @@ -168,7 +168,7 @@ struct ProgramBuild : noncopyable { /** \brief Mapping from vertex to key, for vertices with a * CHECK_NOT_HANDLED instruction. */ - ue2::unordered_map handledKeys; + std::unordered_map handledKeys; /** \brief Mapping from Rose literal ID to anchored program index. 
*/ std::map anchored_programs; @@ -178,7 +178,7 @@ struct ProgramBuild : noncopyable { /** \brief Mapping from every vertex to the groups that must be on for that * vertex to be reached. */ - ue2::unordered_map vertex_group_map; + std::unordered_map vertex_group_map; /** \brief Global bitmap of groups that can be squashed. */ rose_group squashable_groups = 0; @@ -239,13 +239,13 @@ struct engine_info { RoseProgram assembleProgramBlocks(std::vector &&blocks); RoseProgram makeLiteralProgram(const RoseBuildImpl &build, - const std::map &leftfix_info, - const std::map &suffixes, - const std::map &engine_info_by_queue, - const unordered_map &roleStateIndices, - ProgramBuild &prog_build, u32 lit_id, - const std::vector &lit_edges, - bool is_anchored_replay_program); + const std::map &leftfix_info, + const std::map &suffixes, + const std::map &engine_info_by_queue, + const std::unordered_map &roleStateIndices, + ProgramBuild &prog_build, u32 lit_id, + const std::vector &lit_edges, + bool is_anchored_replay_program); RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, ProgramBuild &prog_build, diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 0e78ec7d..b5e69ef9 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -45,16 +45,15 @@ #include "util/bitutils.h" #include "util/compile_context.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" #include "util/hash.h" #include "util/order_check.h" -#include "util/ue2_containers.h" #include #include #include -#include #include #include @@ -154,7 +153,7 @@ public: private: /* if a vertex is worth storing, it is worth storing twice */ set main_cont; /* deterministic iterator */ - ue2::unordered_set hash_cont; /* member checks */ + unordered_set hash_cont; /* member checks */ }; struct RoseAliasingInfo { @@ -175,10 +174,10 @@ struct RoseAliasingInfo { } /** \brief 
Mapping from leftfix to vertices. */ - ue2::unordered_map> rev_leftfix; + unordered_map> rev_leftfix; /** \brief Mapping from undelayed ghost to delayed vertices. */ - ue2::unordered_map> rev_ghost; + unordered_map> rev_ghost; }; } // namespace @@ -787,7 +786,7 @@ void updateEdgeTops(RoseGraph &g, RoseVertex v, const map &top_map) { static void pruneUnusedTops(CastleProto &castle, const RoseGraph &g, const set &verts) { - ue2::unordered_set used_tops; + unordered_set used_tops; for (auto v : verts) { assert(g[v].left.castle.get() == &castle); @@ -818,7 +817,7 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g, } assert(isCorrectlyTopped(h)); DEBUG_PRINTF("pruning unused tops\n"); - ue2::flat_set used_tops; + flat_set used_tops; for (auto v : verts) { assert(g[v].left.graph.get() == &h); @@ -1415,7 +1414,7 @@ void removeSingletonBuckets(vector> &buckets) { static void buildInvBucketMap(const vector> &buckets, - ue2::unordered_map &inv) { + unordered_map &inv) { inv.clear(); for (size_t i = 0; i < buckets.size(); i++) { for (auto v : buckets[i]) { @@ -1469,7 +1468,7 @@ void splitByReportSuffixBehaviour(const RoseGraph &g, vector> &buckets) { // Split by report set and suffix info. 
auto make_split_key = [&g](RoseVertex v) { - return hash_all(g[v].reports, g[v].suffix); + return hash_all(g[v].reports, suffix_id(g[v].suffix)); }; splitAndFilterBuckets(buckets, make_split_key); } @@ -1483,14 +1482,15 @@ void splitByLiteralTable(const RoseBuildImpl &build, auto make_split_key = [&](RoseVertex v) { const auto &lits = g[v].literals; assert(!lits.empty()); - return build.literals.at(*lits.begin()).table; + auto table = build.literals.at(*lits.begin()).table; + return std::underlying_type::type(table); }; splitAndFilterBuckets(buckets, make_split_key); } static void splitByNeighbour(const RoseGraph &g, vector> &buckets, - ue2::unordered_map &inv, bool succ) { + unordered_map &inv, bool succ) { vector> extras; map> neighbours_by_bucket; set picked; @@ -1575,7 +1575,7 @@ splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) { } // Neighbour splits require inverse map. - ue2::unordered_map inv; + unordered_map inv; buildInvBucketMap(buckets, inv); splitByNeighbour(g, buckets, inv, true); diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h index b7e092bb..d1181063 100644 --- a/src/rose/rose_graph.h +++ b/src/rose/rose_graph.h @@ -43,7 +43,7 @@ #include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "util/charreach.h" #include "util/depth.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "util/ue2_graph.h" #include diff --git a/src/rose/rose_in_graph.h b/src/rose/rose_in_graph.h index 42c59932..ed4644ae 100644 --- a/src/rose/rose_in_graph.h +++ b/src/rose/rose_in_graph.h @@ -45,7 +45,7 @@ #include "ue2common.h" #include "rose/rose_common.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "util/ue2_graph.h" #include "util/ue2string.h" diff --git a/src/rose/rose_in_util.cpp b/src/rose/rose_in_util.cpp index 3b31b38e..9fe47c27 100644 --- a/src/rose/rose_in_util.cpp +++ b/src/rose/rose_in_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation 
+ * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,6 @@ #include "util/container.h" #include "util/graph_range.h" #include "util/make_unique.h" -#include "util/ue2_containers.h" #include diff --git a/src/som/slot_manager.cpp b/src/som/slot_manager.cpp index 3dc74d3d..d97e8fc1 100644 --- a/src/som/slot_manager.cpp +++ b/src/som/slot_manager.cpp @@ -40,6 +40,7 @@ #include "nfagraph/ng_som_util.h" #include "nfagraph/ng_region.h" #include "util/charreach.h" +#include "util/hash.h" #include "util/make_unique.h" #include "util/dump_charclass.h" #include "util/verify_types.h" @@ -48,8 +49,6 @@ #include #include -#include - using namespace std; namespace ue2 { @@ -67,13 +66,8 @@ SlotCacheEntry::SlotCacheEntry(const NGHolder &prefix_in, size_t SlotEntryHasher::operator()(const SlotCacheEntry &e) const { assert(e.prefix); - using boost::hash_combine; - - size_t v = 0; - hash_combine(v, hash_holder(*e.prefix)); - hash_combine(v, e.parent_slot); - hash_combine(v, e.is_reset); - hash_combine(v, e.escapes.hash()); + size_t v = hash_all(hash_holder(*e.prefix), e.parent_slot, + e.is_reset, e.escapes); DEBUG_PRINTF("%zu vertices, parent_slot=%u, escapes=%s, is_reset=%d " "hashes to %zx\n", num_vertices(*e.prefix), e.parent_slot, @@ -143,7 +137,7 @@ u32 SomSlotManager::getSomSlot(const NGHolder &prefix, u32 SomSlotManager::getInitialResetSomSlot(const NGHolder &prefix, const NGHolder &g, - const ue2::unordered_map ®ion_map, + const unordered_map ®ion_map, u32 last_sent_region, bool *prefix_already_implemented) { DEBUG_PRINTF("getting initial reset; last sent region %u\n", last_sent_region); @@ -171,9 +165,9 @@ u32 SomSlotManager::getInitialResetSomSlot(const NGHolder &prefix, // Clone a copy of g (and its region map) that we will be able to store // later on. 
shared_ptr gg = make_shared(); - ue2::unordered_map orig_to_copy; + unordered_map orig_to_copy; cloneHolder(*gg, g, &orig_to_copy); - ue2::unordered_map gg_region_map; + unordered_map gg_region_map; for (const auto &m : region_map) { assert(contains(region_map, m.first)); gg_region_map.emplace(orig_to_copy.at(m.first), m.second); diff --git a/src/som/slot_manager.h b/src/som/slot_manager.h index ddb105f5..e5b2d794 100644 --- a/src/som/slot_manager.h +++ b/src/som/slot_manager.h @@ -38,10 +38,10 @@ #include "nfagraph/ng_holder.h" #include "util/bytecode_ptr.h" #include "util/noncopyable.h" -#include "util/ue2_containers.h" #include #include +#include struct NFA; @@ -69,7 +69,7 @@ public: /** prefix must be acting as a resetting sentinel and should be a dag (if * not how are we establish som?) */ u32 getInitialResetSomSlot(const NGHolder &prefix, const NGHolder &g, - const ue2::unordered_map ®ion_map, + const std::unordered_map ®ion_map, u32 last_sent_region, bool *prefix_already_implemented); diff --git a/src/som/slot_manager_internal.h b/src/som/slot_manager_internal.h index 46bfbe83..7e1fecc7 100644 --- a/src/som/slot_manager_internal.h +++ b/src/som/slot_manager_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,10 +32,11 @@ #include "nfagraph/ng.h" #include "nfagraph/ng_is_equal.h" #include "util/charreach.h" -#include "util/ue2_containers.h" #include "ue2common.h" #include +#include +#include #include namespace ue2 { @@ -43,14 +44,14 @@ namespace ue2 { struct InitialResetEntry { InitialResetEntry(std::shared_ptr sent_in, std::shared_ptr body_in, - const ue2::unordered_map &body_regions_in, + const std::unordered_map &body_regions_in, u32 sent_region_in, u32 first_bad_region_in) : sent(sent_in), body(body_in), body_regions(body_regions_in), 
sent_region(sent_region_in), first_bad_region(first_bad_region_in) {} std::shared_ptr sent; std::shared_ptr body; - ue2::unordered_map body_regions; + std::unordered_map body_regions; u32 sent_region; u32 first_bad_region; /* ~0U if it must cover the whole g */ }; @@ -85,7 +86,7 @@ struct SlotEntryEqual { }; struct SlotCache { - typedef ue2::unordered_set CacheStore; void insert(const NGHolder &prefix, const CharReach &escapes, @@ -96,8 +97,8 @@ struct SlotCache { CacheStore store; - ue2::unordered_set, NGHolderHasher, - NGHolderEqual> initial_prefixes; + std::unordered_set, NGHolderHasher, + NGHolderEqual> initial_prefixes; std::vector initial_resets; }; diff --git a/src/util/accel_scheme.h b/src/util/accel_scheme.h index f524fe93..2a067b30 100644 --- a/src/util/accel_scheme.h +++ b/src/util/accel_scheme.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #define ACCEL_SCHEME_H #include "util/charreach.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include @@ -39,7 +39,7 @@ namespace ue2 { #define MAX_ACCEL_DEPTH 4 struct AccelScheme { - flat_set > double_byte; + flat_set> double_byte; CharReach cr = CharReach::dot(); CharReach double_cr; u32 offset = MAX_ACCEL_DEPTH + 1; diff --git a/src/util/bitfield.h b/src/util/bitfield.h index a71c1f88..24c0c580 100644 --- a/src/util/bitfield.h +++ b/src/util/bitfield.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,12 +36,12 @@ #include "ue2common.h" #include "popcount.h" #include "util/bitutils.h" +#include "util/hash.h" #include #include #include -#include 
namespace ue2 { @@ -373,7 +373,7 @@ public: /// Simple hash. size_t hash() const { - return boost::hash_range(std::begin(bits), std::end(bits)); + return ue2_hasher()(bits); } /// Sentinel value meaning "no more bits", used by find_first and @@ -420,12 +420,17 @@ private: std::array bits; }; -/** \brief Boost-style hash free function. */ -template -size_t hash_value(const bitfield &b) { - return b.hash(); -} - } // namespace ue2 +namespace std { + +template +struct hash> { + size_t operator()(const ue2::bitfield &b) const { + return b.hash(); + } +}; + +} // namespace std + #endif // BITFIELD_H diff --git a/src/util/charreach.h b/src/util/charreach.h index 53f2a5d2..f6d3a2af 100644 --- a/src/util/charreach.h +++ b/src/util/charreach.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -196,12 +196,17 @@ bool isSubsetOf(const CharReach &small, const CharReach &big); bool isutf8ascii(const CharReach &cr); bool isutf8start(const CharReach &cr); -/** \brief Boost-style hash free function. 
*/ -static really_inline -size_t hash_value(const CharReach &cr) { - return cr.hash(); -} - } // namespace ue2 +namespace std { + +template<> +struct hash { + size_t operator()(const ue2::CharReach &cr) const { + return cr.hash(); + } +}; + +} // namespace std + #endif // NG_CHARREACH_H diff --git a/src/util/clique.cpp b/src/util/clique.cpp index 79f06932..c2befea4 100644 --- a/src/util/clique.cpp +++ b/src/util/clique.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,6 @@ #include "container.h" #include "graph_range.h" #include "make_unique.h" -#include "ue2_containers.h" #include #include diff --git a/src/util/depth.h b/src/util/depth.h index 9af1ded8..5305c6f1 100644 --- a/src/util/depth.h +++ b/src/util/depth.h @@ -221,8 +221,8 @@ public: std::string str() const; #endif - friend size_t hash_value(const depth &d) { - return d.val; + size_t hash() const { + return val; } private: @@ -260,10 +260,6 @@ struct DepthMinMax : totally_ordered { }; -inline size_t hash_value(const DepthMinMax &d) { - return hash_all(d.min, d.max); -} - /** * \brief Merge two DepthMinMax values together to produce their union. 
*/ @@ -271,4 +267,22 @@ DepthMinMax unionDepthMinMax(const DepthMinMax &a, const DepthMinMax &b); } // namespace ue2 +namespace std { + +template<> +struct hash { + size_t operator()(const ue2::depth &d) const { + return d.hash(); + } +}; + +template<> +struct hash { + size_t operator()(const ue2::DepthMinMax &d) const { + return hash_all(d.min, d.max); + } +}; + +} // namespace + #endif // DEPTH_H diff --git a/src/util/ue2_containers.h b/src/util/flat_containers.h similarity index 96% rename from src/util/ue2_containers.h rename to src/util/flat_containers.h index d345a4fa..41452eb4 100644 --- a/src/util/ue2_containers.h +++ b/src/util/flat_containers.h @@ -26,10 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef UTIL_UE2_CONTAINERS_H_ -#define UTIL_UE2_CONTAINERS_H_ +#ifndef UTIL_FLAT_CONTAINERS_H +#define UTIL_FLAT_CONTAINERS_H #include "ue2common.h" +#include "util/hash.h" #include "util/operators.h" #include "util/small_vector.h" @@ -38,19 +39,10 @@ #include #include -#include #include -#include -#include namespace ue2 { -/** \brief Unordered set container implemented internally as a hash table. */ -using boost::unordered_set; - -/** \brief Unordered map container implemented internally as a hash table. */ -using boost::unordered_map; - namespace flat_detail { // Iterator facade that wraps an underlying iterator, so that we get our @@ -363,11 +355,6 @@ public: friend void swap(flat_set &a, flat_set &b) { a.swap(b); } - - // Free hash function. - friend size_t hash_value(const flat_set &a) { - return boost::hash_range(a.begin(), a.end()); - } }; /** @@ -652,13 +639,26 @@ public: friend void swap(flat_map &a, flat_map &b) { a.swap(b); } +}; - // Free hash function. 
- friend size_t hash_value(const flat_map &a) { - return boost::hash_range(a.begin(), a.end()); +} // namespace ue2 + +namespace std { + +template +struct hash> { + size_t operator()(const ue2::flat_set &f) { + return ue2::ue2_hasher()(f); } }; -} // namespace +template +struct hash> { + size_t operator()(const ue2::flat_map &f) { + return ue2::ue2_hasher()(f); + } +}; -#endif // UTIL_UE2_CONTAINERS_H_ +} // namespace std + +#endif // UTIL_FLAT_CONTAINERS_H diff --git a/src/util/graph.h b/src/util/graph.h index 39e86487..9d6bb955 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -35,8 +35,9 @@ #include "container.h" #include "ue2common.h" +#include "util/flat_containers.h" #include "util/graph_range.h" -#include "util/ue2_containers.h" +#include "util/unordered.h" #include #include @@ -115,7 +116,7 @@ bool has_proper_successor(const typename Graph::vertex_descriptor &v, template void find_reachable(const Graph &g, const SourceCont &sources, OutCont *out) { using vertex_descriptor = typename Graph::vertex_descriptor; - ue2::unordered_map colours; + std::unordered_map colours; for (auto v : sources) { boost::depth_first_visit(g, v, @@ -133,7 +134,7 @@ void find_reachable(const Graph &g, const SourceCont &sources, OutCont *out) { template void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) { using vertex_descriptor = typename Graph::vertex_descriptor; - ue2::unordered_set reachable; + std::unordered_set reachable; find_reachable(g, sources, &reachable); @@ -182,7 +183,8 @@ find_vertices_in_cycles(const Graph &g) { template bool has_parallel_edge(const Graph &g) { using vertex_descriptor = typename Graph::vertex_descriptor; - ue2::unordered_set> seen; + ue2_unordered_set> seen; + for (const auto &e : edges_range(g)) { auto u = source(e, g); auto v = target(e, g); diff --git a/src/util/hash.h b/src/util/hash.h index 6f76e43d..1c35d20c 100644 --- a/src/util/hash.h +++ b/src/util/hash.h @@ -34,16 +34,126 @@ #ifndef UTIL_HASH_H #define 
UTIL_HASH_H -#include -#include +#include +#include +#include namespace ue2 { namespace hash_detail { +inline +void hash_combine_impl(size_t &seed, size_t value) { + // Note: constants explicitly truncated on 32-bit platforms. + const size_t a = (size_t)0x0b4e0ef37bc32127ULL; + const size_t b = (size_t)0x318f07b0c8eb9be9ULL; + seed ^= value * a; + seed += b; +} + +/** \brief Helper that determines whether std::begin() exists for T. */ +template +struct is_container_check { +private: + template + static auto has_begin_function(const C &obj) -> decltype(std::begin(obj)) { + return std::begin(obj); + } + static void has_begin_function(...) { + return; + } + using has_begin_type = decltype(has_begin_function(std::declval())); + +public: + static const bool value = !std::is_void::value; +}; + +/** \brief Type trait to enable on whether T is a container. */ +template +struct is_container + : public ::std::integral_constant::value> {}; + +/** \brief Helper that determines whether T::hash() exists. */ +template +struct has_hash_member_check { +private: + template + static auto has_hash_member_function(const C &obj) -> decltype(obj.hash()) { + return obj.hash(); + } + static void has_hash_member_function(...) { + return; + } + using has_hash = decltype(has_hash_member_function(std::declval())); + +public: + static const bool value = !std::is_void::value; +}; + +/** \brief Type trait to enable on whether T::hash() exists. */ +template +struct has_hash_member + : public ::std::integral_constant::value> {}; + +/** \brief Default hash: falls back to std::hash. */ +template +struct ue2_hash { + using decayed_type = typename std::decay::type; + size_t operator()(const T &obj) const { + return std::hash()(obj); + } +}; + +/** \brief Hash for std::pair. 
*/ +template +struct ue2_hash, void> { + size_t operator()(const std::pair &p) const { + size_t v = 0; + hash_combine_impl(v, ue2_hash()(p.first)); + hash_combine_impl(v, ue2_hash()(p.second)); + return v; + } +}; + +/** \brief Hash for any type that has a hash() member function. */ +template +struct ue2_hash::value>::type> { + size_t operator()(const T &obj) const { + return obj.hash(); + } +}; + +/** \brief Hash for any container type that supports std::begin(). */ +template +struct ue2_hash::value && + !has_hash_member::value>::type> { + size_t operator()(const T &obj) const { + size_t v = 0; + for (const auto &elem : obj) { + using element_type = typename std::decay::type; + hash_combine_impl(v, ue2_hash()(elem)); + } + return v; + } +}; + +/** \brief Hash for enum types. */ +template +struct ue2_hash::value>::type> { + size_t operator()(const T &obj) const { + using utype = typename std::underlying_type::type; + return ue2_hash()(static_cast(obj)); + } +}; + +template +void hash_combine(size_t &seed, const T &obj) { + hash_combine_impl(seed, ue2_hash()(obj)); +} + template void hash_build(size_t &v, const T &obj) { - boost::hash_combine(v, obj); + hash_combine(v, obj); } template @@ -54,6 +164,21 @@ void hash_build(size_t &v, const T &obj, Args&&... args) { } // namespace hash_detail +using hash_detail::hash_combine; + +/** + * \brief Hasher for general use. + * + * Provides operators for most standard containers and falls back to + * std::hash. + */ +struct ue2_hasher { + template + size_t operator()(const T &obj) const { + return hash_detail::ue2_hash()(obj); + } +}; + /** * \brief Computes the combined hash of all its arguments. * @@ -70,15 +195,6 @@ size_t hash_all(Args&&... args) { return v; } -/** - * \brief Compute the hash of all the elements of any range on which we can - * call std::begin() and std::end(). 
- */ -template -size_t hash_range(const Range &r) { - return boost::hash_range(std::begin(r), std::end(r)); -} - } // namespace ue2 #endif // UTIL_HASH_H diff --git a/src/util/hash_dynamic_bitset.h b/src/util/hash_dynamic_bitset.h index 315aed34..65bc29c3 100644 --- a/src/util/hash_dynamic_bitset.h +++ b/src/util/hash_dynamic_bitset.h @@ -34,8 +34,9 @@ #ifndef UTIL_HASH_DYNAMIC_BITSET_H #define UTIL_HASH_DYNAMIC_BITSET_H +#include "hash.h" + #include -#include #include @@ -68,7 +69,7 @@ struct hash_output_it { template void operator=(const T &val) const { - boost::hash_combine(*out, val); + hash_combine(*out, val); } private: diff --git a/src/util/multibit_build.h b/src/util/multibit_build.h index 2d7b5fc2..ba5c8dfa 100644 --- a/src/util/multibit_build.h +++ b/src/util/multibit_build.h @@ -43,10 +43,16 @@ bool operator==(const mmbit_sparse_iter &a, const mmbit_sparse_iter &b) { return a.mask == b.mask && a.val == b.val; } -inline -size_t hash_value(const mmbit_sparse_iter &iter) { - return ue2::hash_all(iter.mask, iter.val); -} +namespace std { + +template<> +struct hash { + size_t operator()(const mmbit_sparse_iter &iter) const { + return ue2::hash_all(iter.mask, iter.val); + } +}; + +} // namespace std namespace ue2 { diff --git a/src/util/partitioned_set.h b/src/util/partitioned_set.h index a9e4644d..41710fe7 100644 --- a/src/util/partitioned_set.h +++ b/src/util/partitioned_set.h @@ -31,7 +31,7 @@ #include "container.h" #include "noncopyable.h" -#include "ue2_containers.h" +#include "flat_containers.h" #include "ue2common.h" #include diff --git a/src/util/report.h b/src/util/report.h index a8e233ff..0d5e69b8 100644 --- a/src/util/report.h +++ b/src/util/report.h @@ -206,13 +206,6 @@ bool operator==(const Report &a, const Report &b) { a.topSquashDistance == b.topSquashDistance; } -inline -size_t hash_value(const Report &r) { - return hash_all(r.type, r.quashSom, r.minOffset, r.maxOffset, r.minLength, - r.ekey, r.offsetAdjust, r.onmatch, r.revNfaIndex, - 
r.somDistance, r.topSquashDistance); -} - static inline Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey) { Report ir(EXTERNAL_CALLBACK, report); @@ -262,6 +255,19 @@ bool isSimpleExhaustible(const Report &ir) { return true; } -} // namespace +} // namespace ue2 + +namespace std { + +template<> +struct hash { + std::size_t operator()(const ue2::Report &r) const { + return ue2::hash_all(r.type, r.quashSom, r.minOffset, r.maxOffset, + r.minLength, r.ekey, r.offsetAdjust, r.onmatch, + r.revNfaIndex, r.somDistance, r.topSquashDistance); + } +}; + +} // namespace std #endif // UTIL_REPORT_H diff --git a/src/util/report_manager.h b/src/util/report_manager.h index 95e14a2c..aa359ed7 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -38,10 +38,10 @@ #include "util/compile_error.h" #include "util/noncopyable.h" #include "util/report.h" -#include "util/ue2_containers.h" #include #include +#include #include namespace ue2 { @@ -131,17 +131,17 @@ private: /** \brief Mapping from Report to ID (inverse of \ref reportIds * vector). */ - unordered_map reportIdToInternalMap; + std::unordered_map reportIdToInternalMap; /** \brief Mapping from ReportID to dedupe key. */ - unordered_map reportIdToDedupeKey; + std::unordered_map reportIdToDedupeKey; /** \brief Mapping from ReportID to Rose program offset in bytecode. */ - unordered_map reportIdToProgramOffset; + std::unordered_map reportIdToProgramOffset; /** \brief Mapping from external match ids to information about that * id. */ - unordered_map externalIdMap; + std::unordered_map externalIdMap; /** \brief Mapping from expression index to exhaustion key. */ std::map toExhaustibleKeyMap; diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h index 1409e091..bf719fd7 100644 --- a/src/util/ue2_graph.h +++ b/src/util/ue2_graph.h @@ -34,7 +34,6 @@ #include "util/noncopyable.h" #include "util/operators.h" -#include #include /* vertex_index_t, ... 
*/ #include /* no_property */ #include @@ -42,7 +41,9 @@ #include #include +#include /* hash */ #include /* tie */ +#include /* is_same, etc */ #include /* pair, declval */ /* @@ -187,9 +188,8 @@ public: } bool operator==(const vertex_descriptor b) const { return p == b.p; } - friend size_t hash_value(vertex_descriptor v) { - using boost::hash_value; - return hash_value(v.serial); + size_t hash() const { + return std::hash()(serial); } private: @@ -227,9 +227,8 @@ public: } bool operator==(const edge_descriptor b) const { return p == b.p; } - friend size_t hash_value(edge_descriptor e) { - using boost::hash_value; - return hash_value(e.serial); + size_t hash() const { + return std::hash()(serial); } private: @@ -1288,7 +1287,7 @@ edge_index_upper_bound(const Graph &g) { using boost::vertex_index; using boost::edge_index; -} +} // namespace ue2 namespace boost { @@ -1305,5 +1304,29 @@ struct property_map())) const_type; }; -} +} // namespace boost + +namespace std { + +/* Specialization of std::hash so that vertex_descriptor can be used in + * unordered containers. */ +template +struct hash> { + using vertex_descriptor = ue2::graph_detail::vertex_descriptor; + std::size_t operator()(const vertex_descriptor &v) const { + return v.hash(); + } +}; + +/* Specialization of std::hash so that edge_descriptor can be used in + * unordered containers. */ +template +struct hash> { + using edge_descriptor = ue2::graph_detail::edge_descriptor; + std::size_t operator()(const edge_descriptor &e) const { + return e.hash(); + } +}; + +} // namespace std #endif diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index bde975ad..02d7b713 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -29,11 +29,14 @@ /** \file * \brief Tools for string manipulation, ue2_literal definition. 
*/ -#include "charreach.h" -#include "compare.h" + #include "ue2string.h" +#include "charreach.h" +#include "compare.h" + #include +#include #include #include #include diff --git a/src/util/ue2string.h b/src/util/ue2string.h index d9fbadcd..9eef65da 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -208,14 +208,6 @@ private: std::vector nocase; /* for trolling value */ }; -inline -size_t hash_value(const ue2_literal::elem &elem) { - return hash_all(elem.c, elem.nocase); -} - -inline -size_t hash_value(const ue2_literal &lit) { return hash_range(lit); } - /// Return a reversed copy of this literal. ue2_literal reverse_literal(const ue2_literal &in); @@ -314,4 +306,22 @@ std::string escapeString(const ue2_literal &lit); } // namespace ue2 +namespace std { + +template<> +struct hash { + size_t operator()(const ue2::ue2_literal::elem &elem) const { + return ue2::hash_all(elem.c, elem.nocase); + } +}; + +template<> +struct hash { + size_t operator()(const ue2::ue2_literal &lit) const { + return ue2::ue2_hasher()(lit); + } +}; + +} // namespace std + #endif diff --git a/src/util/unordered.h b/src/util/unordered.h new file mode 100644 index 00000000..a8aa61cd --- /dev/null +++ b/src/util/unordered.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTIL_UNORDERED_H +#define UTIL_UNORDERED_H + +/** + * \file + * \brief Unordered set and map containers that default to using our own hasher. + */ + +#include "hash.h" + +#include +#include + +namespace ue2 { + +template +using ue2_unordered_set = std::unordered_set; + +template +using ue2_unordered_map = std::unordered_map; + +} // namespace ue2 + + +#endif // UTIL_UNORDERED_H diff --git a/unit/internal/bitfield.cpp b/unit/internal/bitfield.cpp index e5c5f0ce..40087ef7 100644 --- a/unit/internal/bitfield.cpp +++ b/unit/internal/bitfield.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,10 +30,11 @@ #include "gtest/gtest.h" #include "util/bitfield.h" -#include "util/ue2_containers.h" #include +#include +using namespace std; using namespace ue2; template @@ -393,9 +394,9 @@ TYPED_TEST(BitfieldTest, find_nth_sparse) { TYPED_TEST(BitfieldTest, unordered_set) { const size_t size = TypeParam::size(); - // Exercise the hash_value free function by adding 
bitfields to an + // Exercise the hash specialisation by adding bitfields to an // unordered_set. - ue2::unordered_set s; + unordered_set s; s.reserve(size); for (size_t i = 0; i < size; ++i) { diff --git a/unit/internal/depth.cpp b/unit/internal/depth.cpp index ad9ffe38..726aa92c 100644 --- a/unit/internal/depth.cpp +++ b/unit/internal/depth.cpp @@ -29,9 +29,11 @@ #include "config.h" #include "util/depth.h" -#include "util/ue2_containers.h" #include "gtest/gtest.h" +#include + +using namespace std; using namespace ue2; static UNUSED @@ -265,7 +267,7 @@ TEST(depth, u64a_operators) { } TEST(depth, unordered_set) { - ue2::unordered_set depths; + unordered_set depths; for (const auto &val : finite_values) { depths.emplace(val); diff --git a/unit/internal/flat_map.cpp b/unit/internal/flat_map.cpp index 6a81bbfe..610c71e1 100644 --- a/unit/internal/flat_map.cpp +++ b/unit/internal/flat_map.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,7 +28,7 @@ #include "config.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "ue2common.h" #include "gtest/gtest.h" @@ -403,6 +403,11 @@ TEST(flat_map, max_size) { ASSERT_LE(1ULL << 24, f.max_size()); } +template +size_t hash_value(const FlatMap &f) { + return std::hash()(f); +} + TEST(flat_map, hash_value) { const vector> input = { {0, 0}, {3, 1}, {76, 2}, {132, 3}, {77, 4}, {99999, 5}, {100, 6}}; diff --git a/unit/internal/flat_set.cpp b/unit/internal/flat_set.cpp index 3bee0edb..10607a6f 100644 --- a/unit/internal/flat_set.cpp +++ b/unit/internal/flat_set.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the 
following conditions are met: @@ -28,7 +28,7 @@ #include "config.h" -#include "util/ue2_containers.h" +#include "util/flat_containers.h" #include "ue2common.h" #include "gtest/gtest.h" @@ -393,6 +393,11 @@ TEST(flat_set, max_size) { ASSERT_LE(1ULL << 24, f.max_size()); } +template +size_t hash_value(const FlatSet &f) { + return std::hash()(f); +} + TEST(flat_set, hash_value) { const vector input = {0, 15, 3, 1, 20, 32768, 24000000, 17, 100, 101, 104, 99999}; diff --git a/unit/internal/nfagraph_util.cpp b/unit/internal/nfagraph_util.cpp index b6952f5a..e6a58b55 100644 --- a/unit/internal/nfagraph_util.cpp +++ b/unit/internal/nfagraph_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -79,8 +79,8 @@ TEST(NFAGraph, split1) { NFAVertex pivot = c; - ue2::unordered_map lhs_map; - ue2::unordered_map rhs_map; + unordered_map lhs_map; + unordered_map rhs_map; splitGraph(src, pivot, &lhs, &lhs_map, &rhs, &rhs_map); @@ -130,8 +130,8 @@ TEST(NFAGraph, split2) { NFAVertex pivot = c; - ue2::unordered_map lhs_map; - ue2::unordered_map rhs_map; + unordered_map lhs_map; + unordered_map rhs_map; splitGraph(src, pivot, &lhs, &lhs_map, &rhs, &rhs_map); @@ -203,8 +203,8 @@ TEST(NFAGraph, split3) { pivots.push_back(d); pivots.push_back(g); - ue2::unordered_map lhs_map; - ue2::unordered_map rhs_map; + unordered_map lhs_map; + unordered_map rhs_map; splitGraph(src, pivots, &lhs, &lhs_map, &rhs, &rhs_map); @@ -280,8 +280,8 @@ TEST(NFAGraph, split4) { pivots.push_back(d); pivots.push_back(g); - ue2::unordered_map lhs_map; - ue2::unordered_map rhs_map; + unordered_map lhs_map; + unordered_map rhs_map; splitGraph(src, pivots, &lhs, &lhs_map, &rhs, &rhs_map); diff --git a/unit/internal/rose_build_merge.cpp b/unit/internal/rose_build_merge.cpp index 291c241a..ed7c2bdc 100644 
--- a/unit/internal/rose_build_merge.cpp +++ b/unit/internal/rose_build_merge.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,7 +42,11 @@ #include "smallwrite/smallwrite_build.h" #include "som/slot_manager.h" -using std::vector; +#include +#include +#include + +using namespace std; using namespace ue2; static @@ -78,7 +82,7 @@ RoseVertex addVertex(RoseBuildImpl &build, RoseVertex parent, u32 lit_id) { static size_t numUniqueSuffixGraphs(const RoseGraph &g) { - ue2::unordered_set seen; + unordered_set seen; for (const auto &v : vertices_range(g)) { if (g[v].suffix) { diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index 19ab7edf..c5fad785 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -42,7 +42,6 @@ #include "util/container.h" #include "util/graph_range.h" #include "util/make_unique.h" -#include "util/ue2_containers.h" #include "util/ue2string.h" #include "util/unicode_def.h" #include "util/unicode_set.h" @@ -52,6 +51,7 @@ #include #include #include +#include #include #include @@ -143,7 +143,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, vector> open; open.push_back(ue2::make_unique(1, g.start)); - ue2::unordered_set one_way_in; + unordered_set one_way_in; for (const auto &v : vertices_range(g)) { if (in_degree(v, g) <= 1) { one_way_in.insert(v); diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp index 97a18375..0a896f73 100644 --- a/util/ng_find_matches.cpp +++ b/util/ng_find_matches.cpp @@ -41,6 +41,7 @@ #include "util/compare.h" #include "util/report.h" #include "util/report_manager.h" +#include "util/unordered.h" #include @@ -770,7 +771,7 @@ struct EdgeCache { } private: - unordered_map, NFAEdge> cache; + ue2_unordered_map, NFAEdge> cache; }; struct 
fmstate { From 33823d60d17107054bfe672ea1db8f3034684833 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 19 Jul 2017 11:20:39 +1000 Subject: [PATCH 101/190] tidy: "ue2::flat_set/map" -> "flat_set/map" --- src/nfa/castlecompile.cpp | 2 +- src/nfa/goughcompile_internal.h | 16 +++++++-------- src/nfagraph/ng_fixed_width.cpp | 6 +++--- src/nfagraph/ng_holder.h | 2 +- src/nfagraph/ng_limex_accel.cpp | 2 +- src/nfagraph/ng_limex_accel.h | 3 +-- src/nfagraph/ng_literal_decorated.cpp | 4 ++-- src/nfagraph/ng_repeat.cpp | 4 ++-- src/nfagraph/ng_small_literal_set.cpp | 8 ++++---- src/nfagraph/ng_som.cpp | 6 +++--- src/nfagraph/ng_util.cpp | 4 ++-- src/parser/Parser.rl | 2 +- src/parser/check_refs.cpp | 2 +- src/rose/rose_build_add.cpp | 4 ++-- src/rose/rose_build_add_internal.h | 9 ++++----- src/rose/rose_build_add_mask.cpp | 29 +++++++++++++-------------- src/rose/rose_build_compile.cpp | 4 ++-- src/rose/rose_build_dedupe.cpp | 6 +++--- src/rose/rose_build_exclusive.cpp | 7 ++++--- src/rose/rose_build_merge.cpp | 4 ++-- src/util/graph.h | 4 ++-- src/util/partitioned_set.h | 5 ++--- src/util/report_manager.cpp | 2 +- 23 files changed, 65 insertions(+), 70 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 3505e08a..b709d3c5 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -883,7 +883,7 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2) { } bool requiresDedupe(const CastleProto &proto, - const ue2::flat_set &reports) { + const flat_set &reports) { for (const auto &report : reports) { auto it = proto.report_map.find(report); if (it == end(proto.report_map)) { diff --git a/src/nfa/goughcompile_internal.h b/src/nfa/goughcompile_internal.h index 9de88c77..e6454052 100644 --- a/src/nfa/goughcompile_internal.h +++ b/src/nfa/goughcompile_internal.h @@ -106,10 +106,10 @@ struct GoughSSAVarJoin; struct GoughSSAVar : noncopyable { GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {} virtual ~GoughSSAVar(); 
- const ue2::flat_set &get_inputs() const { + const flat_set &get_inputs() const { return inputs; } - const ue2::flat_set &get_outputs() const { + const flat_set &get_outputs() const { return outputs; } virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0; @@ -127,8 +127,8 @@ struct GoughSSAVar : noncopyable { clear_outputs(); } protected: - ue2::flat_set inputs; - ue2::flat_set outputs; + flat_set inputs; + flat_set outputs; friend struct GoughSSAVarWithInputs; friend struct GoughSSAVarMin; friend struct GoughSSAVarJoin; @@ -184,16 +184,14 @@ struct GoughSSAVarJoin : public GoughSSAVarWithInputs { void add_input(GoughSSAVar *v, GoughEdge prev); - const ue2::flat_set &get_edges_for_input(GoughSSAVar *input) - const; - const std::map > &get_input_map() - const; + const flat_set &get_edges_for_input(GoughSSAVar *input) const; + const std::map> &get_input_map() const; protected: void remove_input_raw(GoughSSAVar *v) override; private: - std::map> input_map; + std::map> input_map; }; struct gough_accel_state_info { diff --git a/src/nfagraph/ng_fixed_width.cpp b/src/nfagraph/ng_fixed_width.cpp index 978dad44..8fb264d8 100644 --- a/src/nfagraph/ng_fixed_width.cpp +++ b/src/nfagraph/ng_fixed_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,7 +48,7 @@ namespace ue2 { static bool findMask(const NGHolder &g, vector *mask, bool *anchored, - ue2::flat_set *reports) { + flat_set *reports) { DEBUG_PRINTF("looking for a mask pattern\n"); set s_succ; insert(&s_succ, adjacent_vertices(g.start, g)); @@ -117,7 +117,7 @@ bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) { return false; } - ue2::flat_set reports; + flat_set reports; bool anchored = false; vector mask; diff --git a/src/nfagraph/ng_holder.h 
b/src/nfagraph/ng_holder.h index f61c476a..36cf6244 100644 --- a/src/nfagraph/ng_holder.h +++ b/src/nfagraph/ng_holder.h @@ -67,7 +67,7 @@ struct NFAGraphEdgeProps { /** \brief For graphs that will be implemented as multi-top engines, this * specifies the top events. Only used on edges from the start vertex. */ - ue2::flat_set tops; + flat_set tops; /** \brief Flags associated with assertions. */ u32 assert_flags = 0; diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 80e08a7f..fa46a42c 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -335,7 +335,7 @@ struct DAccelScheme { return false; } - ue2::flat_set > double_byte; + flat_set> double_byte; CharReach double_cr; u32 double_offset = 0; }; diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index 4c3d2b91..f6f7f1b3 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -57,8 +57,7 @@ struct CompileContext; void findAccelFriends(const NGHolder &g, NFAVertex v, const std::map &br_cyclic, - u32 offset, - ue2::flat_set *friends); + u32 offset, flat_set *friends); #define DOUBLE_SHUFTI_LIMIT 20 diff --git a/src/nfagraph/ng_literal_decorated.cpp b/src/nfagraph/ng_literal_decorated.cpp index 1a8cafac..61a31dbf 100644 --- a/src/nfagraph/ng_literal_decorated.cpp +++ b/src/nfagraph/ng_literal_decorated.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -192,7 +192,7 @@ struct PathMask { } vector mask; - ue2::flat_set reports; + flat_set reports; bool is_anchored; bool is_eod; }; diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index da42b36d..7add6557 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -1598,7 +1598,7 @@ vector 
getUnionedTrigger(const NGHolder &g, const NFAVertex v) { vector trigger; - ue2::flat_set curr, next; + flat_set curr, next; insert(&curr, inv_adjacent_vertices(v, g)); if (contains(curr, g.start)) { @@ -2234,7 +2234,7 @@ void analyseRepeats(NGHolder &g, const ReportManager *rm, #ifndef NDEBUG // So we can assert that the number of tops hasn't changed at the end of // this analysis. - const ue2::flat_set allTops = getTops(g); + const flat_set allTops = getTops(g); #endif // Later on, we're (a little bit) dependent on depth information for diff --git a/src/nfagraph/ng_small_literal_set.cpp b/src/nfagraph/ng_small_literal_set.cpp index fb191efa..9c2d9ba3 100644 --- a/src/nfagraph/ng_small_literal_set.cpp +++ b/src/nfagraph/ng_small_literal_set.cpp @@ -100,7 +100,7 @@ bool operator<(const sls_literal &a, const sls_literal &b) { static bool checkLongMixedSensitivityLiterals( - const map> &literals) { + const map> &literals) { const size_t len = MAX_MASK2_WIDTH; for (const sls_literal &lit : literals | map_keys) { @@ -114,7 +114,7 @@ bool checkLongMixedSensitivityLiterals( static bool findLiterals(const NGHolder &g, - map> *literals) { + map> *literals) { vector order = getTopoOrdering(g); vector> built(num_vertices(g)); @@ -198,7 +198,7 @@ bool findLiterals(const NGHolder &g, } static -size_t min_period(const map> &literals) { +size_t min_period(const map> &literals) { size_t rv = SIZE_MAX; for (const sls_literal &lit : literals | map_keys) { @@ -229,7 +229,7 @@ bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g, DEBUG_PRINTF("looking for literals\n"); - map> literals; + map> literals; if (!findLiterals(g, &literals)) { DEBUG_PRINTF(":(\n"); return false; diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 6520a590..45917b45 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -267,7 +267,7 @@ bool validateEXSL(const NGHolder &g, const vector escapes_vec(1, escapes); const vector notescapes_vec(1, ~escapes); - ue2::flat_set 
states; + flat_set states; /* turn on all states past the prefix */ DEBUG_PRINTF("region %u is cutover\n", region); for (auto v : vertices_range(g)) { @@ -280,7 +280,7 @@ bool validateEXSL(const NGHolder &g, states = execute_graph(g, escapes_vec, states); /* flood with any number of not escapes */ - ue2::flat_set prev_states; + flat_set prev_states; while (prev_states != states) { prev_states = states; states = execute_graph(g, notescapes_vec, states); @@ -290,7 +290,7 @@ bool validateEXSL(const NGHolder &g, /* find input starts to use for when we are running the prefix through as * when the escape character arrives we may be in matching the prefix * already */ - ue2::flat_set prefix_start_states; + flat_set prefix_start_states; for (auto v : vertices_range(prefix)) { if (v != prefix.accept && v != prefix.acceptEod /* and as we have already made it past the prefix once */ diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 83b29257..59c73498 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -450,8 +450,8 @@ void appendLiteral(NGHolder &h, const ue2_literal &s) { } } -ue2::flat_set getTops(const NGHolder &h) { - ue2::flat_set tops; +flat_set getTops(const NGHolder &h) { + flat_set tops; for (const auto &e : out_edges_range(h.start, h)) { insert(&tops, h[e].tops); } diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 43dfc760..8643aebf 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -1950,7 +1950,7 @@ unique_ptr parse(const char *ptr, ParseMode &globalMode) { unsigned groupIndex = 1; // Set storing group names that are currently in use. - ue2::flat_set groupNames; + flat_set groupNames; // Root sequence. 
unique_ptr rootSeq = ue2::make_unique(); diff --git a/src/parser/check_refs.cpp b/src/parser/check_refs.cpp index 0badc780..60b5b6ba 100644 --- a/src/parser/check_refs.cpp +++ b/src/parser/check_refs.cpp @@ -114,7 +114,7 @@ public: ReferenceVisitor::~ReferenceVisitor() {} void checkReferences(const Component &root, unsigned int groupIndices, - const ue2::flat_set &groupNames) { + const flat_set &groupNames) { ReferenceVisitor vis(groupIndices, groupNames); root.accept(vis); } diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index f36fa576..b003336a 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -121,7 +121,7 @@ RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset, RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent, u32 minBound, u32 maxBound, u32 literalId, size_t literalLength, - const ue2::flat_set &reports) { + const flat_set &reports) { assert(parent != RoseGraph::null_vertex()); RoseGraph &g = build->g; @@ -1641,7 +1641,7 @@ bool roseCheckRose(const RoseInGraph &ig, bool prefilter, } void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit, - const ue2::flat_set &reports) { + const flat_set &reports) { assert(!reports.empty()); if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) && diff --git a/src/rose/rose_build_add_internal.h b/src/rose/rose_build_add_internal.h index 569485a4..143f1dfa 100644 --- a/src/rose/rose_build_add_internal.h +++ b/src/rose/rose_build_add_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,8 +31,7 @@ #include "rose_graph.h" #include "ue2common.h" - -#include +#include "util/flat_containers.h" namespace ue2 { @@ -41,8 +40,8 @@ class RoseBuildImpl; RoseVertex createVertex(RoseBuildImpl *build, 
const RoseVertex parent, u32 minBound, u32 maxBound, u32 literalId, size_t literalLength, - const ue2::flat_set &reports); + const flat_set &reports); } // namespace ue2 -#endif +#endif // ROSE_BUILD_ADD_INTERNAL_H diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index bd8eed0c..c60c053e 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -414,8 +414,8 @@ bool validateTransientMask(const vector &mask, bool anchored, static bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) { - ue2::flat_set curr = {g.accept}; - ue2::flat_set next; + flat_set curr = {g.accept}; + flat_set next; for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { const CharReach &cr = *it; @@ -451,7 +451,7 @@ bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) { static void addTransientMask(RoseBuildImpl &build, const vector &mask, - const ue2::flat_set &reports, bool anchored, + const flat_set &reports, bool anchored, bool eod) { vector lits; u32 lit_minBound; /* minBound of each literal in lit */ @@ -516,7 +516,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, ENSURE_AT_LEAST(&build.ematcher_region_size, mask.size()); } - const ue2::flat_set no_reports; + const flat_set no_reports; for (const auto &lit : lits) { u32 lit_id = build.getLiteralId(lit, msk, cmp, delay, table); @@ -553,7 +553,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, } static -unique_ptr buildMaskRhs(const ue2::flat_set &reports, +unique_ptr buildMaskRhs(const flat_set &reports, const vector &mask, u32 suffix_len) { assert(suffix_len); @@ -581,10 +581,9 @@ unique_ptr buildMaskRhs(const ue2::flat_set &reports, } static -void doAddMask(RoseBuildImpl &tbi, bool anchored, - const vector &mask, const ue2_literal &lit, - u32 prefix_len, u32 suffix_len, - const ue2::flat_set &reports) { +void doAddMask(RoseBuildImpl &tbi, bool anchored, const vector &mask, + const ue2_literal &lit, u32 prefix_len, 
u32 suffix_len, + const flat_set &reports) { /* Note: bounds are relative to literal start */ RoseInGraph ig; RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(anchored), ig); @@ -711,7 +710,7 @@ bool checkAllowMask(const vector &mask, ue2_literal *lit, } bool RoseBuildImpl::add(bool anchored, const vector &mask, - const ue2::flat_set &reports) { + const flat_set &reports) { if (validateTransientMask(mask, anchored, false, cc.grey)) { bool eod = false; addTransientMask(*this, mask, reports, anchored, eod); @@ -734,14 +733,14 @@ bool RoseBuildImpl::add(bool anchored, const vector &mask, } bool RoseBuildImpl::validateMask(const vector &mask, - UNUSED const ue2::flat_set &reports, + UNUSED const flat_set &reports, bool anchored, bool eod) const { return validateTransientMask(mask, anchored, eod, cc.grey); } static unique_ptr makeAnchoredGraph(const vector &mask, - const ue2::flat_set &reports, + const flat_set &reports, bool eod) { auto gp = ue2::make_unique(); NGHolder &g = *gp; @@ -763,7 +762,7 @@ unique_ptr makeAnchoredGraph(const vector &mask, static bool addAnchoredMask(RoseBuildImpl &build, const vector &mask, - const ue2::flat_set &reports, bool eod) { + const flat_set &reports, bool eod) { if (!build.cc.grey.allowAnchoredAcyclic) { return false; } @@ -775,8 +774,8 @@ bool addAnchoredMask(RoseBuildImpl &build, const vector &mask, } void RoseBuildImpl::addMask(const vector &mask, - const ue2::flat_set &reports, - bool anchored, bool eod) { + const flat_set &reports, bool anchored, + bool eod) { if (anchored && addAnchoredMask(*this, mask, reports, eod)) { DEBUG_PRINTF("added mask as anchored acyclic graph\n"); return; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index e24d0b2e..1cf3bbe6 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1087,13 +1087,13 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left, assert(left.graph()); const NGHolder &h = *left.graph(); 
- ue2::flat_set all_states; + flat_set all_states; insert(&all_states, vertices(h)); assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */ DEBUG_PRINTF("removing sds\n"); all_states.erase(h.startDs); - ue2::flat_set states; + flat_set states; /* check each pred literal to see if they all kill previous graph * state */ diff --git a/src/rose/rose_build_dedupe.cpp b/src/rose/rose_build_dedupe.cpp index d3e72313..04144f56 100644 --- a/src/rose/rose_build_dedupe.cpp +++ b/src/rose/rose_build_dedupe.cpp @@ -39,7 +39,7 @@ using namespace std; namespace ue2 { static -bool requiresDedupe(const NGHolder &h, const ue2::flat_set &reports, +bool requiresDedupe(const NGHolder &h, const flat_set &reports, const Grey &grey) { /* TODO: tighten */ NFAVertex seen_vert = NGHolder::null_vertex(); @@ -83,10 +83,10 @@ class RoseDedupeAuxImpl : public RoseDedupeAux { public: explicit RoseDedupeAuxImpl(const RoseBuildImpl &build_in); bool requiresDedupeSupport( - const ue2::flat_set &reports) const override; + const flat_set &reports) const override; private: - bool hasSafeMultiReports(const ue2::flat_set &reports) const; + bool hasSafeMultiReports(const flat_set &reports) const; const RoseBuildImpl &build; map> vert_map; //!< ordinary literals diff --git a/src/rose/rose_build_exclusive.cpp b/src/rose/rose_build_exclusive.cpp index 25585ec0..4c8796f5 100644 --- a/src/rose/rose_build_exclusive.cpp +++ b/src/rose/rose_build_exclusive.cpp @@ -26,9 +26,9 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "ue2common.h" - #include "rose_build_exclusive.h" + +#include "ue2common.h" #include "rose_build_merge.h" #include "nfa/castlecompile.h" #include "nfagraph/ng_execute.h" @@ -37,6 +37,7 @@ #include "util/clique.h" #include "util/compile_context.h" #include "util/container.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/make_unique.h" @@ -228,7 +229,7 @@ bool isExclusive(const NGHolder &h, lower_bound = ~0U; } - ue2::flat_set states; + flat_set states; for (const auto &v : vertices_range(h)) { if (h[v].index >= lower_bound || h[v].index < 2) { states.insert(v); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 15a1ae78..5d4d46e4 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1738,7 +1738,7 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi) { } static -u32 findUnusedTop(const ue2::flat_set &tops) { +u32 findUnusedTop(const flat_set &tops) { u32 i = 0; while (contains(tops, i)) { i++; @@ -1766,7 +1766,7 @@ void replaceTops(NGHolder &h, const map &top_mapping) { static bool setDistinctTops(NGHolder &h1, const NGHolder &h2, map &top_mapping) { - ue2::flat_set tops1 = getTops(h1), tops2 = getTops(h2); + flat_set tops1 = getTops(h1), tops2 = getTops(h2); DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(), tops2.size()); diff --git a/src/util/graph.h b/src/util/graph.h index 9d6bb955..660afd02 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -146,7 +146,7 @@ void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) { } template -ue2::flat_set +flat_set find_vertices_in_cycles(const Graph &g) { using vertex_descriptor = typename Graph::vertex_descriptor; @@ -160,7 +160,7 @@ find_vertices_in_cycles(const Graph &g) { comps[e.second].push_back(e.first); } - ue2::flat_set rv; + flat_set rv; for (const auto &comp : comps | boost::adaptors::map_values) { /* every vertex in a strongly connected component is reachable from 
diff --git a/src/util/partitioned_set.h b/src/util/partitioned_set.h index 41710fe7..313c08e1 100644 --- a/src/util/partitioned_set.h +++ b/src/util/partitioned_set.h @@ -98,8 +98,7 @@ public: * If the set was not split (due to there being no overlap with splitter or * being a complete subset), INVALID_SUBSET is returned. */ - size_t split(size_t subset_index, - const typename ue2::flat_set &splitter) { + size_t split(size_t subset_index, const flat_set &splitter) { assert(!splitter.empty()); if (splitter.empty()) { return INVALID_SUBSET; @@ -193,7 +192,7 @@ public: /** * Returns all subsets which have a member in keys. */ - void find_overlapping(const typename ue2::flat_set &keys, + void find_overlapping(const flat_set &keys, std::vector *containing) const { boost::dynamic_bitset<> seen(subsets.size()); // all zero by default. diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp index a846eb25..c0e9ee15 100644 --- a/src/util/report_manager.cpp +++ b/src/util/report_manager.cpp @@ -133,7 +133,7 @@ vector ReportManager::getDkeyToReportTable() const { void ReportManager::assignDkeys(const RoseBuild *rose) { DEBUG_PRINTF("assigning...\n"); - map> ext_to_int; + map> ext_to_int; for (u32 i = 0; i < reportIds.size(); i++) { const Report &ir = reportIds[i]; From 4528485a560d83a5d8d89bfd7c5d8d7b5db4f45d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 21 Jul 2017 16:43:16 +1000 Subject: [PATCH 102/190] determinise: use find first, rather than emplace For non-trivial StateSet types, copying to do the emplace if it is already in the map is more expensive than checking with find() first. 
--- src/util/determinise.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/util/determinise.h b/src/util/determinise.h index eb56d970..102a1974 100644 --- a/src/util/determinise.h +++ b/src/util/determinise.h @@ -139,14 +139,16 @@ bool determinise(Auto &n, std::vector &dstates, size_t state_limit, if (s && succs[s] == succs[s - 1]) { succ_id = dstates[curr_id].next[s - 1]; } else { - auto p = dstate_ids.emplace(succs[s], dstates.size()); - succ_id = p.first->second; - if (!p.second) { /* succs[s] is already present */ + auto p = dstate_ids.find(succs[s]); + if (p != dstate_ids.end()) { // succ[s] is already present + succ_id = p->second; if (succ_id > curr_id && !dstates[succ_id].daddy && n.unalpha[s] < N_CHARS) { dstates[succ_id].daddy = curr_id; } } else { + succ_id = dstate_ids.size(); + dstate_ids.emplace(succs[s], succ_id); dstates.push_back(ds(alphabet_size)); dstates.back().daddy = n.unalpha[s] < N_CHARS ? curr_id : 0; q.emplace(succs[s], succ_id); From 8da2d13baae229bb900428386af1a3dcfd12206a Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 13 Jul 2017 14:38:06 +0800 Subject: [PATCH 103/190] AVX512 Reinforced FAT teddy. 
--- src/fdr/teddy.c | 6 +- src/fdr/teddy_avx2.c | 298 +++++++++++++++++++++++++++++++++++++- src/fdr/teddy_compile.cpp | 70 +++++---- src/util/simd_utils.h | 29 +++- 4 files changed, 366 insertions(+), 37 deletions(-) diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index db68749a..0b3fe28f 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -298,7 +298,7 @@ do { \ const u8 *ptr = a->buf + a->start_offset; \ u32 floodBackoff = FLOOD_BACKOFF_START; \ const u8 *tryFloodDetect = a->firstFloodDetect; \ - u32 last_match = (u32)-1; \ + u32 last_match = ones_u32; \ const struct Teddy *teddy = (const struct Teddy *)fdr; \ const size_t iterBytes = 128; \ DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ @@ -533,7 +533,7 @@ do { \ const u8 *ptr = a->buf + a->start_offset; \ u32 floodBackoff = FLOOD_BACKOFF_START; \ const u8 *tryFloodDetect = a->firstFloodDetect; \ - u32 last_match = (u32)-1; \ + u32 last_match = ones_u32; \ const struct Teddy *teddy = (const struct Teddy *)fdr; \ const size_t iterBytes = 64; \ DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ @@ -712,7 +712,7 @@ do { \ const u8 *ptr = a->buf + a->start_offset; \ u32 floodBackoff = FLOOD_BACKOFF_START; \ const u8 *tryFloodDetect = a->firstFloodDetect; \ - u32 last_match = (u32)-1; \ + u32 last_match = ones_u32; \ const struct Teddy *teddy = (const struct Teddy *)fdr; \ const size_t iterBytes = 32; \ DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 1d037028..8f98344c 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -134,6 +134,300 @@ const m256 *getMaskBase_avx2(const struct Teddy *teddy) { return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); } +#if defined(HAVE_AVX512) + +static really_inline +const u64a *getReinforcedMaskBase_avx2(const struct Teddy *teddy, u8 numMask) { + return (const u64a *)((const u8 *)getMaskBase_avx2(teddy) + + ROUNDUP_CL(2 * numMask * sizeof(m256))); +} 
+ +#ifdef ARCH_64_BIT +#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(diff512(var, ones512()))) { \ + m512 swap = swap256in512(var); \ + m512 r = interleave512lo(var, swap); \ + m128 r0 = extract128from512(r, 0); \ + m128 r1 = extract128from512(r, 1); \ + u64a part1 = movq(r0); \ + u64a part2 = extract64from128(r0, 1); \ + u64a part5 = movq(r1); \ + u64a part6 = extract64from128(r1, 1); \ + r = interleave512hi(var, swap); \ + r0 = extract128from512(r, 0); \ + r1 = extract128from512(r, 1); \ + u64a part3 = movq(r0); \ + u64a part4 = extract64from128(r0, 1); \ + u64a part7 = movq(r1); \ + u64a part8 = extract64from128(r1, 1); \ + CONF_FAT_CHUNK_64(part1, bucket, offset, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part2, bucket, offset + 4, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part3, bucket, offset + 8, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part4, bucket, offset + 12, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part5, bucket, offset + 16, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part6, bucket, offset + 20, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part7, bucket, offset + 24, reason, conf_fn); \ + CONF_FAT_CHUNK_64(part8, bucket, offset + 28, reason, conf_fn); \ + } \ +} while(0) +#else +#define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \ +do { \ + if (unlikely(diff512(var, ones512()))) { \ + m512 swap = swap256in512(var); \ + m512 r = interleave512lo(var, swap); \ + m128 r0 = extract128from512(r, 0); \ + m128 r1 = extract128from512(r, 1); \ + u32 part1 = movd(r0); \ + u32 part2 = extract32from128(r0, 1); \ + u32 part3 = extract32from128(r0, 2); \ + u32 part4 = extract32from128(r0, 3); \ + u32 part9 = movd(r1); \ + u32 part10 = extract32from128(r1, 1); \ + u32 part11 = extract32from128(r1, 2); \ + u32 part12 = extract32from128(r1, 3); \ + r = interleave512hi(var, swap); \ + r0 = extract128from512(r, 0); \ + r1 = extract128from512(r, 1); \ + u32 part5 = movd(r0); \ + u32 part6 = extract32from128(r0, 1); \ + u32 part7 = 
extract32from128(r0, 2); \ + u32 part8 = extract32from128(r0, 3); \ + u32 part13 = movd(r1); \ + u32 part14 = extract32from128(r1, 1); \ + u32 part15 = extract32from128(r1, 2); \ + u32 part16 = extract32from128(r1, 3); \ + CONF_FAT_CHUNK_32(part1, bucket, offset, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part2, bucket, offset + 2, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part3, bucket, offset + 4, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part4, bucket, offset + 6, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part5, bucket, offset + 8, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part6, bucket, offset + 10, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part7, bucket, offset + 12, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part8, bucket, offset + 14, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part9, bucket, offset + 16, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part10, bucket, offset + 18, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part11, bucket, offset + 20, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part12, bucket, offset + 22, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part13, bucket, offset + 24, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part14, bucket, offset + 26, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part15, bucket, offset + 28, reason, conf_fn); \ + CONF_FAT_CHUNK_32(part16, bucket, offset + 30, reason, conf_fn); \ + } \ +} while(0) +#endif + +static really_inline +m512 vectoredLoad2x256(m512 *p_mask, const u8 *ptr, const size_t start_offset, + const u8 *lo, const u8 *hi, + const u8 *buf_history, size_t len_history, + const u32 nMasks) { + m256 p_mask256; + m512 ret = set2x256(vectoredLoad256(&p_mask256, ptr, start_offset, lo, hi, + buf_history, len_history, nMasks)); + *p_mask = set2x256(p_mask256); + return ret; +} + +#define PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val) \ + m512 lo = and512(val, *lo_mask); \ + m512 hi = and512(rshift64_m512(val, 4), *lo_mask) + +#define PREP_FAT_SHUF_MASK \ + PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(set2x256(load256(ptr))); \ + *c_16 = *(ptr + 15); \ + m512 r_msk = 
set512_64(0ULL, r_msk_base_hi[*c_16], \ + 0ULL, r_msk_base_hi[*c_0], \ + 0ULL, r_msk_base_lo[*c_16], \ + 0ULL, r_msk_base_lo[*c_0]); \ + *c_0 = *(ptr + 31) + +#define FAT_SHIFT_OR_M1 \ + or512(pshufb_m512(dup_mask[0], lo), pshufb_m512(dup_mask[1], hi)) + +#define FAT_SHIFT_OR_M2 \ + or512(lshift128_m512(or512(pshufb_m512(dup_mask[2], lo), \ + pshufb_m512(dup_mask[3], hi)), \ + 1), FAT_SHIFT_OR_M1) + +#define FAT_SHIFT_OR_M3 \ + or512(lshift128_m512(or512(pshufb_m512(dup_mask[4], lo), \ + pshufb_m512(dup_mask[5], hi)), \ + 2), FAT_SHIFT_OR_M2) + +#define FAT_SHIFT_OR_M4 \ + or512(lshift128_m512(or512(pshufb_m512(dup_mask[6], lo), \ + pshufb_m512(dup_mask[7], hi)), \ + 3), FAT_SHIFT_OR_M3) + +static really_inline +m512 prep_conf_fat_teddy_no_reinforcement_m1(const m512 *lo_mask, + const m512 *dup_mask, + const m512 val) { + PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val); + return FAT_SHIFT_OR_M1; +} + +static really_inline +m512 prep_conf_fat_teddy_no_reinforcement_m2(const m512 *lo_mask, + const m512 *dup_mask, + const m512 val) { + PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val); + return FAT_SHIFT_OR_M2; +} + +static really_inline +m512 prep_conf_fat_teddy_no_reinforcement_m3(const m512 *lo_mask, + const m512 *dup_mask, + const m512 val) { + PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val); + return FAT_SHIFT_OR_M3; +} + +static really_inline +m512 prep_conf_fat_teddy_no_reinforcement_m4(const m512 *lo_mask, + const m512 *dup_mask, + const m512 val) { + PREP_FAT_SHUF_MASK_NO_REINFORCEMENT(val); + return FAT_SHIFT_OR_M4; +} + +static really_inline +m512 prep_conf_fat_teddy_m1(const m512 *lo_mask, const m512 *dup_mask, + const u8 *ptr, const u64a *r_msk_base_lo, + const u64a *r_msk_base_hi, u32 *c_0, u32 *c_16) { + PREP_FAT_SHUF_MASK; + return or512(FAT_SHIFT_OR_M1, r_msk); +} + +static really_inline +m512 prep_conf_fat_teddy_m2(const m512 *lo_mask, const m512 *dup_mask, + const u8 *ptr, const u64a *r_msk_base_lo, + const u64a *r_msk_base_hi, u32 *c_0, u32 *c_16) { + PREP_FAT_SHUF_MASK; 
+ return or512(FAT_SHIFT_OR_M2, r_msk); +} + +static really_inline +m512 prep_conf_fat_teddy_m3(const m512 *lo_mask, const m512 *dup_mask, + const u8 *ptr, const u64a *r_msk_base_lo, + const u64a *r_msk_base_hi, u32 *c_0, u32 *c_16) { + PREP_FAT_SHUF_MASK; + return or512(FAT_SHIFT_OR_M3, r_msk); +} + +static really_inline +m512 prep_conf_fat_teddy_m4(const m512 *lo_mask, const m512 *dup_mask, + const u8 *ptr, const u64a *r_msk_base_lo, + const u64a *r_msk_base_hi, u32 *c_0, u32 *c_16) { + PREP_FAT_SHUF_MASK; + return or512(FAT_SHIFT_OR_M4, r_msk); +} + +#define PREP_CONF_FAT_FN_NO_REINFORCEMENT(val, n) \ + prep_conf_fat_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val) + +#define PREP_CONF_FAT_FN(ptr, n) \ + prep_conf_fat_teddy_m##n(&lo_mask, dup_mask, ptr, \ + r_msk_base_lo, r_msk_base_hi, &c_0, &c_16) + +#define DUP_FAT_MASK(a) mask_set2x256(set2x256(swap128in256(a)), 0xC3, a) + +#define PREPARE_FAT_MASKS_1 \ + dup_mask[0] = DUP_FAT_MASK(maskBase[0]); \ + dup_mask[1] = DUP_FAT_MASK(maskBase[1]); + +#define PREPARE_FAT_MASKS_2 \ + PREPARE_FAT_MASKS_1 \ + dup_mask[2] = DUP_FAT_MASK(maskBase[2]); \ + dup_mask[3] = DUP_FAT_MASK(maskBase[3]); + +#define PREPARE_FAT_MASKS_3 \ + PREPARE_FAT_MASKS_2 \ + dup_mask[4] = DUP_FAT_MASK(maskBase[4]); \ + dup_mask[5] = DUP_FAT_MASK(maskBase[5]); + +#define PREPARE_FAT_MASKS_4 \ + PREPARE_FAT_MASKS_3 \ + dup_mask[6] = DUP_FAT_MASK(maskBase[6]); \ + dup_mask[7] = DUP_FAT_MASK(maskBase[7]); + +#define PREPARE_FAT_MASKS(n) \ + m512 lo_mask = set64x8(0xf); \ + m512 dup_mask[n * 2]; \ + PREPARE_FAT_MASKS_##n + +#define FDR_EXEC_FAT_TEDDY(fdr, a, control, n_msk, conf_fn) \ +do { \ + const u8 *buf_end = a->buf + a->len; \ + const u8 *ptr = a->buf + a->start_offset; \ + u32 floodBackoff = FLOOD_BACKOFF_START; \ + const u8 *tryFloodDetect = a->firstFloodDetect; \ + u32 last_match = ones_u32; \ + const struct Teddy *teddy = (const struct Teddy *)fdr; \ + const size_t iterBytes = 64; \ + DEBUG_PRINTF("params: buf %p len %zu start_offset 
%zu\n", \ + a->buf, a->len, a->start_offset); \ + \ + const m256 *maskBase = getMaskBase_avx2(teddy); \ + PREPARE_FAT_MASKS(n_msk); \ + const u32 *confBase = getConfBase(teddy); \ + \ + const u64a *r_msk_base_lo = getReinforcedMaskBase_avx2(teddy, n_msk); \ + const u64a *r_msk_base_hi = r_msk_base_lo + (N_CHARS + 1); \ + u32 c_0 = 0x100; \ + u32 c_16 = 0x100; \ + const u8 *mainStart = ROUNDUP_PTR(ptr, 32); \ + DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); \ + if (ptr < mainStart) { \ + ptr = mainStart - 32; \ + m512 p_mask; \ + m512 val_0 = vectoredLoad2x256(&p_mask, ptr, a->start_offset, \ + a->buf, buf_end, \ + a->buf_history, a->len_history, n_msk); \ + m512 r_0 = PREP_CONF_FAT_FN_NO_REINFORCEMENT(val_0, n_msk); \ + r_0 = or512(r_0, p_mask); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, conf_fn); \ + ptr += 32; \ + } \ + \ + if (ptr + 32 <= buf_end) { \ + m512 r_0 = PREP_CONF_FAT_FN(ptr, n_msk); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, conf_fn); \ + ptr += 32; \ + } \ + \ + for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { \ + __builtin_prefetch(ptr + (iterBytes * 4)); \ + CHECK_FLOOD; \ + m512 r_0 = PREP_CONF_FAT_FN(ptr, n_msk); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, conf_fn); \ + m512 r_1 = PREP_CONF_FAT_FN(ptr + 32, n_msk); \ + CONFIRM_FAT_TEDDY(r_1, 16, 32, NOT_CAUTIOUS, conf_fn); \ + } \ + \ + if (ptr + 32 <= buf_end) { \ + m512 r_0 = PREP_CONF_FAT_FN(ptr, n_msk); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, NOT_CAUTIOUS, conf_fn); \ + ptr += 32; \ + } \ + \ + assert(ptr + 32 > buf_end); \ + if (ptr < buf_end) { \ + m512 p_mask; \ + m512 val_0 = vectoredLoad2x256(&p_mask, ptr, 0, ptr, buf_end, \ + a->buf_history, a->len_history, n_msk); \ + m512 r_0 = PREP_CONF_FAT_FN_NO_REINFORCEMENT(val_0, n_msk); \ + r_0 = or512(r_0, p_mask); \ + CONFIRM_FAT_TEDDY(r_0, 16, 0, VECTORING, conf_fn); \ + } \ + \ + return HWLM_SUCCESS; \ +} while(0) + +#else // HAVE_AVX512 + #ifdef ARCH_64_BIT #define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, 
conf_fn) \ do { \ @@ -282,7 +576,7 @@ do { \ const u8 *ptr = a->buf + a->start_offset; \ u32 floodBackoff = FLOOD_BACKOFF_START; \ const u8 *tryFloodDetect = a->firstFloodDetect; \ - u32 last_match = (u32)-1; \ + u32 last_match = ones_u32; \ const struct Teddy *teddy = (const struct Teddy *)fdr; \ const size_t iterBytes = 32; \ DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ @@ -342,6 +636,8 @@ do { \ return HWLM_SUCCESS; \ } while(0) +#endif // HAVE_AVX512 + hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control) { diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 98736134..8b8a6420 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -325,44 +325,56 @@ bool pack(const vector &lits, #define REINFORCED_MSK_LEN 8 static -void initReinforcedTable(u8 *reinforcedMsk) { - u64a *mask = (u64a *)reinforcedMsk; - fill_n(mask, N_CHARS, 0x00ffffffffffffffULL); +void initReinforcedTable(u8 *rmsk, const size_t rmsklen, + const u32 maskWidth) { + for (u32 b = 0; b < maskWidth; b++) { + u64a *mask = (u64a *)(rmsk + b * (rmsklen / maskWidth)); + fill_n(mask, N_CHARS, 0x00ffffffffffffffULL); + } } static -void fillReinforcedMskZero(u8 *reinforcedMsk) { - u8 *mc = reinforcedMsk + NO_REINFORCEMENT * REINFORCED_MSK_LEN; - fill_n(mc, REINFORCED_MSK_LEN, 0x00); +void fillReinforcedMskZero(u8 *rmsk, const size_t rmsklen, + const u32 maskWidth) { + for (u32 b = 0; b < maskWidth; b++) { + u8 *mc = rmsk + b * (rmsklen / maskWidth) + + NO_REINFORCEMENT * REINFORCED_MSK_LEN; + fill_n(mc, REINFORCED_MSK_LEN, 0x00); + } } static -void fillReinforcedMsk(u8 *reinforcedMsk, u16 c, u32 j, u8 bmsk) { +void fillReinforcedMsk(u8 *rmsk, u32 boff, u16 c, u32 j, u8 bmsk) { assert(j > 0); if (c == ALL_CHAR_SET) { for (size_t i = 0; i < N_CHARS; i++) { - u8 *mc = reinforcedMsk + i * REINFORCED_MSK_LEN; + u8 *mc = rmsk + boff + i * REINFORCED_MSK_LEN; mc[j - 1] &= ~bmsk; } } else { - u8 
*mc = reinforcedMsk + c * REINFORCED_MSK_LEN; + u8 *mc = rmsk + boff + c * REINFORCED_MSK_LEN; mc[j - 1] &= ~bmsk; } } #ifdef TEDDY_DEBUG static -void dumpReinforcedMaskTable(const u8 *msks) { - for (u32 i = 0; i <= N_CHARS; i++) { - printf("0x%02x: ", i); - for (u32 j = 0; j < REINFORCED_MSK_LEN; j++) { - u8 val = msks[i * REINFORCED_MSK_LEN + j]; - for (u32 k = 0; k < 8; k++) { - printf("%s", ((val >> k) & 0x1) ? "1" : "0"); +void dumpReinforcedMaskTable(const u8 *rmsk, const size_t rmsklen, + const u32 maskWidth) { + for (u32 b = 0; b < maskWidth; b++) { + printf("reinforcement table for bucket %u..%u:\n", b * 8, b * 8 + 7); + for (u32 i = 0; i <= N_CHARS; i++) { + printf("0x%02x: ", i); + for (u32 j = 0; j < REINFORCED_MSK_LEN; j++) { + u8 val = rmsk[b * (rmsklen / maskWidth) + + i * REINFORCED_MSK_LEN + j]; + for (u32 k = 0; k < 8; k++) { + printf("%s", ((val >> k) & 0x1) ? "1" : "0"); + } + printf(" "); } - printf(" "); + printf("\n"); } - printf("\n"); } } #endif @@ -443,12 +455,13 @@ static void fillReinforcedTable(const map> &bucketToLits, const vector &lits, - u8 *reinforcedMsk) { - initReinforcedTable(reinforcedMsk); + u8 *rmsk, const size_t rmsklen, const u32 maskWidth) { + initReinforcedTable(rmsk, rmsklen, maskWidth); for (const auto &b2l : bucketToLits) { const u32 &bucket_id = b2l.first; const vector &ids = b2l.second; + const u32 boff = (bucket_id / 8) * (rmsklen / maskWidth); const u8 bmsk = 1U << (bucket_id % 8); for (const LiteralIndex &lit_id : ids) { @@ -459,23 +472,23 @@ void fillReinforcedTable(const map TeddyCompiler::build() { @@ -483,7 +496,7 @@ bytecode_ptr TeddyCompiler::build() { size_t headerSize = sizeof(Teddy); size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; - size_t reinforcedMaskLen = (N_CHARS + 1) * REINFORCED_MSK_LEN; + size_t reinforcedMaskLen = (N_CHARS + 1) * REINFORCED_MSK_LEN * maskWidth; auto floodTable = setupFDRFloodControl(lits, eng, grey); auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); @@ 
-525,7 +538,8 @@ bytecode_ptr TeddyCompiler::build() { // Write reinforcement masks. u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen); - fillReinforcedTable(bucketToLits, lits, reinforcedMsk); + fillReinforcedTable(bucketToLits, lits, reinforcedMsk, + reinforcedMaskLen, maskWidth); #ifdef TEDDY_DEBUG for (u32 i = 0; i < eng.numMasks * 2; i++) { @@ -541,7 +555,7 @@ bytecode_ptr TeddyCompiler::build() { printf("\n===============================================\n" "reinforced mask table for low boundary (original)\n\n"); - dumpReinforcedMaskTable(reinforcedMsk); + dumpReinforcedMaskTable(reinforcedMsk, reinforcedMaskLen, maskWidth); #endif return fdr; diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 8c469d16..c1449711 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -169,16 +169,24 @@ m128 load_m128_from_u64a(const u64a *p) { #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed) #define lshiftbyte_m128(a, count_immed) _mm_slli_si128(a, count_immed) +#if defined(HAVE_SSE41) +#define extract32from128(a, imm) _mm_extract_epi32(a, imm) +#define extract64from128(a, imm) _mm_extract_epi64(a, imm) +#else +#define extract32from128(a, imm) movd(_mm_srli_si128(a, imm << 2)) +#define extract64from128(a, imm) movq(_mm_srli_si128(a, imm << 3)) +#endif + #if !defined(HAVE_AVX2) // TODO: this entire file needs restructuring - this carveout is awful #define extractlow64from256(a) movq(a.lo) #define extractlow32from256(a) movd(a.lo) #if defined(HAVE_SSE41) #define extract32from256(a, imm) _mm_extract_epi32((imm >> 2) ? a.hi : a.lo, imm % 4) -#define extract64from256(a, imm) _mm_extract_epi64((imm >> 2) ? a.hi : a.lo, imm % 2) +#define extract64from256(a, imm) _mm_extract_epi64((imm >> 1) ? a.hi : a.lo, imm % 2) #else -#define extract32from256(a, imm) movd(_mm_srli_si128((imm >> 2) ? a.hi : a.lo, (imm % 4) * 8)) -#define extract64from256(a, imm) movq(_mm_srli_si128((imm >> 2) ? 
a.hi : a.lo, (imm % 2) * 8)) +#define extract32from256(a, imm) movd(_mm_srli_si128((imm >> 2) ? a.hi : a.lo, (imm % 4) * 4)) +#define extract64from256(a, imm) movq(_mm_srli_si128((imm >> 1) ? a.hi : a.lo, (imm % 2) * 8)) #endif #endif // !AVX2 @@ -741,8 +749,8 @@ m128 movdq_lo(m256 x) { #define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4) #define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a)) #define extractlow32from256(a) movd(cast256to128(a)) -#define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b); -#define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b); +#define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b) +#define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b) #define vpalignr(r, l, offset) _mm256_alignr_epi8(r, l, offset) static really_inline @@ -757,6 +765,11 @@ m256 combine2x128(m128 hi, m128 lo) { #if defined(HAVE_AVX512) #define extract128from512(a, imm) _mm512_extracti32x4_epi32(a, imm) +#define interleave512hi(a, b) _mm512_unpackhi_epi8(a, b) +#define interleave512lo(a, b) _mm512_unpacklo_epi8(a, b) +#define set2x256(a) _mm512_broadcast_i64x4(a) +#define mask_set2x256(src, k, a) _mm512_mask_broadcast_i64x4(src, k, a) +#define vpermq512(idx, a) _mm512_permutexvar_epi64(idx, a) #endif /**** @@ -980,6 +993,12 @@ m512 set512_64(u64a hi_3, u64a hi_2, u64a hi_1, u64a hi_0, lo_3, lo_2, lo_1, lo_0); } +static really_inline +m512 swap256in512(m512 a) { + m512 idx = set512_64(3ULL, 2ULL, 1ULL, 0ULL, 7ULL, 6ULL, 5ULL, 4ULL); + return vpermq512(idx, a); +} + static really_inline m512 set4x128(m128 a) { return _mm512_broadcast_i32x4(a); From 2b1d3383aa9832efe1f73a606be606be235bca9e Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Mon, 24 Jul 2017 15:09:17 +0800 Subject: [PATCH 104/190] replace "_avx2" with "_fat". 
--- src/fdr/fdr.c | 16 ++++++------ src/fdr/teddy.h | 48 +++++++++++++++++------------------ src/fdr/teddy_avx2.c | 60 ++++++++++++++++++++++---------------------- 3 files changed, 62 insertions(+), 62 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index f7da6981..d33756d3 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -803,14 +803,14 @@ static const FDRFUNCTYPE funcs[] = { fdr_engine_exec, NULL, /* old: fast teddy */ NULL, /* old: fast teddy */ - ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fat), - ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fat), - ONLY_AVX2(fdr_exec_teddy_avx2_msks2_fat), - ONLY_AVX2(fdr_exec_teddy_avx2_msks2_pck_fat), - ONLY_AVX2(fdr_exec_teddy_avx2_msks3_fat), - ONLY_AVX2(fdr_exec_teddy_avx2_msks3_pck_fat), - ONLY_AVX2(fdr_exec_teddy_avx2_msks4_fat), - ONLY_AVX2(fdr_exec_teddy_avx2_msks4_pck_fat), + ONLY_AVX2(fdr_exec_fat_teddy_msks1), + ONLY_AVX2(fdr_exec_fat_teddy_msks1_pck), + ONLY_AVX2(fdr_exec_fat_teddy_msks2), + ONLY_AVX2(fdr_exec_fat_teddy_msks2_pck), + ONLY_AVX2(fdr_exec_fat_teddy_msks3), + ONLY_AVX2(fdr_exec_fat_teddy_msks3_pck), + ONLY_AVX2(fdr_exec_fat_teddy_msks4), + ONLY_AVX2(fdr_exec_fat_teddy_msks4_pck), fdr_exec_teddy_msks1, fdr_exec_teddy_msks1_pck, fdr_exec_teddy_msks2, diff --git a/src/fdr/teddy.h b/src/fdr/teddy.h index 35756c53..40ae0756 100644 --- a/src/fdr/teddy.h +++ b/src/fdr/teddy.h @@ -73,37 +73,37 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, #if defined(HAVE_AVX2) -hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); +hwlm_error_t fdr_exec_fat_teddy_msks1(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); -hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); +hwlm_error_t fdr_exec_fat_teddy_msks1_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); -hwlm_error_t 
fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); +hwlm_error_t fdr_exec_fat_teddy_msks2(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); -hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); +hwlm_error_t fdr_exec_fat_teddy_msks2_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); -hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); +hwlm_error_t fdr_exec_fat_teddy_msks3(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); -hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); +hwlm_error_t fdr_exec_fat_teddy_msks3_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); -hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); +hwlm_error_t fdr_exec_fat_teddy_msks4(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); -hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); +hwlm_error_t fdr_exec_fat_teddy_msks4_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control); #endif /* HAVE_AVX2 */ diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 8f98344c..e1743a9c 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -130,15 +130,15 @@ do { \ } while(0) static really_inline -const m256 *getMaskBase_avx2(const struct Teddy *teddy) { +const m256 *getMaskBase_fat(const struct Teddy *teddy) { return (const m256 *)((const u8 *)teddy + ROUNDUP_CL(sizeof(struct Teddy))); } #if defined(HAVE_AVX512) static really_inline -const u64a 
*getReinforcedMaskBase_avx2(const struct Teddy *teddy, u8 numMask) { - return (const u64a *)((const u8 *)getMaskBase_avx2(teddy) +const u64a *getReinforcedMaskBase_fat(const struct Teddy *teddy, u8 numMask) { + return (const u64a *)((const u8 *)getMaskBase_fat(teddy) + ROUNDUP_CL(2 * numMask * sizeof(m256))); } @@ -370,11 +370,11 @@ do { \ DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ a->buf, a->len, a->start_offset); \ \ - const m256 *maskBase = getMaskBase_avx2(teddy); \ + const m256 *maskBase = getMaskBase_fat(teddy); \ PREPARE_FAT_MASKS(n_msk); \ const u32 *confBase = getConfBase(teddy); \ \ - const u64a *r_msk_base_lo = getReinforcedMaskBase_avx2(teddy, n_msk); \ + const u64a *r_msk_base_lo = getReinforcedMaskBase_fat(teddy, n_msk); \ const u64a *r_msk_base_hi = r_msk_base_lo + (N_CHARS + 1); \ u32 c_0 = 0x100; \ u32 c_16 = 0x100; \ @@ -582,7 +582,7 @@ do { \ DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", \ a->buf, a->len, a->start_offset); \ \ - const m256 *maskBase = getMaskBase_avx2(teddy); \ + const m256 *maskBase = getMaskBase_fat(teddy); \ const u32 *confBase = getConfBase(teddy); \ \ FDR_EXEC_FAT_TEDDY_RES_OLD(n_msk); \ @@ -638,51 +638,51 @@ do { \ #endif // HAVE_AVX512 -hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { +hwlm_error_t fdr_exec_fat_teddy_msks1(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { FDR_EXEC_FAT_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); } -hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { +hwlm_error_t fdr_exec_fat_teddy_msks1_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { FDR_EXEC_FAT_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); } -hwlm_error_t fdr_exec_teddy_avx2_msks2_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { 
+hwlm_error_t fdr_exec_fat_teddy_msks2(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { FDR_EXEC_FAT_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); } -hwlm_error_t fdr_exec_teddy_avx2_msks2_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { +hwlm_error_t fdr_exec_fat_teddy_msks2_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { FDR_EXEC_FAT_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); } -hwlm_error_t fdr_exec_teddy_avx2_msks3_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { +hwlm_error_t fdr_exec_fat_teddy_msks3(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { FDR_EXEC_FAT_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); } -hwlm_error_t fdr_exec_teddy_avx2_msks3_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { +hwlm_error_t fdr_exec_fat_teddy_msks3_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { FDR_EXEC_FAT_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); } -hwlm_error_t fdr_exec_teddy_avx2_msks4_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { +hwlm_error_t fdr_exec_fat_teddy_msks4(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { FDR_EXEC_FAT_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); } -hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { +hwlm_error_t fdr_exec_fat_teddy_msks4_pck(const struct FDR *fdr, + const struct FDR_Runtime_Args *a, + hwlm_group_t control) { FDR_EXEC_FAT_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); } From 72d21a9acf8b42f2dbbb846ceb3da6e56b958ae1 Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Mon, 24 Jul 2017 17:37:42 +0800 Subject: [PATCH 105/190] Refactored building reinforcement table at 
compile time and updated comments. --- src/fdr/teddy_avx2.c | 23 +++++++++++++ src/fdr/teddy_compile.cpp | 68 ++++++++++++++++++++------------------- src/fdr/teddy_internal.h | 5 ++- 3 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index e1743a9c..56ec739f 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -332,6 +332,29 @@ m512 prep_conf_fat_teddy_m4(const m512 *lo_mask, const m512 *dup_mask, prep_conf_fat_teddy_m##n(&lo_mask, dup_mask, ptr, \ r_msk_base_lo, r_msk_base_hi, &c_0, &c_16) +/* + * In FAT teddy, it needs 2 bytes to represent result of each position, + * so each nibble's(for example, lo nibble of last byte) FAT teddy mask + * has 16x2 bytes: + * |----------------------------------|----------------------------------| + * 16bytes (bucket 0..7 in each byte) 16bytes (bucket 8..15 in each byte) + * A B + * at runtime FAT teddy reads 16 bytes once and duplicate them to 32 bytes: + * |----------------------------------|----------------------------------| + * 16bytes input data (lo nibbles) 16bytes duplicated data (lo nibbles) + * X X + * then do pshufb_m256(AB, XX). + * + * In AVX512 reinforced FAT teddy, it reads 32 bytes once and duplicate them + * to 64 bytes: + * |----------------|----------------|----------------|----------------| + * X Y X Y + * in this case we need DUP_FAT_MASK to construct AABB: + * |----------------|----------------|----------------|----------------| + * A A B B + * then do pshufb_m512(AABB, XYXY). 
+ */ + #define DUP_FAT_MASK(a) mask_set2x256(set2x256(swap128in256(a)), 0xC3, a) #define PREPARE_FAT_MASKS_1 \ diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 8b8a6420..3709465b 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -324,50 +324,44 @@ bool pack(const vector &lits, // each item's reinforcement mask has REINFORCED_MSK_LEN bytes #define REINFORCED_MSK_LEN 8 +// reinforcement table size for each 8 buckets set +#define RTABLE_SIZE ((N_CHARS + 1) * REINFORCED_MSK_LEN) + static -void initReinforcedTable(u8 *rmsk, const size_t rmsklen, - const u32 maskWidth) { - for (u32 b = 0; b < maskWidth; b++) { - u64a *mask = (u64a *)(rmsk + b * (rmsklen / maskWidth)); - fill_n(mask, N_CHARS, 0x00ffffffffffffffULL); - } +void initReinforcedTable(u8 *rmsk) { + u64a *mask = (u64a *)rmsk; + fill_n(mask, N_CHARS, 0x00ffffffffffffffULL); } static -void fillReinforcedMskZero(u8 *rmsk, const size_t rmsklen, - const u32 maskWidth) { - for (u32 b = 0; b < maskWidth; b++) { - u8 *mc = rmsk + b * (rmsklen / maskWidth) + - NO_REINFORCEMENT * REINFORCED_MSK_LEN; - fill_n(mc, REINFORCED_MSK_LEN, 0x00); - } +void fillReinforcedMskZero(u8 *rmsk) { + u8 *mc = rmsk + NO_REINFORCEMENT * REINFORCED_MSK_LEN; + fill_n(mc, REINFORCED_MSK_LEN, 0x00); } static -void fillReinforcedMsk(u8 *rmsk, u32 boff, u16 c, u32 j, u8 bmsk) { +void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) { assert(j > 0); if (c == ALL_CHAR_SET) { for (size_t i = 0; i < N_CHARS; i++) { - u8 *mc = rmsk + boff + i * REINFORCED_MSK_LEN; + u8 *mc = rmsk + i * REINFORCED_MSK_LEN; mc[j - 1] &= ~bmsk; } } else { - u8 *mc = rmsk + boff + c * REINFORCED_MSK_LEN; + u8 *mc = rmsk + c * REINFORCED_MSK_LEN; mc[j - 1] &= ~bmsk; } } #ifdef TEDDY_DEBUG static -void dumpReinforcedMaskTable(const u8 *rmsk, const size_t rmsklen, - const u32 maskWidth) { - for (u32 b = 0; b < maskWidth; b++) { +void dumpReinforcedMaskTable(const u8 *rmsk, const u32 num_tables) { + for (u32 b = 0; b < 
num_tables; b++) { printf("reinforcement table for bucket %u..%u:\n", b * 8, b * 8 + 7); for (u32 i = 0; i <= N_CHARS; i++) { printf("0x%02x: ", i); for (u32 j = 0; j < REINFORCED_MSK_LEN; j++) { - u8 val = rmsk[b * (rmsklen / maskWidth) + - i * REINFORCED_MSK_LEN + j]; + u8 val = rmsk[b * RTABLE_SIZE + i * REINFORCED_MSK_LEN + j]; for (u32 k = 0; k < 8; k++) { printf("%s", ((val >> k) & 0x1) ? "1" : "0"); } @@ -455,13 +449,20 @@ static void fillReinforcedTable(const map> &bucketToLits, const vector &lits, - u8 *rmsk, const size_t rmsklen, const u32 maskWidth) { - initReinforcedTable(rmsk, rmsklen, maskWidth); + u8 *rtable_base, const u32 num_tables) { + vector tables; + for (u32 i = 0; i < num_tables; i++) { + tables.push_back(rtable_base + i * RTABLE_SIZE); + } + + for (auto t : tables) { + initReinforcedTable(t); + } for (const auto &b2l : bucketToLits) { const u32 &bucket_id = b2l.first; const vector &ids = b2l.second; - const u32 boff = (bucket_id / 8) * (rmsklen / maskWidth); + u8 *rmsk = tables[bucket_id / 8]; const u8 bmsk = 1U << (bucket_id % 8); for (const LiteralIndex &lit_id : ids) { @@ -472,23 +473,25 @@ void fillReinforcedTable(const map TeddyCompiler::build() { @@ -496,7 +499,7 @@ bytecode_ptr TeddyCompiler::build() { size_t headerSize = sizeof(Teddy); size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; - size_t reinforcedMaskLen = (N_CHARS + 1) * REINFORCED_MSK_LEN * maskWidth; + size_t reinforcedMaskLen = RTABLE_SIZE * maskWidth; auto floodTable = setupFDRFloodControl(lits, eng, grey); auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); @@ -538,8 +541,7 @@ bytecode_ptr TeddyCompiler::build() { // Write reinforcement masks. 
u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen); - fillReinforcedTable(bucketToLits, lits, reinforcedMsk, - reinforcedMaskLen, maskWidth); + fillReinforcedTable(bucketToLits, lits, reinforcedMsk, maskWidth); #ifdef TEDDY_DEBUG for (u32 i = 0; i < eng.numMasks * 2; i++) { @@ -555,7 +557,7 @@ bytecode_ptr TeddyCompiler::build() { printf("\n===============================================\n" "reinforced mask table for low boundary (original)\n\n"); - dumpReinforcedMaskTable(reinforcedMsk, reinforcedMaskLen, maskWidth); + dumpReinforcedMaskTable(reinforcedMsk, maskWidth); #endif return fdr; diff --git a/src/fdr/teddy_internal.h b/src/fdr/teddy_internal.h index 174710bc..1e9e603f 100644 --- a/src/fdr/teddy_internal.h +++ b/src/fdr/teddy_internal.h @@ -33,7 +33,10 @@ * * | | teddy masks * * | | * * |-----| - * * | | reinforcement mask table + * * | | reinforcement mask table for bucket 0..7 + * * | | + * * |-----| + * * | | reinforcement mask table for bucket 8..15 (FAT teddy) * * | | * * |-----| * * | | confirm From aa6025012b01c5e2dee47469a69dbffe464d789c Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 19 Jul 2017 10:02:55 +1000 Subject: [PATCH 106/190] Ensure max width of repeat before transforming graph. --- src/nfagraph/ng_repeat.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index 7add6557..96c553de 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -762,13 +762,20 @@ void getSuccessors(const NGHolder &g, const ReachSubgraph &rsi, * NFA graph and replace it with a cyclic state. */ static void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi, - vector *repeats, - unordered_map &depths, - unordered_set &created) { + vector *repeats, + unordered_map &depths, + unordered_set &created) { assert(!rsi.bad); + /* As we may need to unpeel 2 vertices, we need the width to be more than 2. 
+ * This should only happen if the graph did not have redundancy pass + * performed on as vertex count checks would be prevent us reaching here. + */ + if (rsi.repeatMax <= depth(2)) { + return; + } assert(rsi.repeatMin > depth(0)); assert(rsi.repeatMax >= rsi.repeatMin); - assert(rsi.repeatMax > depth(2)); /* may need to unpeel 2 vertices */ + assert(rsi.repeatMax > depth(2)); DEBUG_PRINTF("entry\n"); From 68c8845d15c652599a154c61d835dba810ef3ab0 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 18 Jul 2017 12:49:32 +1000 Subject: [PATCH 107/190] Do equivalency removal before violet's implementablity check. This is helpful as removing/restoring literals may introduce redundancy in the graphs. Also improve the implementation by caching known good holders. --- src/nfagraph/ng_violet.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 6742fec9..e19a6211 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1759,7 +1759,6 @@ void removeRedundantLiteralsFromInfixes(RoseInGraph &g, } } - static void removeRedundantLiterals(RoseInGraph &g, const CompileContext &cc) { removeRedundantLiteralsFromPrefixes(g, cc); @@ -2886,6 +2885,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, bool changed = false; bool need_to_recalc = false; u32 added_count = 0; + unordered_set good; /* known to be implementable */ do { changed = false; DEBUG_PRINTF("added %u\n", added_count); @@ -2901,13 +2901,19 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, } } for (NGHolder *h : graphs) { + if (contains(good, h)) { + continue; + } + reduceGraphEquivalences(*h, cc); if (isImplementableNFA(*h, &rm, cc)) { + good.insert(h); continue; } if (tryForEarlyDfa(*h, cc) && doEarlyDfa(rose, vg, *h, edges_by_graph[h], final_chance, rm, cc)) { + good.insert(h); continue; } @@ -2923,6 +2929,7 @@ bool ensureImplementable(RoseBuild 
&rose, RoseInGraph &vg, bool allow_changes, return false; } changed = true; + good.insert(h); continue; } From 2a044427c83c28d9e0fe7c4a0aea95ec69f398d7 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 13 Jun 2017 14:26:24 +1000 Subject: [PATCH 108/190] cmake: another convenience lib for compile side --- CMakeLists.txt | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9aa30819..57cf1043 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -651,7 +651,7 @@ set (hs_exec_avx2_SRCS ) -SET (hs_SRCS +SET (hs_compile_SRCS ${hs_HEADERS} src/crc32.h src/database.h @@ -659,7 +659,6 @@ SET (hs_SRCS src/grey.h src/hs.cpp src/hs_internal.h - src/hs_version.c src/hs_version.h src/scratch.h src/state.h @@ -1080,7 +1079,7 @@ set(hs_dump_SRCS ) if (DUMP_SUPPORT) - set(hs_SRCS ${hs_SRCS} ${hs_dump_SRCS}) + set(hs_compile_SRCS ${hs_compile_SRCS} ${hs_dump_SRCS}) endif() # we group things by sublibraries, specifying shared and static and then @@ -1103,12 +1102,20 @@ if (NOT FAT_RUNTIME) add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $) set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) - add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $) + add_library(hs_compile OBJECT ${hs_compile_SRCS}) + + add_library(hs STATIC + src/hs_version.c + src/hs_valid_platform.c + $ + $) endif (BUILD_STATIC_LIBS) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) set_target_properties(hs_exec_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) + set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) endif() else (FAT_RUNTIME) @@ -1162,10 +1169,11 @@ else (FAT_RUNTIME) $ ${RUNTIME_LIBS}) set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + add_library(hs_compile OBJECT ${hs_compile_SRCS}) # we want the static lib for testing 
add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c - ${hs_SRCS} + $ $ ${RUNTIME_LIBS}) @@ -1173,6 +1181,8 @@ else (FAT_RUNTIME) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) # build shared libs + add_library(hs_compile_shared OBJECT ${hs_compile_SRCS}) + set_target_properties(hs_compile_shared PROPERTIES POSITION_INDEPENDENT_CODE TRUE) add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_core2 PROPERTIES @@ -1253,10 +1263,10 @@ endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) if (NOT FAT_RUNTIME) add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c - ${hs_SRCS} $) + $ $) else() add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c - ${hs_SRCS} $ + $ $ ${RUNTIME_SHLIBS}) endif() From 14cf5c3684986f4cc4d32451bffd2bd3ee3fe153 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 31 Jul 2017 12:26:45 +1000 Subject: [PATCH 109/190] small_vector: require boost >= 1.61 We use the small_vector constructors introduced in Boost 1.61 (trac bug 11866, github commit b436c91). If the Boost version is too old, we fall back to using std::vector. --- src/util/small_vector.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/util/small_vector.h b/src/util/small_vector.h index 6293759c..0f54bbf6 100644 --- a/src/util/small_vector.h +++ b/src/util/small_vector.h @@ -33,7 +33,12 @@ #include -#if BOOST_VERSION >= 105800 +/* + * We use the small_vector constructors introduced in Boost 1.61 (trac bug + * #11866, github commit b436c91). If the Boost version is too old, we fall + * back to using std::vector. 
+ */ +#if BOOST_VERSION >= 106100 # define HAVE_BOOST_CONTAINER_SMALL_VECTOR #endif From 404f73981112d69488f431585c53b724801987ed Mon Sep 17 00:00:00 2001 From: "Chang, Harry" Date: Thu, 27 Jul 2017 17:21:05 +0800 Subject: [PATCH 110/190] Compile dump of teddy's nibble masks and reinforcement table in fdr_dump.cpp --- src/fdr/fdr_dump.cpp | 47 +++++++++++++++++++++++++++++++++++++++ src/fdr/teddy_compile.cpp | 37 ------------------------------ 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/src/fdr/fdr_dump.cpp b/src/fdr/fdr_dump.cpp index 0a4d7415..f4cd1f44 100644 --- a/src/fdr/fdr_dump.cpp +++ b/src/fdr/fdr_dump.cpp @@ -86,6 +86,45 @@ void dumpConfirms(const void *fdr_base, u32 conf_offset, u32 num_confirms, } } +static +void dumpTeddyReinforced(const u8 *rmsk, const u32 num_tables, FILE *f) { + // dump reinforcement masks + for (u32 b = 0; b < num_tables; b++) { + fprintf(f, " reinforcement table for bucket %u..%u:\n", + b * 8, b * 8 + 7); + for (u32 i = 0; i <= N_CHARS; i++) { + fprintf(f, " 0x%02x: ", i); + for (u32 j = 0; j < 8; j++) { + u8 val = rmsk[b * ((N_CHARS + 1) * 8) + i * 8 + j]; + for (u32 k = 0; k < 8; k++) { + fprintf(f, "%s", ((val >> k) & 0x1) ? "1" : "0"); + } + fprintf(f, " "); + } + fprintf(f, "\n"); + } + fprintf(f, "\n"); + } +} + +static +void dumpTeddyMasks(const u8 *baseMsk, u32 numMasks, u32 maskWidth, FILE *f) { + // dump nibble masks + fprintf(f, " nibble masks:\n"); + for (u32 i = 0; i < numMasks * 2; i++) { + fprintf(f, " -%d%s: ", 1 + i / 2, (i % 2) ? "hi" : "lo"); + for (u32 j = 0; j < 16 * maskWidth; j++) { + u8 val = baseMsk[i * 16 * maskWidth + j]; + for (u32 k = 0; k < 8; k++) { + fprintf(f, "%s", ((val >> k) & 0x1) ? 
"1" : "0"); + } + fprintf(f, " "); + } + fprintf(f, "\n"); + } + fprintf(f, "\n"); +} + static void dumpTeddy(const Teddy *teddy, FILE *f) { fprintf(f, "TEDDY: %u\n", teddy->engineID); @@ -105,6 +144,14 @@ void dumpTeddy(const Teddy *teddy, FILE *f) { teddy->floodOffset); fprintf(f, "\n"); + u32 maskWidth = des->getNumBuckets() / 8; + size_t headerSize = sizeof(Teddy); + size_t maskLen = des->numMasks * 16 * 2 * maskWidth; + const u8 *teddy_base = (const u8 *)teddy; + const u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize); + const u8 *rmsk = baseMsk + ROUNDUP_CL(maskLen); + dumpTeddyMasks(baseMsk, des->numMasks, maskWidth, f); + dumpTeddyReinforced(rmsk, maskWidth, f); dumpConfirms(teddy, teddy->confOffset, des->getNumBuckets(), f); } diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 3709465b..9a1e54a1 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -353,26 +353,6 @@ void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) { } } -#ifdef TEDDY_DEBUG -static -void dumpReinforcedMaskTable(const u8 *rmsk, const u32 num_tables) { - for (u32 b = 0; b < num_tables; b++) { - printf("reinforcement table for bucket %u..%u:\n", b * 8, b * 8 + 7); - for (u32 i = 0; i <= N_CHARS; i++) { - printf("0x%02x: ", i); - for (u32 j = 0; j < REINFORCED_MSK_LEN; j++) { - u8 val = rmsk[b * RTABLE_SIZE + i * REINFORCED_MSK_LEN + j]; - for (u32 k = 0; k < 8; k++) { - printf("%s", ((val >> k) & 0x1) ? "1" : "0"); - } - printf(" "); - } - printf("\n"); - } - } -} -#endif - static void fillNibbleMasks(const map> &bucketToLits, @@ -543,23 +523,6 @@ bytecode_ptr TeddyCompiler::build() { u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen); fillReinforcedTable(bucketToLits, lits, reinforcedMsk, maskWidth); -#ifdef TEDDY_DEBUG - for (u32 i = 0; i < eng.numMasks * 2; i++) { - for (u32 j = 0; j < 16; j++) { - u8 val = baseMsk[i * 16 + j]; - for (u32 k = 0; k < 8; k++) { - printf("%s", ((val >> k) & 0x1) ? 
"1" : "0"); - } - printf(" "); - } - printf("\n"); - } - - printf("\n===============================================\n" - "reinforced mask table for low boundary (original)\n\n"); - dumpReinforcedMaskTable(reinforcedMsk, maskWidth); -#endif - return fdr; } From f8544505ce68c67fcdb58170ab813ebe268b5df5 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 2 Aug 2017 12:54:02 +1000 Subject: [PATCH 111/190] mergeLeftfixesVariableLag: update comments, debugging support --- src/rose/rose_build_merge.cpp | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 5d4d46e4..4001b118 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1283,6 +1283,12 @@ bool mergeRosePair(RoseBuildImpl &tbi, left_id &r1, left_id &r2, const deque &verts2) { assert(!verts1.empty() && !verts2.empty()); + DEBUG_PRINTF("merging rose pair:\n"); + DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(), + as_string_list(all_tops(r1)).c_str()); + DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(), + as_string_list(all_tops(r2)).c_str()); + RoseGraph &g = tbi.g; if (r1.graph()) { @@ -1293,9 +1299,14 @@ bool mergeRosePair(RoseBuildImpl &tbi, left_id &r1, left_id &r2, return false; } - // The graph in r1 has been merged into the graph in r2. Update r1's - // vertices with the new graph ptr. Since the parent vertices are the - // same, we know that tops will already have been distinct. + /* The graph in r1 has been merged into the graph in r2. Update r1's + * vertices with the new graph ptr. mergeNfaPair() does not alter the + * tops from the input graph so no need to update top values. + * + * It is the responsibility of the caller to ensure that the tops are + * distinct when they have different trigger conditions. 
+ * [Note: mergeLeftfixesVariableLag() should have a common parent set] + */ shared_ptr &h = g[verts2.front()].left.graph; for (RoseVertex v : verts1) { g[v].left.graph = h; @@ -1465,6 +1476,10 @@ u32 commonPrefixLength(left_id &r1, left_id &r2) { * This pass attempts to merge prefix/infix engines which share a common set of * parent vertices. * + * TODO: this function should be rewritten as it assumes all instances of an + * engine have the same set of parent vertices. This can cause the same set of + * merges to be attempted multiple times. + * * Engines are greedily merged pairwise by this process based on a priority * queue keyed off the common prefix length. * @@ -1472,7 +1487,13 @@ u32 commonPrefixLength(left_id &r1, left_id &r2) { * the stop alphabet. * * Infixes: - * - LBR candidates are not considered. + * - LBR candidates are not considered. However, LBRs which have already been + * converted to castles are considered for merging with other castles. + * TODO: Check if we can still have LBR candidates at this stage and if these + * criteria still makes sense and then add explanation as to why there are + * both castles and graphs which are LBR candidates at this stage. + * - It is expected that when this is run all infixes are still at the single + * top stage. * * Prefixes: * - transient prefixes are not considered. @@ -1486,6 +1507,7 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &tbi) { if (!tbi.cc.grey.mergeRose) { return; } + assert(!hasOrphanedTops(tbi)); map rosesByParent; RoseGraph &g = tbi.g; @@ -1535,6 +1557,10 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &tbi) { // We collapse the anchored root into the root vertex when calculating // parents, so that we can merge differently-anchored prefix roses // together. (Prompted by UE-2100) + + /* TODO: check this if this still does anything given that + * mergeableRoseVertices() does a strict check. 
+ */ parents.clear(); for (auto u : inv_adjacent_vertices_range(v, g)) { if (tbi.isAnyStart(u)) { @@ -1612,6 +1638,7 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &tbi) { DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("exit\n"); DEBUG_PRINTF("-----\n"); + assert(!hasOrphanedTops(tbi)); } namespace { From 1f3cfdccef3e799403faaa30b5f12c7b3ea318ef Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 2 Aug 2017 13:07:24 +1000 Subject: [PATCH 112/190] mergeCastle: merge common repeats from the castles --- src/nfa/castlecompile.cpp | 2 +- src/nfa/castlecompile.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index b709d3c5..661c9c2c 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -772,7 +772,7 @@ bool mergeCastle(CastleProto &c1, const CastleProto &c2, const u32 top = m.first; const PureRepeat &pr = m.second; DEBUG_PRINTF("top %u\n", top); - u32 new_top = c1.add(pr); + u32 new_top = c1.merge(pr); top_map[top] = new_top; DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top); } diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index aa4ed354..ea5f06da 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -128,7 +128,9 @@ buildCastle(const CastleProto &proto, const CompileContext &cc, const ReportManager &rm); /** - * \brief Merge two CastleProto prototypes together, if possible. + * \brief Merge two CastleProto prototypes together, if possible. If a + * particular repeat from c2 is already in c1, then it will be reused rather + * than adding a duplicate repeat. * * Returns true if merge of all repeats in c2 into c1 succeeds, and fills * mapping with the repeat indices. 
From d9e2c3daca396309f4fb72c22f7b075ff0cba1bd Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 2 Aug 2017 13:36:24 +1000 Subject: [PATCH 113/190] make ComponentRepeat::vacuous_everywhere() more accurate --- src/parser/ComponentRepeat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/ComponentRepeat.cpp b/src/parser/ComponentRepeat.cpp index ff02703c..09f59d05 100644 --- a/src/parser/ComponentRepeat.cpp +++ b/src/parser/ComponentRepeat.cpp @@ -234,7 +234,7 @@ void ComponentRepeat::optimise(bool connected_to_sds) { } bool ComponentRepeat::vacuous_everywhere() const { - return !m_min; + return !m_min || sub_comp->vacuous_everywhere(); } bool ComponentRepeat::checkEmbeddedStartAnchor(bool at_start) const { From 952f0aad21af5ff4d5db63799f7078bac8a6a559 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 14 Feb 2017 14:18:13 +1100 Subject: [PATCH 114/190] support dynamic stream compression --- CMakeLists.txt | 2 + src/hs_common.h | 12 ++ src/hs_runtime.h | 113 ++++++++++++ src/rose/rose_build_bytecode.cpp | 18 +- src/rose/rose_build_dump.cpp | 17 +- src/rose/rose_internal.h | 35 +++- src/rose/runtime.h | 4 +- src/runtime.c | 99 ++++++++++- src/stream_compress.c | 95 +++++++++++ src/stream_compress.h | 51 ++++++ src/stream_compress_impl.h | 190 +++++++++++++++++++++ unit/hyperscan/arg_checks.cpp | 285 ++++++++++++++++++++++++++++++- 12 files changed, 894 insertions(+), 27 deletions(-) create mode 100644 src/stream_compress.c create mode 100644 src/stream_compress.h create mode 100644 src/stream_compress_impl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 57cf1043..eb9a62e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -662,6 +662,8 @@ SET (hs_compile_SRCS src/hs_version.h src/scratch.h src/state.h + src/stream_compress.c + src/stream_compress.h src/ue2common.h src/compiler/asserts.cpp src/compiler/asserts.h diff --git a/src/hs_common.h b/src/hs_common.h index ffea397e..e1f079f2 100644 --- a/src/hs_common.h +++ 
b/src/hs_common.h @@ -561,6 +561,18 @@ hs_error_t HS_CDECL hs_valid_platform(void); */ #define HS_ARCH_ERROR (-11) +/** + * Provided buffer was too small. + * + * This error indicates that there was insufficient space in the buffer. The + * call should be repeated with a larger provided buffer. + * + * Note: in this situation, it is normal for the amount of space required to be + * returned in the same manner as the used space would have been returned if the + * call was successful. + */ +#define HS_INSUFFICIENT_SPACE (-12) + /** @} */ #ifdef __cplusplus diff --git a/src/hs_runtime.h b/src/hs_runtime.h index ecd97ca5..a93437b8 100644 --- a/src/hs_runtime.h +++ b/src/hs_runtime.h @@ -321,6 +321,119 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, match_event_handler onEvent, void *context); +/** + * Creates a compressed representation of the provided stream in the buffer + * provided. This compressed representation can be converted back into a stream + * state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream(). + * The size of the compressed representation will be placed into @a used_space. + * + * If there is not sufficient space in the buffer to hold the compressed + * represention, @ref HS_INSUFFICIENT_SPACE will be returned and @a used_space + * will be populated with the amount of space required. + * + * Note: this function does not close the provided stream, you may continue to + * use the stream or to free it with @ref hs_close_stream(). + * + * @param stream + * The stream (as created by @ref hs_open_stream()) to be compressed. + * + * @param buf + * Buffer to write the compressed representation into. Note: if the call is + * just being used to determine the amount of space required, it is allowed + * to pass NULL here and @a buf_space as 0. + * + * @param buf_space + * The number of bytes in @a buf. If buf_space is too small, the call will + * fail with @ref HS_INSUFFICIENT_SPACE. 
+ * + * @param used_space + * Pointer to where the amount of used space will be written to. The used + * buffer space is always less than or equal to @a buf_space. If the call + * fails with @ref HS_INSUFFICIENT_SPACE, this pointer will be used to + * write out the amount of buffer space required. + * + * @return + * @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided + * buffer is too small. + */ +hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf, + size_t buf_space, size_t *used_space); + +/** + * Decompresses a compressed representation created by @ref hs_compress_stream() + * into a new stream. + * + * Note: @a buf must correspond to a complete compressed representation created + * by @ref hs_compress_stream() of a stream that was opened against @a db. It is + * not always possible to detect misuse of this API and behaviour is undefined + * if these properties are not satisfied. + * + * @param db + * The compiled pattern database that the compressed stream was opened + * against. + * + * @param stream + * On success, a pointer to the expanded @ref hs_stream_t will be + * returned; NULL on failure. + * + * @param buf + * A compressed representation of a stream. These compressed forms are + * created by @ref hs_compress_stream(). + * + * @param buf_size + * The size in bytes of the compressed representation. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, + const char *buf, size_t buf_size); + +/** + * Decompresses a compressed representation created by @ref hs_compress_stream() + * on top of the 'to' stream. The 'to' stream will first be reset (reporting + * any EOD matches if a non-NULL @a onEvent callback handler is provided). + * + * Note: the 'to' stream must be opened against the same database as the + * compressed stream. 
+ * + * Note: @a buf must correspond to a complete compressed representation created + * by @ref hs_compress_stream() of a stream that was opened against @a db. It is + * not always possible to detect misuse of this API and behaviour is undefined + * if these properties are not satisfied. + * + * @param to_stream + * A pointer to the generated @ref hs_stream_t will be + * returned; NULL on failure. + * + * @param buf + * A compressed representation of a stream. These compressed forms are + * created by @ref hs_compress_stream(). + * + * @param buf_size + * The size in bytes of the compressed representation. + * + * @param scratch + * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is + * allowed to be NULL only if the @a onEvent callback is also NULL. + * + * @param onEvent + * Pointer to a match event callback function. If a NULL pointer is given, + * no matches will be returned. + * + * @param context + * The user defined pointer which will be passed to the callback function + * when a match occurs. + * + * @return + * @ref HS_SUCCESS on success, other values on failure. + */ +hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream, + const char *buf, size_t buf_size, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context); + /** * The block (non-streaming) regular expression scanner. 
* diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index e7fd6271..cf3de55c 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -393,13 +393,15 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, so->activeLeafArray = curr_offset; /* TODO: limit size of array */ curr_offset += mmbit_size(activeArrayCount); + so->activeLeafArray_size = mmbit_size(activeArrayCount); so->activeLeftArray = curr_offset; /* TODO: limit size of array */ + curr_offset += mmbit_size(activeLeftCount); so->activeLeftArray_size = mmbit_size(activeLeftCount); - curr_offset += so->activeLeftArray_size; so->longLitState = curr_offset; curr_offset += longLitStreamStateRequired; + so->longLitState_size = longLitStreamStateRequired; // ONE WHOLE BYTE for each active leftfix with lag. so->leftfixLagTable = curr_offset; @@ -420,6 +422,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, // Exhaustion multibit. so->exhausted = curr_offset; curr_offset += mmbit_size(build.rm.numEkeys()); + so->exhausted_size = mmbit_size(build.rm.numEkeys()); // SOM locations and valid/writeable multibit structures. if (build.ssm.numSomSlots()) { @@ -435,6 +438,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, curr_offset += mmbit_size(build.ssm.numSomSlots()); so->somWritable = curr_offset; curr_offset += mmbit_size(build.ssm.numSomSlots()); + so->somMultibit_size = mmbit_size(build.ssm.numSomSlots()); } else { // No SOM handling, avoid growing the stream state any further. 
so->somLocation = 0; @@ -443,6 +447,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, } // note: state space for mask nfas is allocated later + so->nfaStateBegin = curr_offset; so->end = curr_offset; } @@ -2039,7 +2044,7 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, static void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, RoseStateOffsets *so, u32 *scratchStateSize, - u32 *streamStateSize, u32 *transientStateSize) { + u32 *transientStateSize) { u32 state_offset; if (eng_info.transient) { // Transient engines do not use stream state, but must have room in @@ -2050,7 +2055,6 @@ void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, // Pack NFA stream state on to the end of the Rose stream state. state_offset = so->end; so->end += eng_info.stream_size; - *streamStateSize += eng_info.stream_size; } nfa_info.stateOffset = state_offset; @@ -2064,12 +2068,11 @@ void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, static void updateNfaState(const build_context &bc, vector &nfa_infos, RoseStateOffsets *so, u32 *scratchStateSize, - u32 *streamStateSize, u32 *transientStateSize) { + u32 *transientStateSize) { if (nfa_infos.empty()) { return; } - *streamStateSize = 0; *transientStateSize = 0; *scratchStateSize = 0; @@ -2077,7 +2080,7 @@ void updateNfaState(const build_context &bc, vector &nfa_infos, NfaInfo &nfa_info = nfa_infos[qi]; const auto &eng_info = bc.engine_info_by_queue.at(qi); allocateStateSpace(eng_info, nfa_info, so, scratchStateSize, - streamStateSize, transientStateSize); + transientStateSize); } } @@ -2491,7 +2494,7 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, // Update state offsets to do with NFAs in proto and in the NfaInfo // structures. 
updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize, - &proto.nfaStateSize, &proto.tStateSize); + &proto.tStateSize); proto.nfaInfoOffset = bc.engine_blob.add_range(infos); } @@ -3782,7 +3785,6 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.totalNumLiterals = verify_u32(literal_info.size()); proto.asize = verify_u32(atable.size()); proto.ematcherRegionSize = ematcher_region_size; - proto.longLitStreamState = verify_u32(longLitStreamStateRequired); proto.size = currOffset; diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 5ab9fc99..b70112f2 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -2026,15 +2026,17 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end); fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); - fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); + fprintf(f, " - exhaustion vector : %u bytes\n", + t->stateOffsets.exhausted_size); fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); fprintf(f, " - active array : %u bytes\n", - mmbit_size(t->activeArrayCount)); + t->stateOffsets.activeLeafArray_size); fprintf(f, " - active rose : %u bytes\n", - mmbit_size(t->activeLeftCount)); + t->stateOffsets.activeLeftArray_size); fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize); - fprintf(f, " - nfa state : %u bytes\n", t->nfaStateSize); + fprintf(f, " - nfa state : %u bytes\n", + t->stateOffsets.end - t->stateOffsets.nfaStateBegin); fprintf(f, " - (trans. 
nfa state): %u bytes\n", t->tStateSize); fprintf(f, " - one whole bytes : %u bytes\n", t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable); @@ -2098,7 +2100,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, rolesWithStateCount); DUMP_U32(t, stateSize); DUMP_U32(t, anchorStateSize); - DUMP_U32(t, nfaStateSize); DUMP_U32(t, tStateSize); DUMP_U32(t, smallWriteOffset); DUMP_U32(t, amatcherOffset); @@ -2148,7 +2149,9 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, delayRebuildLength); DUMP_U32(t, stateOffsets.history); DUMP_U32(t, stateOffsets.exhausted); + DUMP_U32(t, stateOffsets.exhausted_size); DUMP_U32(t, stateOffsets.activeLeafArray); + DUMP_U32(t, stateOffsets.activeLeafArray_size); DUMP_U32(t, stateOffsets.activeLeftArray); DUMP_U32(t, stateOffsets.activeLeftArray_size); DUMP_U32(t, stateOffsets.leftfixLagTable); @@ -2156,9 +2159,12 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, stateOffsets.groups); DUMP_U32(t, stateOffsets.groups_size); DUMP_U32(t, stateOffsets.longLitState); + DUMP_U32(t, stateOffsets.longLitState_size); DUMP_U32(t, stateOffsets.somLocation); DUMP_U32(t, stateOffsets.somValid); DUMP_U32(t, stateOffsets.somWritable); + DUMP_U32(t, stateOffsets.somMultibit_size); + DUMP_U32(t, stateOffsets.nfaStateBegin); DUMP_U32(t, stateOffsets.end); DUMP_U32(t, boundary.reportEodOffset); DUMP_U32(t, boundary.reportZeroOffset); @@ -2174,7 +2180,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, ematcherRegionSize); DUMP_U32(t, somRevCount); DUMP_U32(t, somRevOffsetOffset); - DUMP_U32(t, longLitStreamState); fprintf(f, "}\n"); fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); } diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 57395c9d..d38ee8c0 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -170,6 +170,12 @@ struct NfaInfo { #define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose * 
prefix checks */ +/* offset of the status flags in the stream state. */ +#define ROSE_STATE_OFFSET_STATUS_FLAGS 0 + +/* offset of role mmbit in stream state (just after the status flag byte). */ +#define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8) + /** * \brief Rose state offsets. * @@ -184,24 +190,28 @@ struct NfaInfo { struct RoseStateOffsets { /** History buffer. * - * First byte is an 8-bit count of the number of valid history bytes - * available, followed by the history itself. Max size of history is - * RoseEngine::historyRequired. */ + * Max size of history is RoseEngine::historyRequired. */ u32 history; - /** Exhausted bitvector. + /** Exhausted multibit. * - * 1 bit per exhaustible key (used by Highlander mode). If a bit is set, + * entry per exhaustible key (used by Highlander mode). If a bit is set, * reports with that ekey should not be delivered to the user. */ u32 exhausted; + /** size of exhausted multibit */ + u32 exhausted_size; + /** Multibit for active suffix/outfix engines. */ u32 activeLeafArray; - /** Multibit for active Rose (prefix/infix) engines. */ + /** Size of multibit for active suffix/outfix engines in bytes. */ + u32 activeLeafArray_size; + + /** Multibit for active leftfix (prefix/infix) engines. */ u32 activeLeftArray; - /** Size of the active Rose array multibit, in bytes. */ + /** Size of multibit for active leftfix (prefix/infix) engines in bytes. */ u32 activeLeftArray_size; /** Table of lag information (stored as one byte per engine) for active @@ -220,6 +230,9 @@ struct RoseStateOffsets { /** State for long literal support. */ u32 longLitState; + /** Size of the long literal state. */ + u32 longLitState_size; + /** Packed SOM location slots. */ u32 somLocation; @@ -229,6 +242,13 @@ struct RoseStateOffsets { /** Multibit guarding SOM location slots. */ u32 somWritable; + /** Size of each of the somValid and somWritable multibits, in bytes. */ + u32 somMultibit_size; + + /** Begin of the region where NFA engine state is stored. 
+ * The NFA state region extends to end. */ + u32 nfaStateBegin; + /** Total size of Rose state, in bytes. */ u32 end; }; @@ -317,7 +337,6 @@ struct RoseEngine { u32 stateSize; /* size of the state bitset * WARNING: not the size of the rose state */ u32 anchorStateSize; /* size of the state for the anchor dfas */ - u32 nfaStateSize; /* total size of the state for the mask/rose nfas */ u32 tStateSize; /* total size of the state for transient rose nfas */ u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch; * used for sizing scratch only. */ diff --git a/src/rose/runtime.h b/src/rose/runtime.h index d2a4b5d7..88342b53 100644 --- a/src/rose/runtime.h +++ b/src/rose/runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -68,7 +68,7 @@ const void *getByOffset(const struct RoseEngine *t, u32 offset) { static really_inline void *getRoleState(char *state) { - return state + sizeof(u8); // status flags + return state + ROSE_STATE_OFFSET_ROLE_MMBIT; } /** \brief Fetch the active array for suffix nfas. */ diff --git a/src/runtime.c b/src/runtime.c index 17f13382..5a8168d3 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -53,6 +53,7 @@ #include "som/som_runtime.h" #include "som/som_stream.h" #include "state.h" +#include "stream_compress.h" #include "ue2common.h" #include "util/exhaust.h" #include "util/multibit.h" @@ -153,7 +154,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, /** \brief Retrieve status bitmask from stream state. 
*/ static really_inline u8 getStreamStatus(const char *state) { - u8 status = *(const u8 *)state; + u8 status = *(const u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS); assert((status & ~STATUS_VALID_BITS) == 0); return status; } @@ -162,7 +163,7 @@ u8 getStreamStatus(const char *state) { static really_inline void setStreamStatus(char *state, u8 status) { assert((status & ~STATUS_VALID_BITS) == 0); - *(u8 *)state = status; + *(u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS) = status; } /** \brief Initialise SOM state. Used in both block and streaming mode. */ @@ -1092,3 +1093,97 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, return HS_SUCCESS; } + +HS_PUBLIC_API +hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf, + size_t buf_space, size_t *used_space) { + if (unlikely(!stream || !used_space)) { + return HS_INVALID; + } + + if (unlikely(buf_space && !buf)) { + return HS_INVALID; + } + + const struct RoseEngine *rose = stream->rose; + + size_t stream_size = size_compress_stream(rose, stream); + + DEBUG_PRINTF("require %zu [orig %zu]\n", stream_size, + rose->stateOffsets.end + sizeof(struct hs_stream)); + *used_space = stream_size; + + if (buf_space < stream_size) { + return HS_INSUFFICIENT_SPACE; + } + compress_stream(buf, stream_size, rose, stream); + + return HS_SUCCESS; +} + +hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, + const char *buf, size_t buf_size) { + if (unlikely(!stream || !buf)) { + return HS_INVALID; + } + + *stream = NULL; + + hs_error_t err = validDatabase(db); + if (unlikely(err != HS_SUCCESS)) { + return err; + } + + const struct RoseEngine *rose = hs_get_bytecode(db); + if (unlikely(!ISALIGNED_16(rose))) { + return HS_INVALID; + } + + if (unlikely(rose->mode != HS_MODE_STREAM)) { + return HS_DB_MODE_ERROR; + } + + size_t stream_size = rose->stateOffsets.end + sizeof(struct hs_stream); + + struct hs_stream *s = hs_stream_alloc(stream_size); + if (unlikely(!s)) { + return HS_NOMEM; + } + + 
if (!expand_stream(s, rose, buf, buf_size)) { + hs_stream_free(s); + return HS_INVALID; + } + + *stream = s; + return HS_SUCCESS; +} + +hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream, + const char *buf, size_t buf_size, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context) { + if (unlikely(!to_stream || !buf)) { + return HS_INVALID; + } + + const struct RoseEngine *rose = to_stream->rose; + + if (onEvent) { + if (!scratch || !validScratch(to_stream->rose, scratch)) { + return HS_INVALID; + } + if (unlikely(markScratchInUse(scratch))) { + return HS_SCRATCH_IN_USE; + } + report_eod_matches(to_stream, scratch, onEvent, context); + unmarkScratchInUse(scratch); + } + + if (expand_stream(to_stream, rose, buf, buf_size)) { + return HS_SUCCESS; + } else { + return HS_INVALID; + } +} diff --git a/src/stream_compress.c b/src/stream_compress.c new file mode 100644 index 00000000..3051af36 --- /dev/null +++ b/src/stream_compress.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "stream_compress.h" + +#include "state.h" +#include "nfa/nfa_internal.h" +#include "rose/rose_internal.h" +#include "util/multibit.h" +#include "util/uniform_ops.h" + +#include + +#define COPY_IN(p, sz) do { \ + assert(currOffset + sz <= buf_size); \ + memcpy(buf + currOffset, p, sz); \ + currOffset += sz; \ + DEBUG_PRINTF("co = %zu\n", currOffset); \ + } while (0); + +#define COPY_OUT(p, sz) do { \ + if (currOffset + sz > buf_size) { \ + return 0; \ + } \ + memcpy(p, buf + currOffset, sz); \ + currOffset += sz; \ + DEBUG_PRINTF("co = %zu\n", currOffset); \ + } while (0); + +#define SIZE_COPY_IN(p, sz) do { \ + currOffset += sz; \ + DEBUG_PRINTF("co = %zu\n", currOffset); \ + } while (0); + +#define COPY COPY_OUT +#define ASSIGN(lhs, rhs) do { lhs = rhs; } while (0) +#define FN_SUFFIX expand +#define STREAM_QUAL +#define BUF_QUAL const +#include "stream_compress_impl.h" + +int expand_stream(struct hs_stream *stream, const struct RoseEngine *rose, + const char *buf, size_t buf_size) { + return sc_expand(rose, stream, buf, buf_size); +} + +#define COPY COPY_IN +#define ASSIGN(lhs, rhs) do { } while (0) +#define FN_SUFFIX compress +#define STREAM_QUAL const +#define BUF_QUAL +#include "stream_compress_impl.h" + +size_t compress_stream(char *buf, size_t buf_size, + const struct RoseEngine *rose, + const struct hs_stream *stream) { + return sc_compress(rose, stream, buf, buf_size); +} + +#define COPY SIZE_COPY_IN +#define 
ASSIGN(lhs, rhs) do { } while (0) +#define FN_SUFFIX size +#define STREAM_QUAL const +#define BUF_QUAL UNUSED +#include "stream_compress_impl.h" + +size_t size_compress_stream(const struct RoseEngine *rose, + const struct hs_stream *stream) { + return sc_size(rose, stream, NULL, 0); +} diff --git a/src/stream_compress.h b/src/stream_compress.h new file mode 100644 index 00000000..0d06d1e0 --- /dev/null +++ b/src/stream_compress.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Functions for dynamic compress/expand for streams. + */ + +#ifndef STREAM_COMPRESS_H +#define STREAM_COMPRESS_H + +#include + +struct hs_stream; +struct RoseEngine; + +int expand_stream(struct hs_stream *out, const struct RoseEngine *rose, + const char *buf, size_t buf_size); + +size_t compress_stream(char *buf, size_t buf_size, + const struct RoseEngine *rose, + const struct hs_stream *src); + +size_t size_compress_stream(const struct RoseEngine *rose, + const struct hs_stream *stream); + +#endif diff --git a/src/stream_compress_impl.h b/src/stream_compress_impl.h new file mode 100644 index 00000000..ec054f07 --- /dev/null +++ b/src/stream_compress_impl.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "util/join.h" + +#define COPY_FIELD(x) COPY(&x, sizeof(x)) + +/* TODO: replace with a multibit compress/expand call */ +#define COPY_MULTIBIT(mm_p, mm_size_bytes) COPY(mm_p, mm_size_bytes) + +#define COPY_LEFTFIXES JOIN(sc_left_, FN_SUFFIX) +#define COPY_SOM_INFO JOIN(sc_som_, FN_SUFFIX) + +static +size_t COPY_LEFTFIXES(const struct RoseEngine *rose, size_t currOffset, + STREAM_QUAL struct hs_stream *stream, + BUF_QUAL char *buf, UNUSED size_t buf_size) { + if (!rose->activeLeftIterOffset) { + return currOffset; + } + + const struct RoseStateOffsets *so = &rose->stateOffsets; + STREAM_QUAL char *stream_body + = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream); + + /* Note: in the expand case the active left array has already been copied + * into the stream. 
*/ + const u8 *ara = (const u8 *)(stream_body + so->activeLeftArray); + const u32 arCount = rose->activeLeftCount; + const struct LeftNfaInfo *left_table = getLeftTable(rose); + + /* We only want to look at non-transient leftfixes */ + const struct mmbit_sparse_iter *it = getActiveLeftIter(rose); + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + u32 dummy; + u32 ri = mmbit_sparse_iter_begin(ara, arCount, &dummy, it, si_state); + for (; ri != MMB_INVALID; + ri = mmbit_sparse_iter_next(ara, arCount, ri, &dummy, it, si_state)) { + u32 qi = ri + rose->leftfixBeginQueue; + UNUSED const struct LeftNfaInfo *left = left_table + ri; + const struct NfaInfo *nfa_info = getNfaInfoByQueue(rose, qi); + const struct NFA *nfa = getNfaByInfo(rose, nfa_info); + + COPY(stream_body + nfa_info->stateOffset, nfa->streamStateSize); + /* copy the one whole byte for active leftfixes as well */ + assert(left->lagIndex != ROSE_OFFSET_INVALID); + COPY(stream_body + so->leftfixLagTable + left->lagIndex, 1); + } + + return currOffset; +} + +static +size_t COPY_SOM_INFO(const struct RoseEngine *rose, size_t currOffset, + STREAM_QUAL struct hs_stream *stream, + BUF_QUAL char *buf, UNUSED size_t buf_size) { + const struct RoseStateOffsets *so = &rose->stateOffsets; + + if (!so->somLocation) { + assert(!so->somValid); + assert(!so->somWritable); + return currOffset; + } + + STREAM_QUAL char *stream_body + = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream); + + assert(so->somValid); + assert(so->somWritable); + + COPY_MULTIBIT(stream_body + so->somWritable, so->somMultibit_size); + COPY_MULTIBIT(stream_body + so->somValid, so->somMultibit_size); + + /* Copy only the som slots which contain valid values. */ + /* Note: in the expand case the som valid array has been copied in. 
*/ + const u8 *svalid = (const u8 *)(stream_body + so->somValid); + u32 s_count = rose->somLocationCount; + u32 s_width = rose->somHorizon; + for (u32 slot = mmbit_iterate(svalid, s_count, MMB_INVALID); + slot != MMB_INVALID; slot = mmbit_iterate(svalid, s_count, slot)) { + COPY(stream_body + so->somLocation + slot * s_width, s_width); + } + + return currOffset; +} + +static +size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose, + STREAM_QUAL struct hs_stream *stream, + BUF_QUAL char *buf, UNUSED size_t buf_size) { + size_t currOffset = 0; + const struct RoseStateOffsets *so = &rose->stateOffsets; + + STREAM_QUAL char *stream_body + = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream); + + COPY_FIELD(stream->offset); + ASSIGN(stream->rose, rose); + + COPY(stream_body + ROSE_STATE_OFFSET_STATUS_FLAGS, 1); + COPY_MULTIBIT(stream_body + ROSE_STATE_OFFSET_ROLE_MMBIT, rose->stateSize); + + /* stream is valid in compress/size, and stream->offset has been set already + * on the expand side */ + u64a offset = stream->offset; + u32 history = MIN((u32)offset, rose->historyRequired); + + /* copy the active mmbits */ + COPY_MULTIBIT(stream_body + so->activeLeafArray, so->activeLeafArray_size); + COPY_MULTIBIT(stream_body + so->activeLeftArray, so->activeLeftArray_size); + + COPY(stream_body + so->longLitState, so->longLitState_size); + + /* Leftlag table will be handled later, for active leftfixes */ + + /* anchored table state is not required once we are deep in the stream */ + if (offset <= rose->anchoredDistance) { + COPY(stream_body + so->anchorState, rose->anchorStateSize); + } + + COPY(stream_body + so->groups, so->groups_size); + + /* copy the real bits of history */ + UNUSED u32 hend = so->history + rose->historyRequired; + COPY(stream_body + hend - history, history); + + /* copy the exhaustion multibit */ + COPY_MULTIBIT(stream_body + so->exhausted, so->exhausted_size); + + /* copy nfa stream state for endfixes */ + /* Note: in the expand case the active 
array has already been copied into + * the stream. */ + const u8 *aa = (const u8 *)(stream_body + so->activeLeafArray); + u32 aaCount = rose->activeArrayCount; + for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID; + qi = mmbit_iterate(aa, aaCount, qi)) { + DEBUG_PRINTF("saving stream state for qi=%u\n", qi); + const struct NfaInfo *nfa_info = getNfaInfoByQueue(rose, qi); + const struct NFA *nfa = getNfaByInfo(rose, nfa_info); + COPY(stream_body + nfa_info->stateOffset, nfa->streamStateSize); + } + + /* copy nfa stream state for leftfixes */ + currOffset = COPY_LEFTFIXES(rose, currOffset, stream, buf, buf_size); + if (!currOffset) { + return 0; + } + + currOffset = COPY_SOM_INFO(rose, currOffset, stream, buf, buf_size); + if (!currOffset) { + return 0; + } + + return currOffset; +} + +#undef ASSIGN +#undef COPY +#undef COPY_FIELD +#undef COPT_LEFTFIXES +#undef COPY_MULTIBIT +#undef COPY_SOM_INFO +#undef FN_SUFFIX +#undef BUF_QUAL +#undef STREAM_QUAL diff --git a/unit/hyperscan/arg_checks.cpp b/unit/hyperscan/arg_checks.cpp index 8e86cc64..0ff4ce5f 100644 --- a/unit/hyperscan/arg_checks.cpp +++ b/unit/hyperscan/arg_checks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -2318,6 +2318,289 @@ TEST(HyperscanArgChecks, hs_populate_platform_null) { ASSERT_EQ(HS_INVALID, err); } +TEST(HyperscanArgChecks, CompressStreamNoStream) { + char buf[100]; + size_t used; + hs_error_t err = hs_compress_stream(nullptr, buf, sizeof(buf), &used); + ASSERT_EQ(HS_INVALID, err); +} + +TEST(HyperscanArgChecks, CompressStreamNoUsed) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream; + hs_error_t err = hs_open_stream(db, 0, &stream); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[100]; + 
err = hs_compress_stream(stream, buf, sizeof(buf), nullptr); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, CompressStreamNoBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream; + hs_error_t err = hs_open_stream(db, 0, &stream); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[100]; + size_t used; + err = hs_compress_stream(stream, nullptr, sizeof(buf), &used); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, CompressStreamSmallBuff) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream; + hs_error_t err = hs_open_stream(db, 0, &stream); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[100]; + size_t used = 0; + err = hs_compress_stream(stream, buf, 1, &used); + ASSERT_EQ(HS_INSUFFICIENT_SPACE, err); + ASSERT_LT(0, used); + + err = hs_close_stream(stream, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ExpandNoDb) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_expand_stream(nullptr, &stream2, buf, used); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); 
+} + +TEST(HyperscanArgChecks, ExpandNoTo) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_expand_stream(db, nullptr, buf, used); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ExpandNoBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_expand_stream(db, &stream2, nullptr, used); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ExpandSmallBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_expand_stream(db, &stream2, buf, used / 2); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ResetAndExpandNoStream) { + 
hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_reset_and_expand_stream(nullptr, buf, used, nullptr, nullptr, + nullptr); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ResetAndExpandNoBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_open_stream(db, 0, &stream2); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_reset_and_expand_stream(stream2, nullptr, used, nullptr, nullptr, + nullptr); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_close_stream(stream2, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + + +TEST(HyperscanArgChecks, ResetAndExpandSmallBuf) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_open_stream(db, 0, &stream2); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_reset_and_expand_stream(stream2, 
buf, used / 2, nullptr, nullptr, + nullptr); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_close_stream(stream2, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(HyperscanArgChecks, ResetAndExpandNoScratch) { + hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM); + ASSERT_NE(nullptr, db); + + hs_stream_t *stream1; + hs_error_t err = hs_open_stream(db, 0, &stream1); + ASSERT_EQ(HS_SUCCESS, err); + + char buf[2000]; + size_t used = 0; + err = hs_compress_stream(stream1, buf, sizeof(buf), &used); + ASSERT_EQ(HS_SUCCESS, err); + + hs_stream_t *stream2; + err = hs_open_stream(db, 0, &stream2); + ASSERT_EQ(HS_SUCCESS, err); + + int temp; + + err = hs_reset_and_expand_stream(stream2, buf, used, nullptr, singleHandler, + &temp); + ASSERT_EQ(HS_INVALID, err); + + err = hs_close_stream(stream1, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_close_stream(stream2, nullptr, nullptr, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + err = hs_free_database(db); + ASSERT_EQ(HS_SUCCESS, err); +} + class BadModeTest : public testing::TestWithParam {}; // hs_compile: Compile a pattern with bogus mode flags set. 
From 5f6291529f4db5d40ad005f738266e6169e8b4ab Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 6 Mar 2017 11:58:53 +1100 Subject: [PATCH 115/190] hsbench: add stream compress functionality --- tools/hsbench/common.h | 1 + tools/hsbench/engine_hyperscan.cpp | 29 ++++++++++++++++++ tools/hsbench/engine_hyperscan.h | 6 +++- tools/hsbench/main.cpp | 49 +++++++++++++++++++++++++++--- 4 files changed, 80 insertions(+), 5 deletions(-) diff --git a/tools/hsbench/common.h b/tools/hsbench/common.h index efff3f99..a8295911 100644 --- a/tools/hsbench/common.h +++ b/tools/hsbench/common.h @@ -40,5 +40,6 @@ extern std::string serializePath; extern unsigned int somPrecisionMode; extern bool forceEditDistance; extern unsigned editDistance; +extern bool printCompressSize; #endif // COMMON_H diff --git a/tools/hsbench/engine_hyperscan.cpp b/tools/hsbench/engine_hyperscan.cpp index 9674e5c8..5f188472 100644 --- a/tools/hsbench/engine_hyperscan.cpp +++ b/tools/hsbench/engine_hyperscan.cpp @@ -205,6 +205,35 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data, } } +void EngineHyperscan::streamCompressExpand(EngineStream &stream, + vector &temp) const { + size_t used = 0; + hs_error_t err = hs_compress_stream(stream.id, temp.data(), temp.size(), + &used); + if (err == HS_INSUFFICIENT_SPACE) { + temp.resize(used); + err = hs_compress_stream(stream.id, temp.data(), temp.size(), &used); + } + + if (err != HS_SUCCESS) { + printf("Fatal error: hs_compress_stream returned error %d\n", err); + abort(); + } + + if (printCompressSize) { + printf("stream %u: compressed to %zu\n", stream.sn, used); + } + + err = hs_reset_and_expand_stream(stream.id, temp.data(), temp.size(), + nullptr, nullptr, nullptr); + + if (err != HS_SUCCESS) { + printf("Fatal error: hs_reset_and expand_stream returned error %d\n", + err); + abort(); + } +} + static unsigned makeModeFlags(ScanMode scan_mode) { switch (scan_mode) { diff --git a/tools/hsbench/engine_hyperscan.h 
b/tools/hsbench/engine_hyperscan.h index 7875decc..2c93959b 100644 --- a/tools/hsbench/engine_hyperscan.h +++ b/tools/hsbench/engine_hyperscan.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "hs_runtime.h" #include +#include /** Structure for the result of a single complete scan. */ struct ResultEntry { @@ -79,6 +80,9 @@ public: void streamClose(std::unique_ptr stream, ResultEntry &result) const; + void streamCompressExpand(EngineStream &stream, + std::vector &temp) const; + void streamScan(EngineStream &stream, const char *data, unsigned int len, unsigned int id, ResultEntry &result) const; diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index 9c5fd6cb..f2ea8e7e 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -77,10 +77,13 @@ string serializePath(""); unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE; bool forceEditDistance = false; unsigned editDistance = 0; +bool printCompressSize = false; + +// Globals local to this file. +static bool compressStream = false; namespace /* anonymous */ { -// Globals local to this file. 
bool display_per_scan = false; ScanMode scan_mode = ScanMode::STREAMING; unsigned repeats = 20; @@ -212,11 +215,15 @@ void processArgs(int argc, char *argv[], vector &sigSets, int in_sigfile = 0; int do_per_scan = 0; int do_echo_matches = 0; + int do_compress = 0; + int do_compress_size = 0; vector sigFiles; static struct option longopts[] = { {"per-scan", 0, &do_per_scan, 1}, {"echo-matches", 0, &do_echo_matches, 1}, + {"compress-stream", 0, &do_compress, 1}, + {"print-compress-size", 0, &do_compress_size, 1}, {nullptr, 0, nullptr, 0} }; @@ -338,6 +345,12 @@ void processArgs(int argc, char *argv[], vector &sigSets, if (do_per_scan) { display_per_scan = true; } + if (do_compress) { + compressStream = true; + } + if (do_compress_size) { + printCompressSize = true; + } if (exprPath.empty() && !sigFiles.empty()) { /* attempt to infer an expression directory */ @@ -470,10 +483,12 @@ vector prepStreamingData(const ThreadContext *ctx) { } static -void benchStreamingInternal(ThreadContext *ctx, vector &streams) { +void benchStreamingInternal(ThreadContext *ctx, vector &streams, + bool do_compress) { assert(ctx); const EngineHyperscan &e = ctx->engine; const vector &blocks = ctx->corpus_data; + vector compress_buf(do_compress ? 
1000 : 0); for (ResultEntry &r : ctx->results) { ctx->timer.start(); @@ -491,6 +506,8 @@ void benchStreamingInternal(ThreadContext *ctx, vector &streams) { printf("Fatal error: stream open failed!\n"); exit(1); } + } else if (do_compress) { + e.streamCompressExpand(*stream.eng_handle, compress_buf); } assert(stream.eng_handle); @@ -521,7 +538,7 @@ void benchStreaming(void *context) { startTotalTimer(ctx); - benchStreamingInternal(ctx, streams); + benchStreamingInternal(ctx, streams, false); // Synchronization point ctx->barrier(); @@ -530,6 +547,26 @@ void benchStreaming(void *context) { stopTotalTimer(ctx); } +static +void benchStreamingCompress(void *context) { + ThreadContext *ctx = (ThreadContext *)context; + vector streams = prepStreamingData(ctx); + + // Synchronization point + ctx->barrier(); + + startTotalTimer(ctx); + + benchStreamingInternal(ctx, streams, true); + + // Synchronization point + ctx->barrier(); + + // Now that all threads are finished, we can stop the clock. + stopTotalTimer(ctx); +} + + /** In-memory structure for a data block to be scanned in vectored mode. 
*/ struct VectoredInfo { vector data; @@ -704,7 +741,11 @@ unique_ptr makeThreadContext(const EngineHyperscan &db, thread_func_t fn = nullptr; switch (scan_mode) { case ScanMode::STREAMING: - fn = benchStreaming; + if (compressStream) { + fn = benchStreamingCompress; + } else { + fn = benchStreaming; + } break; case ScanMode::VECTORED: fn = benchVectored; From 205a5bc98f6fcaf8083d20410abceeee4fef1db7 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Mon, 8 May 2017 15:56:28 -0400 Subject: [PATCH 116/190] multibit compression support --- CMakeLists.txt | 1 + src/stream_compress.c | 35 ++ src/stream_compress_impl.h | 16 +- src/util/multibit_compress.h | 204 +++++++ unit/CMakeLists.txt | 1 + unit/internal/multi_bit_compress.cpp | 785 +++++++++++++++++++++++++++ 6 files changed, 1032 insertions(+), 10 deletions(-) create mode 100644 src/util/multibit_compress.h create mode 100644 unit/internal/multi_bit_compress.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index eb9a62e1..0117110c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -629,6 +629,7 @@ set (hs_exec_SRCS src/util/masked_move.h src/util/multibit.h src/util/multibit.c + src/util/multibit_compress.h src/util/multibit_internal.h src/util/pack_bits.h src/util/popcount.h diff --git a/src/stream_compress.c b/src/stream_compress.c index 3051af36..0cc782da 100644 --- a/src/stream_compress.c +++ b/src/stream_compress.c @@ -32,6 +32,7 @@ #include "nfa/nfa_internal.h" #include "rose/rose_internal.h" #include "util/multibit.h" +#include "util/multibit_compress.h" #include "util/uniform_ops.h" #include @@ -57,7 +58,39 @@ DEBUG_PRINTF("co = %zu\n", currOffset); \ } while (0); +#define COPY_MULTIBIT_IN(p, total_bits) do { \ + size_t sz; \ + STREAM_QUAL u8 *bits = (STREAM_QUAL u8 *)p; \ + BUF_QUAL u8 *comp = (BUF_QUAL u8 *)(buf + currOffset); \ + if (!mmbit_compress(bits, total_bits, comp, &sz, \ + buf_size - currOffset)) { \ + return 0; /* error */ \ + } \ + currOffset += sz; \ + DEBUG_PRINTF("co = %zu\n", 
currOffset); \ + } while (0); + +#define COPY_MULTIBIT_OUT(p, total_bits) do { \ + size_t sz; \ + STREAM_QUAL u8 *bits = (STREAM_QUAL u8 *)p; \ + BUF_QUAL u8 *comp = (BUF_QUAL u8 *)(buf + currOffset); \ + if (!mmbit_decompress(bits, total_bits, comp, &sz, \ + buf_size - currOffset)) { \ + return 0; /* error */ \ + } \ + currOffset += sz; \ + DEBUG_PRINTF("co = %zu\n", currOffset); \ + } while (0); + +#define COPY_MULTIBIT_SIZE(p, total_bits) do { \ + STREAM_QUAL u8 *bits = (STREAM_QUAL u8 *)p; \ + size_t sz = mmbit_compsize(bits, total_bits); \ + currOffset += sz; \ + DEBUG_PRINTF("co = %zu\n", currOffset); \ + } while (0); + #define COPY COPY_OUT +#define COPY_MULTIBIT COPY_MULTIBIT_OUT #define ASSIGN(lhs, rhs) do { lhs = rhs; } while (0) #define FN_SUFFIX expand #define STREAM_QUAL @@ -70,6 +103,7 @@ int expand_stream(struct hs_stream *stream, const struct RoseEngine *rose, } #define COPY COPY_IN +#define COPY_MULTIBIT COPY_MULTIBIT_IN #define ASSIGN(lhs, rhs) do { } while (0) #define FN_SUFFIX compress #define STREAM_QUAL const @@ -83,6 +117,7 @@ size_t compress_stream(char *buf, size_t buf_size, } #define COPY SIZE_COPY_IN +#define COPY_MULTIBIT COPY_MULTIBIT_SIZE #define ASSIGN(lhs, rhs) do { } while (0) #define FN_SUFFIX size #define STREAM_QUAL const diff --git a/src/stream_compress_impl.h b/src/stream_compress_impl.h index ec054f07..54aebd71 100644 --- a/src/stream_compress_impl.h +++ b/src/stream_compress_impl.h @@ -29,10 +29,6 @@ #include "util/join.h" #define COPY_FIELD(x) COPY(&x, sizeof(x)) - -/* TODO: replace with a multibit compress/expand call */ -#define COPY_MULTIBIT(mm_p, mm_size_bytes) COPY(mm_p, mm_size_bytes) - #define COPY_LEFTFIXES JOIN(sc_left_, FN_SUFFIX) #define COPY_SOM_INFO JOIN(sc_som_, FN_SUFFIX) @@ -93,8 +89,8 @@ size_t COPY_SOM_INFO(const struct RoseEngine *rose, size_t currOffset, assert(so->somValid); assert(so->somWritable); - COPY_MULTIBIT(stream_body + so->somWritable, so->somMultibit_size); - COPY_MULTIBIT(stream_body + 
so->somValid, so->somMultibit_size); + COPY_MULTIBIT(stream_body + so->somWritable, rose->somLocationCount); + COPY_MULTIBIT(stream_body + so->somValid, rose->somLocationCount); /* Copy only the som slots which contain valid values. */ /* Note: in the expand case the som valid array has been copied in. */ @@ -123,7 +119,7 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose, ASSIGN(stream->rose, rose); COPY(stream_body + ROSE_STATE_OFFSET_STATUS_FLAGS, 1); - COPY_MULTIBIT(stream_body + ROSE_STATE_OFFSET_ROLE_MMBIT, rose->stateSize); + COPY_MULTIBIT(stream_body + ROSE_STATE_OFFSET_ROLE_MMBIT, rose->rolesWithStateCount); /* stream is valid in compress/size, and stream->offset has been set already * on the expand side */ @@ -131,8 +127,8 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose, u32 history = MIN((u32)offset, rose->historyRequired); /* copy the active mmbits */ - COPY_MULTIBIT(stream_body + so->activeLeafArray, so->activeLeafArray_size); - COPY_MULTIBIT(stream_body + so->activeLeftArray, so->activeLeftArray_size); + COPY_MULTIBIT(stream_body + so->activeLeafArray, rose->activeArrayCount); + COPY_MULTIBIT(stream_body + so->activeLeftArray, rose->activeLeftCount); COPY(stream_body + so->longLitState, so->longLitState_size); @@ -150,7 +146,7 @@ size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose, COPY(stream_body + hend - history, history); /* copy the exhaustion multibit */ - COPY_MULTIBIT(stream_body + so->exhausted, so->exhausted_size); + COPY_MULTIBIT(stream_body + so->exhausted, rose->ekeyCount); /* copy nfa stream state for endfixes */ /* Note: in the expand case the active array has already been copied into diff --git a/src/util/multibit_compress.h b/src/util/multibit_compress.h new file mode 100644 index 00000000..e7b4fd8e --- /dev/null +++ b/src/util/multibit_compress.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are 
permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** file + * \brief multibit compression API: compress / decompress / size + */ + +#ifndef MULTIBIT_COMPRESS_H +#define MULTIBIT_COMPRESS_H + +#include "multibit.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \brief size API. */ +static really_inline +size_t mmbit_compsize(const u8 *bits, u32 total_bits) { + // Deal with flat model. + if (total_bits <= MMB_FLAT_MAX_BITS) { + return (ROUNDUP_N(total_bits, 8) / 8); + } + // Deal with all cleared mmb. + if (mmb_load(bits) == 0) { + return sizeof(MMB_TYPE); + } + // Deal with normal pyramid mmb. 
+ const u32 max_level = mmbit_maxlevel(total_bits); + u32 level = 0; + u32 key = 0; + u32 key_rem = 0; + u32 num_block = 0; + // Iteration-version of DFS + while (1) { + if (key_rem < MMB_KEY_BITS) { + const u8 *block_ptr = mmbit_get_level_root_const(bits, level) + + key * sizeof(MMB_TYPE); + MMB_TYPE block = mmb_load(block_ptr); + MMB_TYPE block_1 = block & ~mmb_mask_zero_to_nocheck(key_rem); + if (mmb_popcount(block) == mmb_popcount(block_1)) { + num_block++; + } + if (level < max_level && block_1) { + key = (key << MMB_KEY_SHIFT) + mmb_ctz(block_1); + key_rem = 0; + level++; + continue; + } + } + if (level-- == 0) { + return sizeof(MMB_TYPE) * num_block; + } + key_rem = (key & MMB_KEY_MASK) + 1; + key >>= MMB_KEY_SHIFT; + } +} + +/** \brief compress API. */ +static really_inline +char mmbit_compress(const u8 *bits, u32 total_bits, u8 *comp, + size_t *comp_space, size_t max_comp_space) { + UNUSED u8 *comp_init = comp; + // Compute comp_size first. + size_t comp_size = mmbit_compsize(bits, total_bits); + // Check whether out of writable range. + if (comp_size > max_comp_space) { + return 0; + } + *comp_space = comp_size; // Return comp_size outside. + // Deal with flat model. + if (total_bits <= MMB_FLAT_MAX_BITS) { + memcpy(comp, bits, comp_size); + return 1; + } + // Deal with all cleared mmb. + if (mmb_load(bits) == 0) { + memcpy(comp, bits, sizeof(MMB_TYPE)); + return 1; + } + // Deal with normal pyramid mmb. 
+ const u32 max_level = mmbit_maxlevel(total_bits); + u32 level = 0; + u32 key = 0; + u32 key_rem = 0; + // Iteration-version of DFS + while (1) { + if (key_rem < MMB_KEY_BITS) { + const u8 *block_ptr = mmbit_get_level_root_const(bits, level) + + key * sizeof(MMB_TYPE); + MMB_TYPE block = mmb_load(block_ptr); + MMB_TYPE block_1 = block & ~mmb_mask_zero_to_nocheck(key_rem); + if (mmb_popcount(block) == mmb_popcount(block_1)) { + memcpy(comp, &block, sizeof(MMB_TYPE)); + comp += sizeof(MMB_TYPE); + } + if (level < max_level && block_1) { + key = (key << MMB_KEY_SHIFT) + mmb_ctz(block_1); + key_rem = 0; + level++; + continue; + } + } + if (level-- == 0) { + break; + } + key_rem = (key & MMB_KEY_MASK) + 1; + key >>= MMB_KEY_SHIFT; + } + assert((u32)(comp - comp_init) == comp_size); + return 1; +} + +/** \brief decompress API. */ +static really_inline +char mmbit_decompress(u8 *bits, u32 total_bits, const u8 *comp, + size_t *comp_space, size_t max_comp_space) { + UNUSED const u8 *comp_init = comp; + size_t comp_size; + // Deal with flat model. + if (total_bits <= MMB_FLAT_MAX_BITS) { + comp_size = ROUNDUP_N(total_bits, 8) / 8; + memcpy(bits, comp, comp_size); + *comp_space = comp_size; + return 1; + } + // Deal with all cleared mmb. + if (mmb_load(comp) == 0) { + comp_size = sizeof(MMB_TYPE); + memcpy(bits, comp, comp_size); + *comp_space = comp_size; + return 1; + } + // Deal with normal mmb. + u32 max_level = mmbit_maxlevel(total_bits); + u32 level = 0; + u32 key = 0; + u32 key_rem = 0; + UNUSED const u8 *comp_end = comp_init + max_comp_space; + // Iteration-version of DFS + memcpy(bits, comp, sizeof(MMB_TYPE)); // Copy root block first. 
+ comp += sizeof(MMB_TYPE); + while (1) { + if (key_rem < MMB_KEY_BITS) { + u8 *block_ptr = mmbit_get_level_root(bits, level) + + key * sizeof(MMB_TYPE); + MMB_TYPE block = mmb_load(block_ptr); + MMB_TYPE block_1 = block & ~mmb_mask_zero_to_nocheck(key_rem); + if (level < max_level && block_1) { + key = (key << MMB_KEY_SHIFT) + mmb_ctz(block_1); + u8 *block_ptr_1 = mmbit_get_level_root(bits, level + 1) + + key * sizeof(MMB_TYPE); + memcpy(block_ptr_1, comp, sizeof(MMB_TYPE)); + comp += sizeof(MMB_TYPE); + if (comp > comp_end) { + return 0; // Out of buffer. + } + key_rem = 0; + level++; + continue; + } + } + if (level-- == 0) { + break; + } + key_rem = (key & MMB_KEY_MASK) + 1; + key >>= MMB_KEY_SHIFT; + } + comp_size = (u32)(comp - comp_init); + *comp_space = comp_size; + return 1; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // MULTBIT_COMPRESS_H + diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 45e2c7ba..fad8633d 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -82,6 +82,7 @@ set(unit_internal_SOURCES internal/limex_nfa.cpp internal/masked_move.cpp internal/multi_bit.cpp + internal/multi_bit_compress.cpp internal/nfagraph_common.h internal/nfagraph_comp.cpp internal/nfagraph_equivalence.cpp diff --git a/unit/internal/multi_bit_compress.cpp b/unit/internal/multi_bit_compress.cpp new file mode 100644 index 00000000..d7396b81 --- /dev/null +++ b/unit/internal/multi_bit_compress.cpp @@ -0,0 +1,785 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "gtest/gtest.h" +#include "ue2common.h" +#include "util/compile_error.h" +#include "util/make_unique.h" +#include "util/multibit.h" +#include "util/multibit_build.h" +#include "util/multibit_compress.h" + +using namespace std; +using namespace testing; +using namespace ue2; + +/** \brief Print mmbit structure block by block. */ +UNUSED +static +void mmbit_display(const u8 *bits, u32 total_bits) { + for (u32 i = 0; i < mmbit_size(total_bits); i += 8) { + printf("block %d:", i / 8); + for (s32 j = 7; j >= 0; j--) { + u8 a = (*(bits + i + j)); + printf(" %02x", a); + } + printf("\n"); + } + printf("\n"); +} + +/** \brief Print an MMB_TYPE block. 
*/ +UNUSED +static +void mmbit_display_block(const u8 *bits) { + for (s32 j = 7; j >= 0; j--) { + u8 a = (*(bits + j)); + printf(" %02x", a); + } + printf("\n"); +} + +/** \brief Print mmbit structure block by block. */ +UNUSED +static +void mmbit_display_comp(const u8 *bits, u32 comp_size) { + for (u32 i = 0; i < comp_size; i += 8) { + printf("block %d:", i / 8); + for (s32 j = 7; j >= 0; j--) { + u8 a = (*(bits + i + j)); + printf(" %02x", a); + } + printf("\n"); + } + printf("\n"); +} + +namespace { +class mmbit_holder { +public: + mmbit_holder() {} + explicit mmbit_holder(u32 num_bits, u32 excess = 0) + : data(ue2::make_unique(mmbit_size(num_bits) + 7 + excess)) {} + void init(u32 num_bits) { + assert(!data); + data = ue2::make_unique(mmbit_size(num_bits) + 7); + } + operator u8 *() { + assert(data); + return data.get() + 7; + } + operator const u8 *() const { + assert(data); + return data.get() + 7; + } + +private: + unique_ptr data = nullptr; +}; + +class comp_holder { +public: + comp_holder() {} + explicit comp_holder(u32 length) + : data(ue2::make_unique(length + 7)) {} + void init(u32 length) { + assert(!data); + data = ue2::make_unique(length + 7); + } + operator u8 *() { + assert(data); + return data.get() + 7; + } + operator const u8 *() const { + assert(data); + return data.get() + 7; + } + +private: + unique_ptr data = nullptr; +}; +} + +static +void fill_mmbit(u8 *ba, u32 test_size) { + fill_n(ba, mmbit_size(test_size), 0xff); +} + +// We provide both test size and stride so that larger tests don't take forever +// checking every single key. +struct MultiBitCompTestParam { + u32 size; + u32 stride; +}; + +// Parameterized test case for bounded iterator, rather that propagating +// copypasta. Allocates space as given. 
+class MultiBitCompTest : public TestWithParam { +protected: + virtual void SetUp() { + const MultiBitCompTestParam &p = GetParam(); + test_size = p.size; + stride = p.stride; + ba.init(test_size); + // blast with ones for the lulz + fill_mmbit(ba, test_size); + } + + virtual void TearDown() {} + + u32 test_size; // number of bits in the multibit + u32 stride; // stride to use for scans + mmbit_holder ba; // multibit storage +}; + +TEST(MultiBitComp, CompCompsizeSparse) { + static const u32 test_set[] = { + 257, + 4097, + (1U << 18) + 1, + (1U << 24) + 1, + (1U << 30) + 1 + }; + for (u32 i = 0; i < 5; i++) { + u32 test_size = test_set[i]; + mmbit_holder ba(test_size); + + // Clear all. + mmbit_clear(ba, test_size); + ASSERT_EQ(sizeof(MMB_TYPE), mmbit_compsize(ba, test_size)); + + // Switch 3 bits on. + mmbit_set(ba, test_size, 0); + mmbit_set(ba, test_size, test_size / 2); + mmbit_set(ba, test_size, test_size - 1); + + switch(test_size){ + case 257: + ASSERT_EQ(sizeof(MMB_TYPE) * 4, mmbit_compsize(ba, test_size)); + break; + case 4097: + ASSERT_EQ(sizeof(MMB_TYPE) * 6, mmbit_compsize(ba, test_size)); + break; + case (1U << 18) + 1: + ASSERT_EQ(sizeof(MMB_TYPE) * 9, mmbit_compsize(ba, test_size)); + break; + case (1U << 24) + 1: + ASSERT_EQ(sizeof(MMB_TYPE) * 12, mmbit_compsize(ba, test_size)); + break; + case (1U << 30) + 1: + ASSERT_EQ(sizeof(MMB_TYPE) * 15, mmbit_compsize(ba, test_size)); + break; + } + size_t comp_size = mmbit_compsize(ba, test_size); + + // Switch 3 bits off. + mmbit_unset(ba, test_size, 0); + mmbit_unset(ba, test_size, test_size / 2); + mmbit_unset(ba, test_size, test_size - 1); + + ASSERT_TRUE(mmbit_any(ba, test_size)); + ASSERT_FALSE(mmbit_any_precise(ba, test_size)); + + ASSERT_EQ(comp_size, mmbit_compsize(ba, test_size)); + + // Clear all again. 
+ mmbit_clear(ba, test_size); + + ASSERT_FALSE(mmbit_any(ba, test_size)); + ASSERT_FALSE(mmbit_any_precise(ba, test_size)); + + ASSERT_EQ(sizeof(MMB_TYPE), mmbit_compsize(ba, test_size)); + } +} + +TEST(MultiBitComp, CompCompsizeDense) { + static const u32 test_set[] = { + 257, + 4097, + (1U << 18) + 1, + (1U << 24) + 1, + (1U << 30) + 1 + }; + for (u32 i = 0; i < 5; i++) { + u32 test_size = test_set[i]; + mmbit_holder ba(test_size); + + // Fill all. (fill_mmbit() is not feasible.) + //fill_mmbit(ba, test_size); + mmbit_init_range(ba, test_size, 0, test_size); + + switch(test_size){ + case 257: + ASSERT_EQ(sizeof(MMB_TYPE) * (1 + 5), + mmbit_compsize(ba, test_size)); + break; + case 4097: + ASSERT_EQ(sizeof(MMB_TYPE) * (3 + (1 + 64)), + mmbit_compsize(ba, test_size)); + break; + case (1U << 18) + 1: + ASSERT_EQ(sizeof(MMB_TYPE) * (4 + (1 + 64 + 4096)), + mmbit_compsize(ba, test_size)); + break; + case (1U << 24) + 1: + ASSERT_EQ(sizeof(MMB_TYPE) * (5 + (1 + 64 + 4096 + (1U << 18))), + mmbit_compsize(ba, test_size)); + break; + case (1U << 30) + 1: + ASSERT_EQ(sizeof(MMB_TYPE) * (6 + (1 + 64 + 4096 + (1U << 18) + + (1U << 24))), mmbit_compsize(ba, test_size)); + break; + } + size_t comp_size = mmbit_compsize(ba, test_size); + + // Switch 3 bits off. + mmbit_unset(ba, test_size, 0); + mmbit_unset(ba, test_size, test_size / 2); + mmbit_unset(ba, test_size, test_size - 1); + + ASSERT_EQ(comp_size, mmbit_compsize(ba, test_size)); + + // Switch all bits off, not a clear-up. + mmbit_unset_range(ba, test_size, 0, test_size); + + ASSERT_TRUE(mmbit_any(ba, test_size)); + ASSERT_FALSE(mmbit_any_precise(ba, test_size)); + + ASSERT_EQ(comp_size, mmbit_compsize(ba, test_size)); + } +} + +TEST_P(MultiBitCompTest, CompCompressDecompressSparse) { + SCOPED_TRACE(test_size); + ASSERT_TRUE(ba != nullptr); + + // 1st active range --> empty + mmbit_clear(ba, test_size); + + // op 2. 
+ // 2nd active range --> [1/5, 1/3) + u64a begin = test_size / 5; + u64a end = test_size / 3; + for (u64a i = begin; i < end; i++) { + mmbit_set(ba, test_size, i); + } + + // op 3. + // 3rd active range --> [1/5, 1/2) + begin = test_size / 4; + end = test_size / 2; + for (u64a i = begin; i < end; i++) { + mmbit_set(ba, test_size, i); + } + + // op 4. + // 4th active range --> [1/5, 1/4) and [1/3, 1/2) + begin = test_size / 4; + end = test_size / 3; + mmbit_unset_range(ba, test_size, begin, end); + + // op 5. + // 5th active range --> empty + mmbit_clear(ba, test_size); + + // op 6. + // 6th active range --> [1/4, 1/3) + for (u64a i = begin; i < end; i++) { + mmbit_set(ba, test_size, i); + } + + // Initialize compression space. + size_t comp_size = mmbit_compsize(ba, test_size); + comp_holder ca(comp_size); + ASSERT_EQ(1, mmbit_compress(ba, test_size, ca, &comp_size, comp_size)); + + // Initialize decompression space. + mmbit_holder ba_1(test_size); + fill_mmbit(ba_1, test_size); // Dirty decompression space. + ASSERT_EQ(1, mmbit_decompress(ba_1, test_size, ca, &comp_size, comp_size)); + + // Correctness checking, should be [1/4, 1/3). + // And now, begin = test_size / 4, end = test_size / 3. + for (u64a i = 0; i < test_size; i += stride) { + if (i >= begin && i < end) { + ASSERT_TRUE(mmbit_isset(ba_1, test_size, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_1, test_size, i)); + } + } +} + +TEST_P(MultiBitCompTest, CompCompressDecompressDense) { + SCOPED_TRACE(test_size); + ASSERT_TRUE(ba != nullptr); + + ASSERT_TRUE(mmbit_all(ba, test_size)); + + // Sequence of set/unset/clear operations. + // op 1. + // 1st active range --> [0, 1/4) and [1/3, 1) + u64a begin = test_size / 4; + u64a end = test_size / 3; + mmbit_unset_range(ba, test_size, begin, end); + + // op 2. + // 2st active range --> empty + mmbit_clear(ba, test_size); + + // op 3. 
+ // 3rd active range --> [1/5, 1/2) + begin = test_size / 5; + end = test_size / 2; + for (u64a i = begin; i < end; i++) { + mmbit_set(ba, test_size, i); + } + + // op 4. + // 4th active range --> [1/3, 1/2) + end = test_size / 3; + mmbit_unset_range(ba, test_size, begin, end); + + // op 5. + //5th active range --> empty + begin = test_size / 4; + end = test_size / 2; + mmbit_unset_range(ba, test_size, begin, end); + + // Initialize compression space. + size_t comp_size = mmbit_compsize(ba, test_size); + comp_holder ca(comp_size); + ASSERT_EQ(1, mmbit_compress(ba, test_size, ca, &comp_size, comp_size)); + + // Initialize decompression space. + mmbit_holder ba_1(test_size); + fill_mmbit(ba_1, test_size); // Dirty decompression space. + ASSERT_EQ(1, mmbit_decompress(ba_1, test_size, ca, &comp_size, comp_size)); + + // Correctness checking, should be empty. + if (test_size <= MMB_FLAT_MAX_BITS) { + ASSERT_FALSE(mmbit_any(ba, test_size)); + ASSERT_FALSE(mmbit_any(ba_1, test_size)); + } else { + ASSERT_TRUE(mmbit_any(ba, test_size)); + ASSERT_TRUE(mmbit_any(ba_1, test_size)); + } + ASSERT_FALSE(mmbit_any_precise(ba, test_size)); + ASSERT_FALSE(mmbit_any_precise(ba_1, test_size)); +} + +TEST(MultiBitComp, CompIntegration1) { + // 256 + 1 --> smallest 2-level mmbit + u32 total_size = mmbit_size(257); + mmbit_holder ba(257); + + //-------------------- 1 -----------------------// + // Operate on mmbit + mmbit_init_range(ba, 257, 0, 100); + // Compress + size_t comp_size = mmbit_compsize(ba, 257); + comp_holder ca(comp_size); + ASSERT_EQ(1, mmbit_compress(ba, 257, ca, &comp_size, comp_size)); + // Decompress + mmbit_holder ba_1(257); + ASSERT_EQ(1, mmbit_decompress(ba_1, 257, ca, &comp_size, comp_size)); + // Check set range: [0,100) + for (u64a i = 0; i < 257; i++) { + if (i < 100) { + ASSERT_TRUE(mmbit_isset(ba_1, 257, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_1, 257, i)); + } + } + + //-------------------- 2 -----------------------// + // Operate on mmbit + for (u64a 
i = 190; i < 257; i++) { + mmbit_set(ba_1, 257, i); + } + // Compress + size_t comp_size_1 = mmbit_compsize(ba_1, 257); + comp_holder ca_1(comp_size_1); + ASSERT_EQ(1, mmbit_compress(ba_1, 257, ca_1, &comp_size_1, comp_size_1)); + // Decompress + mmbit_holder ba_2(257); + ASSERT_EQ(1, mmbit_decompress(ba_2, 257, ca_1, &comp_size_1, comp_size_1)); + // Check set range: [0,100) and [190,257) + for (u64a i = 0; i < 257; i++) { + if (i < 100 || i >= 190) { + ASSERT_TRUE(mmbit_isset(ba_2, 257, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_2, 257, i)); + } + } + + //-------------------- 3 -----------------------// + // Operate on mmbit + mmbit_unset_range(ba_2, 257, 190, 192); + // Compress + size_t comp_size_2 = mmbit_compsize(ba_2, 257); + comp_holder ca_2(comp_size_2); + ASSERT_EQ(1, mmbit_compress(ba_2, 257, ca_2, &comp_size_2, comp_size_2)); + // Decompress + mmbit_holder ba_3(257); + ASSERT_EQ(1, mmbit_decompress(ba_3, 257, ca_2, &comp_size_2, comp_size_2)); + // Check set range: [0,100) and [192,257) + for (u64a i = 0; i < 257; i++) { + if (i < 100 || i >= 192) { + ASSERT_TRUE(mmbit_isset(ba_3, 257, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_3, 257, i)); + } + } + + //-------------------- 4 -----------------------// + // Operate on mmbit + for (u64a i = 100; i < 200; i++) { + mmbit_set(ba_3, 257, i); + } + // Compress + size_t comp_size_3 = mmbit_compsize(ba_3, 257); + comp_holder ca_3(comp_size_3); + ASSERT_EQ(1, mmbit_compress(ba_3, 257, ca_3, &comp_size_3, comp_size_3)); + // Decompress + mmbit_holder ba_4(257); + ASSERT_EQ(1, mmbit_decompress(ba_4, 257, ca_3, &comp_size_3, comp_size_3)); + // Check set range: full + ASSERT_TRUE(mmbit_all(ba_4, 257)); + + //-------------------- 5 -----------------------// + // Operate on mmbit + mmbit_clear(ba_4, 257); + // Compress + size_t comp_size_4 = mmbit_compsize(ba_4, 257); + comp_holder ca_4(comp_size_4); + ASSERT_EQ(1, mmbit_compress(ba_4, 257, ca_4, &comp_size_4, comp_size_4)); + // Decompress + mmbit_holder 
ba_5(257); + ASSERT_EQ(1, mmbit_decompress(ba_5, 257, ca_4, &comp_size_4, comp_size_4)); + // Check set range: empty + ASSERT_FALSE(mmbit_any(ba_5, 257)); + ASSERT_FALSE(mmbit_any_precise(ba_5, 257)); + + //-------------------- 6 -----------------------// + // Operate on mmbit + for (u64a i = 100; i < 200; i++) { + mmbit_set(ba_5, 257, i); + } + // Compress + size_t comp_size_5 = mmbit_compsize(ba_5, 257); + comp_holder ca_5(comp_size_5); + ASSERT_EQ(1, mmbit_compress(ba_5, 257, ca_5, &comp_size_5, comp_size_5)); + // Decompress + mmbit_holder ba_6(257); + ASSERT_EQ(1, mmbit_decompress(ba_6, 257, ca_5, &comp_size_5, comp_size_5)); + // Check set range: [100,200) + for (u64a i = 0; i < 257; i++) { + if (i >= 100 && i < 200) { + ASSERT_TRUE(mmbit_isset(ba_6, 257, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_6, 257, i)); + } + } +} + +TEST(MultiBitComp, CompIntegration2) { + // 64^2 + 1 --> smallest 3-level mmbit + u32 total_size = mmbit_size(4097); + mmbit_holder ba(4097); + + //-------------------- 1 -----------------------// + // Operate on mmbit + mmbit_init_range(ba, 4097, 0, 3200); + // Compress + size_t comp_size = mmbit_compsize(ba, 4097); + comp_holder ca(comp_size); + ASSERT_EQ(1, mmbit_compress(ba, 4097, ca, &comp_size, comp_size)); + // Decompress + mmbit_holder ba_1(4097); + ASSERT_EQ(1, mmbit_decompress(ba_1, 4097, ca, &comp_size, comp_size)); + // Check set range: [0, 3200) + for (u64a i = 0; i < 4097; i++) { + if (i < 3200) { + ASSERT_TRUE(mmbit_isset(ba_1, 4097, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_1, 4097, i)); + } + } + + //-------------------- 2 -----------------------// + // Operate on mmbit + mmbit_unset_range(ba_1, 4097, 320, 640); + // Compress + size_t comp_size_1 = mmbit_compsize(ba_1, 4097); + comp_holder ca_1(comp_size_1); + ASSERT_EQ(1, mmbit_compress(ba_1, 4097, ca_1, &comp_size_1, comp_size_1)); + // Decompress + mmbit_holder ba_2(4097); + ASSERT_EQ(1, + mmbit_decompress(ba_2, 4097, ca_1, &comp_size_1, comp_size_1)); + // Check 
set range: [0, 320) and [640, 3200) + for (u64a i = 0; i < 4097; i++) { + if (i < 320 || (i >= 640 && i < 3200)) { + ASSERT_TRUE(mmbit_isset(ba_2, 4097, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_2, 4097, i)); + } + } + + //-------------------- 3 -----------------------// + // Operate on mmbit + for (u64a i = 3000; i < 4000; i++) { + mmbit_set(ba_2, 4097, i); + } + // Compress + size_t comp_size_2 = mmbit_compsize(ba_2, 4097); + comp_holder ca_2(comp_size_2); + ASSERT_EQ(1, mmbit_compress(ba_2, 4097, ca_2, &comp_size_2, comp_size_2)); + // Decompress + mmbit_holder ba_3(4097); + ASSERT_EQ(1, + mmbit_decompress(ba_3, 4097, ca_2, &comp_size_2, comp_size_2)); + // Check set range: [0, 320) and [640, 4000) + for (u64a i = 0; i < 4097; i++) { + if (i < 320 || (i >= 640 && i < 4000)) { + ASSERT_TRUE(mmbit_isset(ba_3, 4097, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_3, 4097, i)); + } + } + + //-------------------- 4 -----------------------// + // Operate on mmbit + mmbit_unset(ba_3, 4097, 64); + mmbit_unset(ba_3, 4097, 3200); + // Compress + size_t comp_size_3 = mmbit_compsize(ba_3, 4097); + comp_holder ca_3(comp_size_3); + ASSERT_EQ(1, mmbit_compress(ba_3, 4097, ca_3, &comp_size_3, comp_size_3)); + // Decompress + mmbit_holder ba_4(4097); + ASSERT_EQ(1, + mmbit_decompress(ba_4, 4097, ca_3, &comp_size_3, comp_size_3)); + // Check set range: [0,64) and [65, 320) and [640, 3200) and [3201, 4000) + for (u64a i = 0; i < 4097; i++) { + if (i < 64 || (i >= 65 && i < 320) || (i >= 640 && i < 3200) || + (i >= 3201 && i < 4000)) { + ASSERT_TRUE(mmbit_isset(ba_4, 4097, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_4, 4097, i)); + } + } + + //-------------------- 5 -----------------------// + // Operate on mmbit + for (u64a i = 0; i < 4097; i++) { + if (i < 64 || (i >= 65 && i < 320) || (i >= 640 && i < 3200) || + (i >= 3201 && i < 4000)) { + mmbit_unset(ba_4, 4097, i); + } + } + // Compress + size_t comp_size_4 = mmbit_compsize(ba_4, 4097); + comp_holder ca_4(comp_size_4); + 
ASSERT_EQ(1, mmbit_compress(ba_4, 4097, ca_4, &comp_size_4, comp_size_4)); + // Decompress + mmbit_holder ba_5(4097); + ASSERT_EQ(1, + mmbit_decompress(ba_5, 4097, ca_4, &comp_size_4, comp_size_4)); + // Check set range: empty + ASSERT_TRUE(mmbit_any(ba_5, 4097)); + ASSERT_FALSE(mmbit_any_precise(ba_5, 4097)); + + //-------------------- 6 -----------------------// + // Operate on mmbit + mmbit_set(ba_5, 4097, 4096); + // Compress + size_t comp_size_5 = mmbit_compsize(ba_5, 4097); + comp_holder ca_5(comp_size_5); + ASSERT_EQ(1, mmbit_compress(ba_5, 4097, ca_5, &comp_size_5, comp_size_5)); + // Decompress + mmbit_holder ba_6(4097); + ASSERT_EQ(1, + mmbit_decompress(ba_6, 4097, ca_5, &comp_size_5, comp_size_5)); + // Check set range: [4096, 4096] + for (u64a i = 0; i < 4097; i++) { + if (i == 4096) { + ASSERT_TRUE(mmbit_isset(ba_6, 4097, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_6, 4097, i)); + } + } +} + +TEST(MultiBitComp, CompIntegration3) { + // 64^3 + 1 --> smallest 4-level mmbit + u32 total_size = mmbit_size(262145); + mmbit_holder ba(262145); + + //-------------------- 1 -----------------------// + // Operate on mmbit + mmbit_init_range(ba, 262145, 0, 262145); + // Compress + size_t comp_size = mmbit_compsize(ba, 262145); + comp_holder ca(comp_size); + ASSERT_EQ(1, mmbit_compress(ba, 262145, ca, &comp_size, comp_size)); + // Decompress + mmbit_holder ba_1(262145); + ASSERT_EQ(1, mmbit_decompress(ba_1, 262145, ca, &comp_size, comp_size)); + // Check set range: full + ASSERT_TRUE(mmbit_all(ba_1, 262145)); + + //-------------------- 2 -----------------------// + // Operate on mmbit + mmbit_unset_range(ba_1, 262145, 0, 64000); + // Compress + size_t comp_size_1 = mmbit_compsize(ba_1, 262145); + comp_holder ca_1(comp_size_1); + ASSERT_EQ(1, + mmbit_compress(ba_1, 262145, ca_1, &comp_size_1, comp_size_1)); + // Decompress + mmbit_holder ba_2(262145); + ASSERT_EQ(1, + mmbit_decompress(ba_2, 262145, ca_1, &comp_size_1, comp_size_1)); + // Check set range: [64000, 
262145) + for (u64a i = 0; i < 262145; i++) { + if (i < 64000) { + ASSERT_FALSE(mmbit_isset(ba_2, 262145, i)); + } else { + ASSERT_TRUE(mmbit_isset(ba_2, 262145, i)); + } + } + + //-------------------- 3 -----------------------// + // Operate on mmbit + mmbit_unset_range(ba_2, 262145, 64001, 256000); + // Compress + size_t comp_size_2 = mmbit_compsize(ba_2, 262145); + comp_holder ca_2(comp_size_2); + ASSERT_EQ(1, + mmbit_compress(ba_2, 262145, ca_2, &comp_size_2, comp_size_2)); + // Decompress + mmbit_holder ba_3(262145); + ASSERT_EQ(1, + mmbit_decompress(ba_3, 262145, ca_2, &comp_size_2, comp_size_2)); + // Check set range: [64000, 64000] and [256000, 262145) + for (u64a i = 0; i < 262145; i++) { + if (i == 64000 || i >= 256000) { + ASSERT_TRUE(mmbit_isset(ba_3, 262145, i)); + } else { + ASSERT_FALSE(mmbit_isset(ba_3, 262145, i)); + } + } + + //-------------------- 4 -----------------------// + // Operate on mmbit + mmbit_unset_range(ba_3, 262145, 256001, 262145); + // Compress + size_t comp_size_3 = mmbit_compsize(ba_3, 262145); + comp_holder ca_3(comp_size_3); + ASSERT_EQ(1, + mmbit_compress(ba_3, 262145, ca_3, &comp_size_3, comp_size_3)); + // Decompress + mmbit_holder ba_4(262145); + ASSERT_EQ(1, + mmbit_decompress(ba_4, 262145, ca_3, &comp_size_3, comp_size_3)); + // Check set range: [64000, 64000] and [256000, 256000] + ASSERT_EQ(64000, mmbit_iterate(ba_4, 262145, MMB_INVALID)); + ASSERT_EQ(256000, mmbit_iterate(ba_4, 262145, 64000)); + ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba_4, 262145, 256000)); + + //-------------------- 5 -----------------------// + // Operate on mmbit + mmbit_unset(ba_4, 262145, 64000); + mmbit_unset(ba_4, 262145, 256000); + // Compress + size_t comp_size_4 = mmbit_compsize(ba_4, 262145); + comp_holder ca_4(comp_size_4); + ASSERT_EQ(1, + mmbit_compress(ba_4, 262145, ca_4, &comp_size_4, comp_size_4)); + // Decompress + mmbit_holder ba_5(262145); + ASSERT_EQ(1, + mmbit_decompress(ba_5, 262145, ca_4, &comp_size_4, comp_size_4)); + // Check 
set range: empty
+    ASSERT_TRUE(mmbit_any(ba_5, 262145));
+    ASSERT_FALSE(mmbit_any_precise(ba_5, 262145));
+}
+
+static const MultiBitCompTestParam multibitCompTests[] = {
+    // We provide both test size and stride so that larger tests don't take
+    // forever checking every single key.
+
+    // Small cases, stride 1.
+    { 4, 1 },
+    { 7, 1 },
+    { 8, 1 },
+    { 13, 1 },
+    { 16, 1 },
+    { 17, 1 },
+    { 32, 1 },
+    { 33, 1 },
+    { 57, 1 },
+    { 64, 1 },
+    { 65, 1 },
+    { 100, 1 },
+    { 128, 1 },
+    { 200, 1 },
+    { 256, 1 },
+    { 257, 1 }, // 257 = 256 + 1
+    { 302, 1 },
+    { 1024, 1 },
+    { 1025, 1 },
+    { 2099, 1 },
+    { 4097, 1 }, // 4097 = 64 ^ 2 + 1
+    { 10000, 1 },
+    { 32768, 1 },
+    { 32769, 1 },
+    { 200000, 1 },
+    { 262145, 1 }, // 262145 = 64 ^ 3 + 1
+
+    // Larger cases, bigger strides.
+    { 1U << 19, 3701 },
+    { 1U << 20, 3701 },
+    { 1U << 21, 3701 },
+    { 1U << 22, 3701 },
+    { 1U << 23, 3701 },
+    { 1U << 24, 3701 },
+    { 1U << 25, 3701 },
+    { 1U << 26, 3701 },
+    { 1U << 27, 7919 },
+    { 1U << 28, 15073 },
+    { 1U << 29, 24413 },
+    { 1U << 30, 50377 },
+    { 1U << 31, 104729 },
+};
+
+INSTANTIATE_TEST_CASE_P(MultiBitComp, MultiBitCompTest,
+                        ValuesIn(multibitCompTests));

From 7b17f0eed785486a9e39c55039b40f6288ec33c9 Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Tue, 2 May 2017 13:16:41 +1000
Subject: [PATCH 117/190] dev reference documentation for stream compression

---
 doc/dev-reference/runtime.rst | 36 +++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/doc/dev-reference/runtime.rst b/doc/dev-reference/runtime.rst
index 665395a8..dbfe7633 100644
--- a/doc/dev-reference/runtime.rst
+++ b/doc/dev-reference/runtime.rst
@@ -80,6 +80,42 @@ functions for the management of streams:
   another, resetting the destination stream first. This call avoids the
   allocation done by :c:func:`hs_copy_stream`.
+================== +Stream Compression +================== + +A stream object is allocated as a fixed size region of memory which has been +sized to ensure that no memory allocations are required during scan +operations. When the system is under memory pressure, it may be useful to reduce +the memory consumed by streams that are not expected to be used soon. The +Hyperscan API provides calls for translating a stream to and from a compressed +representation for this purpose. The compressed representation differs from the +full stream object as it does not reserve space for components which are not +required given the current stream state. The Hyperscan API functions for this +functionality are: + +* :c:func:`hs_compress_stream`: fills the provided buffer with a compressed + representation of the stream and returns the number of bytes consumed by the + compressed representation. If the buffer is not large enough to hold the + compressed representation, :c:member:`HS_INSUFFICIENT_SPACE` is returned along + with the required size. This call does not modify the original stream in any + way: it may still be written to with :c:func:`hs_scan_stream`, used as part of + the various reset calls to reinitialise its state, or + :c:func:`hs_close_stream` may be called to free its resources. + +* :c:func:`hs_expand_stream`: creates a new stream based on a buffer containing + a compressed representation. + +* :c:func:`hs_reset_and_expand_stream`: constructs a stream based on a buffer + containing a compressed representation on top of an existing stream, resetting + the existing stream first. This call avoids the allocation done by + :c:func:`hs_expand_stream`. + +Note: it is not recommended to use stream compression between every call to scan +for performance reasons as it takes time to convert between the compressed +representation and a standard stream. 
+ + ********** Block Mode ********** From e099d8552446837425f16d17a4b135b0fcf4575f Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 4 Aug 2017 11:08:30 +1000 Subject: [PATCH 118/190] CMake: put the stream compress files in the correct part --- CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0117110c..ebdbec9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -513,6 +513,9 @@ set (hs_exec_SRCS src/crc32.h src/report.h src/runtime.c + src/stream_compress.c + src/stream_compress.h + src/stream_compress_impl.h src/fdr/fdr.c src/fdr/fdr.h src/fdr/fdr_internal.h @@ -663,8 +666,6 @@ SET (hs_compile_SRCS src/hs_version.h src/scratch.h src/state.h - src/stream_compress.c - src/stream_compress.h src/ue2common.h src/compiler/asserts.cpp src/compiler/asserts.h From d878e8cdf3d6d7c372b1ba1a3a884e9a12425a9b Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 4 Aug 2017 11:40:28 +1000 Subject: [PATCH 119/190] add dynamic compression to the public api --- src/hs_runtime.h | 19 ++++++++++--------- src/runtime.c | 2 ++ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/hs_runtime.h b/src/hs_runtime.h index a93437b8..98e50068 100644 --- a/src/hs_runtime.h +++ b/src/hs_runtime.h @@ -356,8 +356,8 @@ hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, * @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided * buffer is too small. */ -hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf, - size_t buf_space, size_t *used_space); +hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf, + size_t buf_space, size_t *used_space); /** * Decompresses a compressed representation created by @ref hs_compress_stream() @@ -386,8 +386,9 @@ hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf, * @return * @ref HS_SUCCESS on success, other values on failure. 
*/ -hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, - const char *buf, size_t buf_size); +hs_error_t HS_CDECL hs_expand_stream(const hs_database_t *db, + hs_stream_t **stream, const char *buf, + size_t buf_size); /** * Decompresses a compressed representation created by @ref hs_compress_stream() @@ -428,11 +429,11 @@ hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream, - const char *buf, size_t buf_size, - hs_scratch_t *scratch, - match_event_handler onEvent, - void *context); +hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream, + const char *buf, size_t buf_size, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context); /** * The block (non-streaming) regular expression scanner. diff --git a/src/runtime.c b/src/runtime.c index 5a8168d3..a374984f 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1121,6 +1121,7 @@ hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf, return HS_SUCCESS; } +HS_PUBLIC_API hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, const char *buf, size_t buf_size) { if (unlikely(!stream || !buf)) { @@ -1159,6 +1160,7 @@ hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, return HS_SUCCESS; } +HS_PUBLIC_API hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream, const char *buf, size_t buf_size, hs_scratch_t *scratch, From 3d58ce83bdf2d8bda398efe2e527cfc928fc11e2 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 19 Jun 2017 15:00:51 +1000 Subject: [PATCH 120/190] hsbench: use a memstream instead of a temp file --- tools/hsbench/heapstats.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/hsbench/heapstats.cpp b/tools/hsbench/heapstats.cpp index d0dffdb3..5fba7c2a 100644 --- a/tools/hsbench/heapstats.cpp +++ 
b/tools/hsbench/heapstats.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,18 +47,21 @@ #include size_t getPeakHeap(void) { - FILE *tmpf = tmpfile(); - if (!tmpf) { + size_t fsize; + char *fptr; + FILE *fstr = open_memstream(&fptr, &fsize); + if (!fstr) { return 0; } - int rv = malloc_info(0, tmpf); + int rv = malloc_info(0, fstr); if (rv != 0) { - fclose(tmpf); + fclose(fstr); + free(fptr); return 0; } - rewind(tmpf); + rewind(fstr); // We don't want to depend on a real XML parser. This is ugly and brittle // and hopefully good enough for the time being. We look for the last @@ -71,7 +74,7 @@ size_t getPeakHeap(void) { size_t len = 0, maxheap = 0; ssize_t read; - while ((read = getline(&line, &len, tmpf)) != -1) { + while ((read = getline(&line, &len, fstr)) != -1) { if (strncmp(line, begin, begin_len) == 0) { errno = 0; maxheap = (size_t)strtoull(line + begin_len, nullptr, 10); @@ -83,7 +86,8 @@ size_t getPeakHeap(void) { finish: free(line); - fclose(tmpf); + fclose(fstr); + free(fptr); return maxheap; } From c7f3150141bc5e1ab5e271feac8d81a185eb78e1 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 19 Jun 2017 15:52:08 +1000 Subject: [PATCH 121/190] restore formatting flags after use --- examples/patbench.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/examples/patbench.cc b/examples/patbench.cc index f82f47a7..20de5745 100644 --- a/examples/patbench.cc +++ b/examples/patbench.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -165,6 +165,7 @@ static bool higher_is_better(Criterion c) { } static 
void print_criterion(Criterion c, double val) { + std::ios::fmtflags f(cout.flags()); switch (c) { case CRITERION_THROUGHPUT: cout << std::fixed << std::setprecision(3) << val << " Megabits/s"; @@ -179,6 +180,7 @@ static void print_criterion(Criterion c, double val) { cout << static_cast(val) << " bytes"; break; } + cout.flags(f); } // Key for identifying a stream in our pcap input data, using data from its IP @@ -596,11 +598,13 @@ double eval_set(Benchmark &bench, Sigdata &sigs, unsigned int mode, size_t bytes = bench.bytes(); size_t matches = bench.matches(); if (diagnose) { + std::ios::fmtflags f(cout.flags()); cout << "Scan time " << std::fixed << std::setprecision(3) << scan_time << " sec, Scanned " << bytes * repeatCount << " bytes, Throughput " << std::fixed << std::setprecision(3) << (bytes * 8 * repeatCount) / (scan_time * 1000000) << " Mbps, Matches " << matches << endl; + cout.flags(f); } return (bytes * 8 * repeatCount) / (scan_time * 1000000); } @@ -755,10 +759,12 @@ int main(int argc, char **argv) { for (unsigned i = count; i < 16; i++) { cout << " "; } + std::ios::fmtflags out_f(cout.flags()); cout << "Performance: "; print_criterion(criterion, best); cout << " (" << std::fixed << std::setprecision(3) << (best / score_base) << "x) after cutting:" << endl; + cout.flags(out_f); // s now has factor_max signatures for (const auto &found : s) { From 30f93634b8ccc07cf6737877f26b177705ad74ad Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 16 Jun 2017 15:56:48 +1000 Subject: [PATCH 122/190] use string equality operator --- util/cross_compile.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/cross_compile.cpp b/util/cross_compile.cpp index b4d1f5f1..f937f42f 100644 --- a/util/cross_compile.cpp +++ b/util/cross_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are 
permitted provided that the following conditions are met: @@ -62,7 +62,7 @@ unique_ptr xcompileReadMode(const char *s) { if (!opt.empty()) { const size_t numOpts = ARRAY_LENGTH(xcompile_options); for (size_t i = 0; i < numOpts; i++) { - if (opt.compare(xcompile_options[i].name) == 0) { + if (opt == xcompile_options[i].name) { DEBUG_PRINTF("found opt %zu:%llu\n", i, xcompile_options[i].cpu_features); rv.cpu_features = xcompile_options[i].cpu_features; From 7bcb58dea0c74a2692e2ffdfbc1d55d70d53c75b Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 19 Jun 2017 12:53:34 +1000 Subject: [PATCH 123/190] Catch by reference not value --- src/hs.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hs.cpp b/src/hs.cpp index e3c1f811..9305c924 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -262,7 +262,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, e.hasIndex ? (int)e.index : -1); return HS_COMPILER_ERROR; } - catch (std::bad_alloc) { + catch (const std::bad_alloc &) { *db = nullptr; *comp_error = const_cast(&hs_enomem); return HS_COMPILER_ERROR; @@ -399,7 +399,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, *error = generateCompileError(e); return HS_COMPILER_ERROR; } - catch (std::bad_alloc) { + catch (std::bad_alloc &) { *error = const_cast(&hs_enomem); return HS_COMPILER_ERROR; } From 9d5a00bde1180a6d6e7f51bc6c6555705f33f3ac Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 19 Jun 2017 16:27:17 +1000 Subject: [PATCH 124/190] Open input path once using file descriptor --- util/expressions.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/util/expressions.cpp b/util/expressions.cpp index a81e0cd5..b33f8972 100644 --- a/util/expressions.cpp +++ b/util/expressions.cpp @@ -42,6 +42,7 @@ #include #if !defined(_WIN32) #include +#include #include #else // Windows support is probably very fragile @@ -145,8 +146,9 @@ bool isIgnorable(const std::string &f) { 
#ifndef _WIN32 void loadExpressions(const string &inPath, ExpressionMap &exprMap) { // Is our input path a file or a directory? + int fd = open(inPath.c_str(), O_RDONLY); struct stat st; - if (stat(inPath.c_str(), &st) != 0) { + if (fstat(fd, &st) != 0) { cerr << "Can't stat path: '" << inPath << "'" << endl; exit(1); } @@ -159,7 +161,7 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) { exit(1); } } else if (S_ISDIR(st.st_mode)) { - DIR *d = opendir(inPath.c_str()); + DIR *d = fdopendir(fd); if (d == nullptr) { cerr << "Can't open directory: '" << inPath << "'" << endl; exit(1); @@ -188,11 +190,12 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) { exit(1); } } - closedir(d); + (void)closedir(d); } else { cerr << "Can't stat path: '" << inPath << "'" << endl; exit(1); } + (void)close(fd); } #else // windows TODO: improve void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) { From b8753e3daf5d4bb46d7b2db6315b847beaf36a3e Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 22 Jun 2017 10:28:44 +1000 Subject: [PATCH 125/190] clean up loops and add AVX-512 --- util/cross_compile.cpp | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/util/cross_compile.cpp b/util/cross_compile.cpp index f937f42f..b987ccaf 100644 --- a/util/cross_compile.cpp +++ b/util/cross_compile.cpp @@ -39,11 +39,12 @@ using namespace std; struct XcompileMode { - const char *name; + const string name; unsigned long long cpu_features; }; static const XcompileMode xcompile_options[] = { + { "avx512", HS_CPU_FEATURES_AVX512 }, { "avx2", HS_CPU_FEATURES_AVX2 }, { "base", 0 }, }; @@ -60,12 +61,10 @@ unique_ptr xcompileReadMode(const char *s) { bool found_mode = false; if (!opt.empty()) { - const size_t numOpts = ARRAY_LENGTH(xcompile_options); - for (size_t i = 0; i < numOpts; i++) { - if (opt == xcompile_options[i].name) { - DEBUG_PRINTF("found opt %zu:%llu\n", i, - 
xcompile_options[i].cpu_features); - rv.cpu_features = xcompile_options[i].cpu_features; + for (const auto &xcompile : xcompile_options) { + if (opt == xcompile.name) { + DEBUG_PRINTF("found opt %zu:%llu\n", i, xcompile.cpu_features); + rv.cpu_features = xcompile.cpu_features; found_mode = true; break; } @@ -88,6 +87,11 @@ string to_string(const hs_platform_info &p) { if (p.cpu_features) { u64a features = p.cpu_features; + if (features & HS_CPU_FEATURES_AVX512) { + out << " avx512"; + features &= ~HS_CPU_FEATURES_AVX512; + } + if (features & HS_CPU_FEATURES_AVX2) { out << " avx2"; features &= ~HS_CPU_FEATURES_AVX2; @@ -103,13 +107,11 @@ string to_string(const hs_platform_info &p) { string xcompileUsage(void) { string variants = "Instruction set options: "; - const size_t numOpts = ARRAY_LENGTH(xcompile_options); - for (size_t i = 0; i < numOpts; i++) { - variants += xcompile_options[i].name; - if (i + 1 != numOpts) { - variants += ", "; - } + const auto commaspace = ", "; + auto sep = ""; + for (const auto &xcompile : xcompile_options) { + variants += sep + xcompile.name; + sep = commaspace; } - return variants; } From d33dcc053499623f57f7a568a97fcf723252ab5a Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 16 Jun 2017 16:06:52 +1000 Subject: [PATCH 126/190] Remove unwanted move constructor --- src/nfagraph/ng_som.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 45917b45..d23ac408 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -1735,8 +1735,6 @@ namespace { struct SomRevNfa { SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr n) : sink(s), report(r), nfa(move(n)) {} - SomRevNfa(SomRevNfa &&s) // MSVC2013 needs this for emplace - : sink(s.sink), report(s.report), nfa(move(s.nfa)) {} NFAVertex sink; ReportID report; bytecode_ptr nfa; From a847b4307a6b4d1a9499ea0c8ffd025cbb0c1277 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 7 Aug 2017 09:19:23 +1000 Subject: [PATCH 127/190] 
add dynamic stream compression to fat runtime --- src/dispatcher.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/dispatcher.c b/src/dispatcher.c index 5ae46b56..c37a984e 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -127,6 +127,16 @@ CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes, CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes, const size_t length, size_t *deserialized_size); +CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream, + char *buf, size_t buf_space, size_t *used_space); + +CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db, + hs_stream_t **stream, const char *buf,size_t buf_size); + +CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream, + const char *buf, size_t buf_size, hs_scratch_t *scratch, + match_event_handler onEvent, void *context); + /** INTERNALS **/ CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen); From 56ec2dfc4a4161877d60369e67e02b7e989e86d7 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 7 Aug 2017 09:45:28 +1000 Subject: [PATCH 128/190] Remove out of date debug output --- util/cross_compile.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/util/cross_compile.cpp b/util/cross_compile.cpp index b987ccaf..0d136998 100644 --- a/util/cross_compile.cpp +++ b/util/cross_compile.cpp @@ -63,7 +63,6 @@ unique_ptr xcompileReadMode(const char *s) { if (!opt.empty()) { for (const auto &xcompile : xcompile_options) { if (opt == xcompile.name) { - DEBUG_PRINTF("found opt %zu:%llu\n", i, xcompile.cpu_features); rv.cpu_features = xcompile.cpu_features; found_mode = true; break; From 41783fe91272b12433236410e0ad16bed6d0c957 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 28 Jul 2017 14:32:55 +1000 Subject: [PATCH 129/190] more comments on hwlm/fdr's start parameter --- src/fdr/fdr.h | 4 ++-- src/hwlm/hwlm.h | 6 ++++-- 2 files changed, 6 insertions(+), 4 
deletions(-) diff --git a/src/fdr/fdr.h b/src/fdr/fdr.h index 77157a10..4dcef851 100644 --- a/src/fdr/fdr.h +++ b/src/fdr/fdr.h @@ -50,7 +50,7 @@ struct hs_scratch; * \param fdr FDR matcher engine. * \param buf Buffer to scan. * \param len Length of buffer to scan. - * \param start First offset in buf at which a match may end. + * \param start First offset in buf at which a match may start. * \param cb Callback to call when a match is found. * \param scratch Scratch supplied to callback on match. * \param groups Initial groups mask. @@ -67,7 +67,7 @@ hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, * \param hlen Length of history buffer (hbuf). * \param buf Buffer to scan. * \param len Length of buffer to scan (buf). - * \param start First offset in buf at which a match may end. + * \param start First offset in buf at which a match may start. * \param cb Callback to call when a match is found. * \param scratch Scratch supplied to callback on match. * \param groups Initial groups mask. diff --git a/src/hwlm/hwlm.h b/src/hwlm/hwlm.h index 9262e80c..224ecf6b 100644 --- a/src/hwlm/hwlm.h +++ b/src/hwlm/hwlm.h @@ -107,7 +107,8 @@ typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id, * Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback * returning \ref HWLM_TERMINATE_MATCHING. * - * \p start is the first offset at which a match may start. + * \p start is the first offset at which a match may start. Note: match + * starts may include masks overhanging the main literal. * * The underlying engine may choose not to report any match which starts before * the first possible match of a literal which is in the initial group mask. @@ -121,7 +122,8 @@ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len, * \p len is the length of the main buffer to be scanned. * * \p start is an advisory hint representing the first offset at which a match - * may start. Some underlying literal matches may not respect it. 
+ * may start. Some underlying literal matches may not respect it. Note: match + * starts may include masks overhanging the main literal. * * \p scratch is used to access the history buffer, history length and * the main buffer. From ffc2d578b197a98e57133a1b970ae46a9ac63b82 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 28 Jul 2017 14:51:58 +1000 Subject: [PATCH 130/190] roseQuality() no longer needs to be part of rose's API. --- CMakeLists.txt | 1 + src/rose/rose_build.h | 4 --- src/rose/rose_build_bytecode.cpp | 1 + src/rose/rose_build_misc.cpp | 1 + src/rose/rose_build_misc.h | 44 ++++++++++++++++++++++++++++++++ 5 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 src/rose/rose_build_misc.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ebdbec9f..4a07cffc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -964,6 +964,7 @@ SET (hs_compile_SRCS src/rose/rose_build_merge.cpp src/rose/rose_build_merge.h src/rose/rose_build_misc.cpp + src/rose/rose_build_misc.h src/rose/rose_build_program.cpp src/rose/rose_build_program.h src/rose/rose_build_resources.h diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index 2219f12e..ca3ba369 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -134,10 +134,6 @@ std::unique_ptr makeRoseBuilder(ReportManager &rm, bool roseCheckRose(const RoseInGraph &ig, bool prefilter, const ReportManager &rm, const CompileContext &cc); -/* used by heuristics to determine the small write engine. High numbers are - * intended to indicate a lightweight rose. 
*/ -u32 roseQuality(const RoseEngine *t); - bool roseIsPureLiteral(const RoseEngine *t); size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index cf3de55c..4d9c3d7e 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -41,6 +41,7 @@ #include "rose_build_long_lit.h" #include "rose_build_lookaround.h" #include "rose_build_matchers.h" +#include "rose_build_misc.h" #include "rose_build_program.h" #include "rose_build_resources.h" #include "rose_build_scatter.h" diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 839fd478..d8a39994 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -26,6 +26,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include "rose_build_misc.h" #include "rose_build_impl.h" #include "hwlm/hwlm_literal.h" diff --git a/src/rose/rose_build_misc.h b/src/rose/rose_build_misc.h new file mode 100644 index 00000000..b9c6d5ca --- /dev/null +++ b/src/rose/rose_build_misc.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_MISC_H +#define ROSE_BUILD_MISC_H + +#include "ue2common.h" + +struct RoseEngine; + +namespace ue2 { + +/* used by heuristics to determine the small write engine. High numbers are + * intended to indicate a lightweight rose. */ +u32 roseQuality(const RoseEngine *rose); + +} + +#endif From 778addadc5785e6359df34cd770d4d74dfe1b39f Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 28 Jul 2017 14:56:54 +1000 Subject: [PATCH 131/190] mangle fdr conf parts of scratch as well --- src/runtime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime.c b/src/runtime.c index a374984f..8e3a4def 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -140,6 +140,7 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, s->som_set_now_offset = ~0ULL; s->deduper.current_report_offset = ~0ULL; s->deduper.som_log_dirty = 1; /* som logs have not been cleared */ + s->fdr_conf = NULL; // Rose program execution (used for some report paths) depends on these // values being initialised. 
From 37033ef9bb6a1fe7d03e4d68b53335adef9879e8 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 31 Jul 2017 10:38:30 +1000 Subject: [PATCH 132/190] Provide RoseResources to roseQuality. RoseResources is an alternative to manually digging through the bytecode. --- src/rose/rose_build_bytecode.cpp | 20 ++++++++++++++------ src/rose/rose_build_misc.cpp | 16 +++++++--------- src/rose/rose_build_misc.h | 4 +++- src/rose/rose_build_resources.h | 2 ++ 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 4d9c3d7e..e62c9e18 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2333,6 +2333,7 @@ void addSomRevNfas(build_context &bc, RoseEngine &proto, static void recordResources(RoseResources &resources, const RoseBuildImpl &build, + const vector &anchored_dfas, const vector &fragments) { if (!build.outfixes.empty()) { resources.has_outfixes = true; @@ -2351,6 +2352,15 @@ void recordResources(RoseResources &resources, const RoseBuildImpl &build, break; } } + + resources.has_anchored = !anchored_dfas.empty(); + resources.has_anchored_multiple = anchored_dfas.size() > 1; + for (const auto &rdfa : anchored_dfas) { + if (rdfa.states.size() > 256) { + resources.has_anchored_large = true; + } + } + } static @@ -3413,6 +3423,7 @@ u32 writeEagerQueueIter(const set &eager, u32 leftfixBeginQueue, static bytecode_ptr addSmallWriteEngine(const RoseBuildImpl &build, + const RoseResources &res, bytecode_ptr rose) { assert(rose); @@ -3421,7 +3432,7 @@ bytecode_ptr addSmallWriteEngine(const RoseBuildImpl &build, return rose; } - u32 qual = roseQuality(rose.get()); + u32 qual = roseQuality(res, rose.get()); auto smwr_engine = build.smwr.build(qual); if (!smwr_engine) { DEBUG_PRINTF("no smwr built\n"); @@ -3561,10 +3572,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { build_context bc; u32 floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this, 
anchored_dfas); - recordResources(bc.resources, *this, fragments); - if (!anchored_dfas.empty()) { - bc.resources.has_anchored = true; - } + recordResources(bc.resources, *this, anchored_dfas, fragments); bc.needs_mpv_catchup = needsMpvCatchup(*this); makeBoundaryPrograms(*this, bc, boundary, dboundary, proto.boundary); @@ -3803,7 +3811,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.engine_blob.write_bytes(engine.get()); // Add a small write engine if appropriate. - engine = addSmallWriteEngine(*this, move(engine)); + engine = addSmallWriteEngine(*this, bc.resources, move(engine)); DEBUG_PRINTF("rose done %p\n", engine.get()); diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index d8a39994..450b2efb 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -29,6 +29,7 @@ #include "rose_build_misc.h" #include "rose_build_impl.h" +#include "rose_build_resources.h" #include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" @@ -788,18 +789,16 @@ LeftEngInfo::operator bool() const { return graph || castle || dfa || haig; } -u32 roseQuality(const RoseEngine *t) { +u32 roseQuality(const RoseResources &res, const RoseEngine *t) { /* Rose is low quality if the atable is a Mcclellan 16 or has multiple DFAs */ - const anchored_matcher_info *atable = getALiteralMatcher(t); - if (atable) { - if (atable->next_offset) { + if (res.has_anchored) { + if (res.has_anchored_multiple) { DEBUG_PRINTF("multiple atable engines\n"); return 0; } - const NFA *nfa = (const NFA *)((const char *)atable + sizeof(*atable)); - if (!isSmallDfaType(nfa->type)) { + if (res.has_anchored_large) { DEBUG_PRINTF("m16 atable engine\n"); return 0; } @@ -808,7 +807,7 @@ u32 roseQuality(const RoseEngine *t) { /* if we always run multiple engines then we are slow */ u32 always_run = 0; - if (atable) { + if (res.has_anchored) { always_run++; } @@ -817,8 +816,7 @@ u32 roseQuality(const RoseEngine *t) { 
always_run++; } - const HWLM *ftable = getFLiteralMatcher(t); - if (ftable) { + if (res.has_floating) { /* TODO: ignore conditional ftables, or ftables beyond smwr region */ always_run++; } diff --git a/src/rose/rose_build_misc.h b/src/rose/rose_build_misc.h index b9c6d5ca..f34b8292 100644 --- a/src/rose/rose_build_misc.h +++ b/src/rose/rose_build_misc.h @@ -35,9 +35,11 @@ struct RoseEngine; namespace ue2 { +struct RoseResources; + /* used by heuristics to determine the small write engine. High numbers are * intended to indicate a lightweight rose. */ -u32 roseQuality(const RoseEngine *rose); +u32 roseQuality(const RoseResources &res, const RoseEngine *rose); } diff --git a/src/rose/rose_build_resources.h b/src/rose/rose_build_resources.h index 3edb81b9..4fa102f3 100644 --- a/src/rose/rose_build_resources.h +++ b/src/rose/rose_build_resources.h @@ -48,6 +48,8 @@ struct RoseResources { bool has_lit_delay = false; bool has_lit_check = false; // long literal support bool has_anchored = false; + bool has_anchored_multiple = false; /* multiple anchored dfas */ + bool has_anchored_large = false; /* mcclellan 16 anchored dfa */ bool has_floating = false; bool has_eod = false; }; From 15784954e876d0c9aa2166be1151adb6dd0114fd Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 7 Aug 2017 13:50:21 +1000 Subject: [PATCH 133/190] dynamic compression: add HS_CDECL to implementation --- src/runtime.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/runtime.c b/src/runtime.c index 8e3a4def..c384c031 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1096,8 +1096,8 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, } HS_PUBLIC_API -hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf, - size_t buf_space, size_t *used_space) { +hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf, + size_t buf_space, size_t *used_space) { if (unlikely(!stream || !used_space)) { return HS_INVALID; } @@ 
-1123,8 +1123,9 @@ hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf, } HS_PUBLIC_API -hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, - const char *buf, size_t buf_size) { +hs_error_t HS_CDECL hs_expand_stream(const hs_database_t *db, + hs_stream_t **stream, + const char *buf, size_t buf_size) { if (unlikely(!stream || !buf)) { return HS_INVALID; } @@ -1162,11 +1163,11 @@ hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream, } HS_PUBLIC_API -hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream, - const char *buf, size_t buf_size, - hs_scratch_t *scratch, - match_event_handler onEvent, - void *context) { +hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream, + const char *buf, size_t buf_size, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context) { if (unlikely(!to_stream || !buf)) { return HS_INVALID; } From 927501175c303be0e07e9f398b3d5eeeade28b91 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 29 May 2017 14:59:31 +1000 Subject: [PATCH 134/190] rose_build_long_lit: refactor to do less dupe work Also some performance improvements. --- src/rose/rose_build_long_lit.cpp | 121 +++++++++++++++++-------------- 1 file changed, 66 insertions(+), 55 deletions(-) diff --git a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp index 7ebf73ec..45a2eb27 100644 --- a/src/rose/rose_build_long_lit.cpp +++ b/src/rose/rose_build_long_lit.cpp @@ -44,7 +44,7 @@ using namespace std; namespace ue2 { /** \brief Minimum size for a non-empty hash table. Must be a power of two. */ -static constexpr u32 MIN_HASH_TABLE_SIZE = 128; +static constexpr size_t MIN_HASH_TABLE_SIZE = 128; /** \brief Maximum load factor (between zero and one) for a hash table. 
*/ static constexpr double MAX_HASH_TABLE_LOAD = 0.7; @@ -167,30 +167,69 @@ vector makeBloomFilter(const vector &lits, return bloom; } -static +static UNUSED size_t hashTableOccupancy(const vector &tab) { return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) { return ent.str_offset != 0; }); } -static +static UNUSED double hashTableLoad(const vector &tab) { return (double)hashTableOccupancy(tab) / (double)(tab.size()); } +using LitOffsetVector = small_vector, 1>; + static -vector buildHashTable(const vector &lits, - size_t max_len, - const vector &litToOffsetVal, - size_t numEntries, bool nocase) { +vector buildHashTable( + size_t max_len, const vector &litToOffsetVal, + const map &hashToLitOffPairs, + size_t numEntries) { vector tab(numEntries, {0,0}); if (!numEntries) { return tab; } - map>> hashToLitOffPairs; + for (const auto &m : hashToLitOffPairs) { + u32 hash = m.first; + const LitOffsetVector &d = m.second; + + u32 bucket = hash % numEntries; + + // Placement via linear probing. 
+ for (const auto &lit_offset : d) { + while (tab[bucket].str_offset != 0) { + bucket++; + if (bucket == numEntries) { + bucket = 0; + } + } + + u32 lit_id = lit_offset.first; + u32 offset = lit_offset.second; + + DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash, + lit_id, offset, bucket); + + auto &entry = tab[bucket]; + entry.str_offset = verify_u32(litToOffsetVal.at(lit_id)); + assert(entry.str_offset != 0); + entry.str_len = offset + max_len; + } + } + + DEBUG_PRINTF("hash table occupancy %zu of %zu entries\n", + hashTableOccupancy(tab), numEntries); + + return tab; +} + +static +map computeLitHashes(const vector &lits, + size_t max_len, bool nocase) { + map hashToLitOffPairs; for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) { const ue2_case_string &lit = lits[lit_id]; @@ -205,8 +244,10 @@ vector buildHashTable(const vector &lits, } for (auto &m : hashToLitOffPairs) { - u32 hash = m.first; - vector> &d = m.second; + LitOffsetVector &d = m.second; + if (d.size() == 1) { + continue; + } // Sort by (offset, string) so that we'll be able to remove identical // string prefixes. @@ -240,36 +281,9 @@ vector buildHashTable(const vector &lits, } return a.first < b.first; }); - - u32 bucket = hash % numEntries; - - // Placement via linear probing. - for (const auto &lit_offset : d) { - while (tab[bucket].str_offset != 0) { - bucket++; - if (bucket == numEntries) { - bucket = 0; - } - } - - u32 lit_id = lit_offset.first; - u32 offset = lit_offset.second; - - DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash, - lit_id, offset, bucket); - - auto &entry = tab[bucket]; - entry.str_offset = verify_u32(litToOffsetVal.at(lit_id)); - assert(entry.str_offset != 0); - entry.str_len = offset + max_len; - } } - DEBUG_PRINTF("%s hash table occupancy %zu of %zu entries\n", - nocase ? 
"nocase" : "caseful", hashTableOccupancy(tab), - numEntries); - - return tab; + return hashToLitOffPairs; } static @@ -277,24 +291,21 @@ vector makeHashTable(const vector &lits, size_t max_len, const vector &litToOffsetVal, u32 numPositions, bool nocase) { - vector tab; + // Compute lit substring hashes. + const auto hashToLitOffPairs = computeLitHashes(lits, max_len, nocase); - // Note: for the hash table, we must always have at least enough entries - // for the number of hashable positions. - size_t num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, - numPositions)); + // Compute the size of the hash table: we need enough entries to satisfy + // our max load constraint, and it must be a power of two. + size_t num_entries = (double)numPositions / MAX_HASH_TABLE_LOAD + 1; + num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, num_entries)); + + auto tab = buildHashTable(max_len, litToOffsetVal, hashToLitOffPairs, + num_entries); + DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n", + nocase ? "nocase" : "caseful", num_entries, + hashTableLoad(tab)); + assert(hashTableLoad(tab) < MAX_HASH_TABLE_LOAD); - for (;;) { - tab = buildHashTable(lits, max_len, litToOffsetVal, num_entries, - nocase); - DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n", - nocase ? 
"nocase" : "caseful", num_entries, - hashTableLoad(tab)); - if (hashTableLoad(tab) < MAX_HASH_TABLE_LOAD) { - break; - } - num_entries *= 2; - } return tab; } @@ -383,7 +394,7 @@ u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, if (info.nocase.num_literals) { bloom_nocase = makeBloomFilter(lits, max_len, true); tab_nocase = makeHashTable(lits, max_len, litToOffsetVal, - info.nocase.hashed_positions, true); + info.nocase.hashed_positions, true); } size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob)); From 09938d532f236781857a5596a0fa1c69028b6b8c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 31 Jul 2017 16:02:55 +1000 Subject: [PATCH 135/190] rose: return a vector from findEdgesByLiteral --- src/rose/rose_build_bytecode.cpp | 53 +++++++++++++------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index e62c9e18..b0de73bc 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2687,30 +2687,22 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, static RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, u32 lit_id, - const map> &lit_edge_map, + const vector> &lit_edge_map, bool is_anchored_replay_program) { - const vector no_edges; - DEBUG_PRINTF("lit_id=%u\n", lit_id); - const vector *edges_ptr; - if (contains(lit_edge_map, lit_id)) { - edges_ptr = &lit_edge_map.at(lit_id); - } else { - /* literal may happen only in a delay context */ - edges_ptr = &no_edges; - } + assert(lit_id < lit_edge_map.size()); return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, - bc.engine_info_by_queue, - bc.roleStateIndices, prog_build, lit_id, - *edges_ptr, is_anchored_replay_program); + bc.engine_info_by_queue, bc.roleStateIndices, + prog_build, lit_id, lit_edge_map.at(lit_id), + is_anchored_replay_program); } static RoseProgram 
makeFragmentProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, const vector &lit_ids, - const map> &lit_edge_map) { + const vector> &lit_edge_map) { assert(!lit_ids.empty()); vector blocks; @@ -2728,28 +2720,27 @@ RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc, * vertices with that literal ID. */ static -map> findEdgesByLiteral(const RoseBuildImpl &build) { - // Use a set of edges while building the map to cull duplicates. - map> unique_lit_edge_map; +vector> findEdgesByLiteral(const RoseBuildImpl &build) { + vector> lit_edge_map(build.literals.size()); const auto &g = build.g; - for (const auto &e : edges_range(g)) { - const auto &v = target(e, g); + for (const auto &v : vertices_range(g)) { for (const auto &lit_id : g[v].literals) { - unique_lit_edge_map[lit_id].insert(e); + assert(lit_id < lit_edge_map.size()); + auto &edge_list = lit_edge_map.at(lit_id); + insert(&edge_list, edge_list.end(), in_edges(v, g)); } } - // Build output map, sorting edges by (source, target) vertex index. - map> lit_edge_map; - for (const auto &m : unique_lit_edge_map) { - auto edge_list = vector(begin(m.second), end(m.second)); - sort(begin(edge_list), end(edge_list), - [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].index, g[target(a, g)].index) < - tie(g[source(b, g)].index, g[target(b, g)].index); - }); - lit_edge_map.emplace(m.first, std::move(edge_list)); + // Sort edges in each edge list by (source, target) indices. This gives us + // less surprising ordering in program generation for a literal with many + // edges. 
+ for (auto &edge_list : lit_edge_map) { + sort(begin(edge_list), end(edge_list), [&g](const RoseEdge &a, + const RoseEdge &b) { + return tie(g[source(a, g)].index, g[target(a, g)].index) < + tie(g[source(b, g)].index, g[target(b, g)].index); + }); } return lit_edge_map; @@ -2906,7 +2897,7 @@ static void buildFragmentPrograms(const RoseBuildImpl &build, vector &fragments, build_context &bc, ProgramBuild &prog_build, - const map> &lit_edge_map) { + const vector> &lit_edge_map) { // Sort fragments based on literal length and case info to build // included literal programs before their parent programs. vector ordered_fragments(fragments); From 4889a492e4fd429b3de1be6e9f7b06cc5070ba65 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 31 Jul 2017 16:22:08 +1000 Subject: [PATCH 136/190] rose: more hash member funcs for rose types --- src/rose/rose_build_misc.cpp | 9 ++++++++- src/rose/rose_build_role_aliasing.cpp | 2 +- src/rose/rose_graph.h | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 450b2efb..af2af5de 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -576,6 +576,9 @@ bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const { return false; } +size_t RoseSuffixInfo::hash() const { + return hash_all(top, graph, castle, rdfa, haig, tamarama); +} void RoseSuffixInfo::reset(void) { top = 0; @@ -691,7 +694,7 @@ set all_tops(const suffix_id &s) { } size_t suffix_id::hash() const { - return hash_all(g, c, d, h); + return hash_all(g, c, d, h, t); } bool isAnchored(const left_id &r) { @@ -769,6 +772,10 @@ u64a findMaxOffset(const set &reports, const ReportManager &rm) { return maxOffset; } +size_t LeftEngInfo::hash() const { + return hash_all(graph, castle, dfa, haig, tamarama, lag, leftfix_report); +} + void LeftEngInfo::reset(void) { graph.reset(); castle.reset(); diff --git a/src/rose/rose_build_role_aliasing.cpp 
b/src/rose/rose_build_role_aliasing.cpp index b5e69ef9..ba71a3ea 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -1468,7 +1468,7 @@ void splitByReportSuffixBehaviour(const RoseGraph &g, vector> &buckets) { // Split by report set and suffix info. auto make_split_key = [&g](RoseVertex v) { - return hash_all(g[v].reports, suffix_id(g[v].suffix)); + return hash_all(g[v].reports, g[v].suffix); }; splitAndFilterBuckets(buckets, make_split_key); } diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h index d1181063..2c5ebbe9 100644 --- a/src/rose/rose_graph.h +++ b/src/rose/rose_graph.h @@ -111,6 +111,7 @@ struct LeftEngInfo { ORDER_CHECK(leftfix_report); return false; } + size_t hash() const; void reset(void); operator bool() const; bool tracksSom() const { return !!haig; } @@ -131,6 +132,7 @@ struct RoseSuffixInfo { bool operator==(const RoseSuffixInfo &b) const; bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); } bool operator<(const RoseSuffixInfo &b) const; + size_t hash() const; void reset(void); operator bool() const { return graph || castle || haig || rdfa || tamarama; } }; From 1f2eb5a093e7b7d8074b915a5e2a5ba053b20f37 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 3 Aug 2017 11:33:42 +1000 Subject: [PATCH 137/190] rose_build_lookaround: use vector in trimLiterals --- src/rose/rose_build_lookaround.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index dd495fd3..7cc1c584 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -485,19 +485,17 @@ vector findLiteralReach(const rose_literal_id &lit) { } static -map findLiteralReach(const RoseBuildImpl &build, - const RoseVertex v) { +vector findLiteralReach(const RoseBuildImpl &build, + const RoseVertex v) { bool first = true; - map look; + vector look; for (u32 lit_id : 
build.g[v].literals) { const rose_literal_id &lit = build.literals.at(lit_id); auto lit_look = findLiteralReach(lit); if (first) { - for (auto &p : lit_look) { - look.emplace(p.offset, p.reach); - } + look = std::move(lit_look); first = false; continue; } @@ -512,22 +510,21 @@ map findLiteralReach(const RoseBuildImpl &build, look.erase(it, end(look)); break; } - if (it->first < jt->offset) { + if (it->offset < jt->offset) { // Offset is present in look but not in lit_look, erase. it = look.erase(it); - } else if (it->first > jt->offset) { + } else if (it->offset > jt->offset) { // Offset is preset in lit_look but not in look, ignore. ++jt; } else { // Offset is present in both, union its reach with look. - it->second |= jt->reach; + it->reach |= jt->reach; ++it; ++jt; } } } - DEBUG_PRINTF("lit lookaround: %s\n", dump(look).c_str()); return look; } @@ -541,11 +538,11 @@ void trimLiterals(const RoseBuildImpl &build, const RoseVertex v, DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str()); for (const auto &m : findLiteralReach(build, v)) { - auto it = look.find(m.first); + auto it = look.find(m.offset); if (it == end(look)) { continue; } - if (m.second.isSubsetOf(it->second)) { + if (m.reach.isSubsetOf(it->second)) { DEBUG_PRINTF("can trim entry at %d\n", it->first); look.erase(it); } From 58004f15f044a28617ada1af3c9e83909c8e5e3f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 14 Jul 2017 12:54:03 +1000 Subject: [PATCH 138/190] limex_compile: turn tugs into a bitset --- src/nfa/limex_compile.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 94d9961b..1c56b312 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -144,13 +144,15 @@ struct build_info { const map &rsmi, const map &smi, const map> &ti, const set &zi, - bool dai, bool sci, const CompileContext &cci, - u32 nsi) - : h(hi), state_ids(states_in), repeats(ri), tops(ti), 
zombies(zi), - do_accel(dai), stateCompression(sci), cc(cci), + bool dai, bool sci, const CompileContext &cci, u32 nsi) + : h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi), + zombies(zi), do_accel(dai), stateCompression(sci), cc(cci), num_states(nsi) { for (const auto &br : repeats) { - insert(&tugs, br.tug_triggers); + for (auto v : br.tug_triggers) { + assert(state_ids.at(v) != NO_STATE); + tugs.set(state_ids.at(v)); + } br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax); } @@ -170,7 +172,7 @@ struct build_info { map squashMap; const map> &tops; - unordered_set tugs; + NFAStateSet tugs; map br_cyclic; const set &zombies; bool do_accel; @@ -1528,7 +1530,7 @@ bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e, } // All transitions out of a tug trigger are exceptional. - if (contains(args.tugs, from)) { + if (args.tugs.test(f)) { return true; } return false; @@ -1845,10 +1847,9 @@ struct Factory { maskSetBit(limex->repeatCyclicMask, cyclic); } /* also include tugs in repeat cyclic mask */ - for (NFAVertex v : args.tugs) { - u32 v_state = args.state_ids.at(v); - assert(v_state != NO_STATE); - maskSetBit(limex->repeatCyclicMask, v_state); + for (size_t i = args.tugs.find_first(); i != args.tugs.npos; + i = args.tugs.find_next(i)) { + maskSetBit(limex->repeatCyclicMask, i); } } From d55e8fdf94970601d15ed9755b7a35e458b292bc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 14 Jul 2017 13:01:00 +1000 Subject: [PATCH 139/190] limex_compile: reduce state id lookups --- src/nfa/limex_compile.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 1c56b312..2010728d 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -1519,18 +1519,14 @@ u32 depth_to_u32(const depth &d) { } static -bool isExceptionalTransition(const NGHolder &h, const NFAEdge &e, - const build_info &args, u32 maxShift) { - NFAVertex from 
= source(e, h); - NFAVertex to = target(e, h); - u32 f = args.state_ids.at(from); - u32 t = args.state_ids.at(to); - if (!isLimitedTransition(f, t, maxShift)) { +bool isExceptionalTransition(u32 from, u32 to, const build_info &args, + u32 maxShift) { + if (!isLimitedTransition(from, to, maxShift)) { return true; } // All transitions out of a tug trigger are exceptional. - if (args.tugs.test(f)) { + if (args.tugs.test(from)) { return true; } return false; @@ -1546,7 +1542,7 @@ u32 findMaxVarShift(const build_info &args, u32 nShifts) { if (from == NO_STATE || to == NO_STATE) { continue; } - if (!isExceptionalTransition(h, e, args, MAX_SHIFT_AMOUNT)) { + if (!isExceptionalTransition(from, to, args, MAX_SHIFT_AMOUNT)) { shiftMask |= (1UL << (to - from)); } } @@ -1575,7 +1571,7 @@ int getLimexScore(const build_info &args, u32 nShifts) { if (from == NO_STATE || to == NO_STATE) { continue; } - if (isExceptionalTransition(h, e, args, maxVarShift)) { + if (isExceptionalTransition(from, to, args, maxVarShift)) { exceptionalStates.set(from); } } @@ -1870,7 +1866,7 @@ struct Factory { // We check for exceptional transitions here, as we don't want tug // trigger transitions emitted as limited transitions (even if they // could be in this model). 
- if (!isExceptionalTransition(h, e, args, maxShift)) { + if (!isExceptionalTransition(from, to, args, maxShift)) { u32 shift = to - from; if ((shiftMask & (1UL << shift)) == 0UL) { shiftMask |= (1UL << shift); @@ -1905,7 +1901,7 @@ struct Factory { continue; } - if (isExceptionalTransition(h, e, args, maxShift)) { + if (isExceptionalTransition(from, to, args, maxShift)) { exceptional.insert(e); } } From 8eb55d4242c31428a19a84d93afa90cb9582cf83 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 3 Aug 2017 15:52:25 +1000 Subject: [PATCH 140/190] rose_build_exclusive: clean up use of vertex indices --- src/rose/rose_build_exclusive.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/rose/rose_build_exclusive.cpp b/src/rose/rose_build_exclusive.cpp index 4c8796f5..6a5a710d 100644 --- a/src/rose/rose_build_exclusive.cpp +++ b/src/rose/rose_build_exclusive.cpp @@ -219,29 +219,29 @@ bool isExclusive(const NGHolder &h, const auto &cr1 = role1.cr; if (overlaps(cr1, role2.last_cr)) { CharReach cr = cr1 | role1.prefix_cr; + flat_set states; for (const auto &lit : triggers2) { auto lit1 = findStartPos(cr, lit); if (lit1.empty()) { continue; } - u32 lower_bound = 0; - if (lit1.size() < lit.size()) { - lower_bound = ~0U; - } - flat_set states; - for (const auto &v : vertices_range(h)) { - if (h[v].index >= lower_bound || h[v].index < 2) { - states.insert(v); - } + states.clear(); + + if (lit1.size() < lit.size()) { + // Only starts. + states.insert(h.start); + states.insert(h.startDs); + } else { + // All vertices. 
+ insert(&states, vertices(h)); } auto activeStates = execute_graph(h, lit1, states); - // Check if has only literal states are on + // Check if only literal states are on for (const auto &s : activeStates) { - u32 stateId = h[s].index; - if ((stateId > 1 && stateId <= num) || - contains(tailId, stateId)) { + if ((!is_any_start(s, h) && h[s].index <= num) || + contains(tailId, h[s].index)) { skipList[id2].insert(id1); return false; } From ba1df6412bee93e6366125bf0a2dd678e9a905eb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 4 Aug 2017 10:06:20 +1000 Subject: [PATCH 141/190] groupByFragment: make fewer string copies --- src/rose/rose_build_bytecode.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b0de73bc..efc591bd 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2767,17 +2767,13 @@ bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { } static -rose_literal_id getFragment(const rose_literal_id &lit) { - if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("whole lit is frag\n"); - return lit; +rose_literal_id getFragment(rose_literal_id lit) { + if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + // Trim to last ROSE_SHORT_LITERAL_LEN_MAX bytes. 
+ lit.s.erase(0, lit.s.length() - ROSE_SHORT_LITERAL_LEN_MAX); } - - rose_literal_id frag = lit; - frag.s = frag.s.substr(frag.s.length() - ROSE_SHORT_LITERAL_LEN_MAX); - - DEBUG_PRINTF("fragment: %s\n", dumpString(frag.s).c_str()); - return frag; + DEBUG_PRINTF("fragment: %s\n", dumpString(lit.s).c_str()); + return lit; } static From a645201675c414b9c75b1b75719cb9510792a862 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Mon, 7 Aug 2017 10:02:53 -0400 Subject: [PATCH 142/190] UE-3147: ensure the same squash behavior for literals shared between different literal matchers --- src/rose/rose_build_bytecode.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index efc591bd..0ae5bb4f 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2838,8 +2838,21 @@ void buildIncludedIdMap(unordered_map> &includedIdMap, } const auto &proto = *litProto->hwlmProto; for (const auto &lit : proto.lits) { - if (lit.included_id != INVALID_LIT_ID) { + if (contains(includedIdMap, lit.id)) { + const auto &included_id = includedIdMap[lit.id].first; + const auto &squash = includedIdMap[lit.id].second; + // The squash behavior should be the same for the same literal + // in different literal matchers. 
+ if (lit.included_id != included_id || + lit.squash != squash) { + includedIdMap[lit.id] = make_pair(INVALID_LIT_ID, 0); + DEBUG_PRINTF("find different included info for the" + " same literal\n"); + } + } else if (lit.included_id != INVALID_LIT_ID) { includedIdMap[lit.id] = make_pair(lit.included_id, lit.squash); + } else { + includedIdMap[lit.id] = make_pair(INVALID_LIT_ID, 0); } } } @@ -2870,7 +2883,8 @@ void findInclusionGroups(vector &fragments, for (const auto &c : candidates) { auto &frag = fragments[c]; u32 id = c; - if (contains(includedIdMap, id)) { + if (contains(includedIdMap, id) && + includedIdMap[id].first != INVALID_LIT_ID) { const auto &childId = includedIdMap[id]; frag.included_frag_id = childId.first; frag.squash = childId.second; @@ -2878,7 +2892,8 @@ void findInclusionGroups(vector &fragments, frag.included_frag_id); } - if (contains(includedDelayIdMap, id)) { + if (contains(includedDelayIdMap, id) && + includedDelayIdMap[id].first != INVALID_LIT_ID) { const auto &childId = includedDelayIdMap[id]; frag.included_delay_frag_id = childId.first; frag.delay_squash = childId.second; From c693c44646a3a0fa7ff7a1a9fcf446f5de6133c0 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 7 Aug 2017 16:41:13 +1000 Subject: [PATCH 143/190] violet: do not remove more states from holders if dfa has been built --- src/nfagraph/ng_violet.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index e19a6211..b5e662cc 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1556,6 +1556,12 @@ void removeRedundantLiteralsFromPrefixes(RoseInGraph &g, continue; } + if (g[e].dfa) { + /* if we removed any more states, we would need to rebuild the + * the dfa which can be time consuming. 
*/ + continue; + } + assert(!g[t].delay); const ue2_literal &lit = g[t].s; @@ -1673,6 +1679,8 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, /* already removed redundant parts of literals */ return; } + + assert(!ig[e].dfa); } map, u32> > graphs; /* + delay */ @@ -1746,6 +1754,11 @@ void removeRedundantLiteralsFromInfixes(RoseInGraph &g, } assert(!g[t].delay); + if (g[e].dfa) { + /* if we removed any more states, we would need to rebuild the + * the dfa which can be time consuming. */ + continue; + } NGHolder *h = g[e].graph.get(); if (!contains(infixes, h)) { @@ -2870,7 +2883,7 @@ bool splitForImplementability(RoseInGraph &vg, NGHolder &h, } DEBUG_PRINTF("trying to netflow\n"); - bool rv = doNetflowCut(h, nullptr, vg, edges, false, cc.grey); + bool rv = doNetflowCut(h, nullptr, vg, edges, false, cc.grey); DEBUG_PRINTF("done\n"); return rv; @@ -2892,7 +2905,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, map > edges_by_graph; vector graphs; for (const RoseInEdge &ve : edges_range(vg)) { - if (vg[ve].graph) { + if (vg[ve].graph && !vg[ve].dfa) { NGHolder *h = vg[ve].graph.get(); if (!contains(edges_by_graph, h)) { graphs.push_back(h); @@ -2929,7 +2942,6 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, return false; } changed = true; - good.insert(h); continue; } From 34ed4a6991eb913f90d0353626df4ae3590f16c5 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 8 Aug 2017 10:13:46 +1000 Subject: [PATCH 144/190] violet: maintain a reference to all the known implementable graphs --- src/nfagraph/ng_violet.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index b5e662cc..0a3a97a0 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -2898,22 +2898,22 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, bool changed = false; bool 
need_to_recalc = false; u32 added_count = 0; - unordered_set good; /* known to be implementable */ + unordered_set> good; /* known to be implementable */ do { changed = false; DEBUG_PRINTF("added %u\n", added_count); map > edges_by_graph; - vector graphs; + vector> graphs; for (const RoseInEdge &ve : edges_range(vg)) { if (vg[ve].graph && !vg[ve].dfa) { - NGHolder *h = vg[ve].graph.get(); - if (!contains(edges_by_graph, h)) { + auto &h = vg[ve].graph; + if (!contains(edges_by_graph, h.get())) { graphs.push_back(h); } - edges_by_graph[h].push_back(ve); + edges_by_graph[h.get()].push_back(ve); } } - for (NGHolder *h : graphs) { + for (auto &h : graphs) { if (contains(good, h)) { continue; } @@ -2924,9 +2924,8 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, } if (tryForEarlyDfa(*h, cc) - && doEarlyDfa(rose, vg, *h, edges_by_graph[h], final_chance, rm, - cc)) { - good.insert(h); + && doEarlyDfa(rose, vg, *h, edges_by_graph[h.get()], + final_chance, rm, cc)) { continue; } @@ -2935,7 +2934,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, return false; } - if (splitForImplementability(vg, *h, edges_by_graph[h], cc)) { + if (splitForImplementability(vg, *h, edges_by_graph[h.get()], cc)) { added_count++; if (added_count > MAX_IMPLEMENTABLE_SPLITS) { DEBUG_PRINTF("added_count hit limit\n"); From 72973ccb478e87dbcf20a2ba9c6e084748c9ded9 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 8 Aug 2017 11:24:52 +1000 Subject: [PATCH 145/190] violet: don't bother swapping holders if unable to trim graph --- src/nfagraph/ng_violet.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 0a3a97a0..7a4de5f5 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1713,6 +1713,11 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, continue; } + if (!delay) { + /* unable to trim graph --> no point swapping to 
new holder */ + continue; + } + assert(isCorrectlyTopped(*h_new)); graphs[right] = make_pair(h_new, delay); } From 3ff70d5568c2f3f95900cb06604ddcfdd4cb473b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 4 Aug 2017 13:23:07 +1000 Subject: [PATCH 146/190] insertion_ordered_{map,set}: add new containers These are associative map/set structures that are iterable in insertion order. --- CMakeLists.txt | 1 + src/nfagraph/ng_violet.cpp | 166 ++++++------- src/rose/rose_build_add.cpp | 14 +- src/rose/rose_build_bytecode.cpp | 22 +- src/util/insertion_ordered.h | 368 ++++++++++++++++++++++++++++ unit/CMakeLists.txt | 1 + unit/internal/insertion_ordered.cpp | 209 ++++++++++++++++ 7 files changed, 671 insertions(+), 110 deletions(-) create mode 100644 src/util/insertion_ordered.h create mode 100644 unit/internal/insertion_ordered.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a07cffc..398c5d0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1007,6 +1007,7 @@ SET (hs_compile_SRCS src/util/graph_small_color_map.h src/util/hash.h src/util/hash_dynamic_bitset.h + src/util/insertion_ordered.h src/util/math.h src/util/multibit_build.cpp src/util/multibit_build.h diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 7a4de5f5..9ce732c2 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -60,6 +60,7 @@ #include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" +#include "util/insertion_ordered.h" #include "util/make_unique.h" #include "util/order_check.h" #include "util/target_info.h" @@ -1076,24 +1077,21 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, insert(&splitter_reports, base_graph[v].reports); } - /* find the targets of each source vertex; note the use of vectors to + /* find the targets of each source vertex; insertion_ordered_map used to * preserve deterministic ordering */ - vector sources; - map> images; + insertion_ordered_map> images; for (const RoseInEdge 
&e : ee) { RoseInVertex src = source(e, vg); RoseInVertex dest = target(e, vg); - if (!contains(images, src)) { - sources.push_back(src); - } images[src].push_back(dest); remove_edge(e, vg); } map, vector> verts_by_image; - for (const auto &u : sources) { - const auto &image = images[u]; + for (const auto &m : images) { + const auto &u = m.first; + const auto &image = m.second; if (contains(verts_by_image, image)) { for (RoseInVertex v : verts_by_image[image]) { @@ -1743,8 +1741,7 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, static void removeRedundantLiteralsFromInfixes(RoseInGraph &g, const CompileContext &cc) { - vector seen_order; - map> infixes; + insertion_ordered_map> infixes; for (const RoseInEdge &e : edges_range(g)) { RoseInVertex s = source(e, g); @@ -1766,14 +1763,13 @@ void removeRedundantLiteralsFromInfixes(RoseInGraph &g, } NGHolder *h = g[e].graph.get(); - if (!contains(infixes, h)) { - seen_order.push_back(h); - } infixes[h].push_back(e); } - for (NGHolder *h : seen_order) { - removeRedundantLiteralsFromInfix(*h, g, infixes[h], cc); + for (const auto &m : infixes) { + NGHolder *h = m.first; + const auto &edges = m.second; + removeRedundantLiteralsFromInfix(*h, g, edges, cc); } } @@ -2088,13 +2084,13 @@ void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) { STAGE_DEBUG_PRINTF("FIND BETTER PREFIXES\n"); RoseInVertex start = getStart(vg); + insertion_ordered_map> prefixes; bool changed; u32 gen = 0; do { DEBUG_PRINTF("gen %u\n", gen); changed = false; - vector seen_order; - map > prefixes; + prefixes.clear(); /* find prefixes */ for (const RoseInEdge &e : out_edges_range(start, vg)) { @@ -2102,9 +2098,6 @@ void findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) { assert(vg[target(e, vg)].type == RIV_LITERAL); if (vg[e].graph) { NGHolder *h = vg[e].graph.get(); - if (!contains(prefixes, h)) { - seen_order.push_back(h); - } prefixes[h].push_back(e); } } @@ -2114,14 +2107,16 @@ void 
findBetterPrefixes(RoseInGraph &vg, const CompileContext &cc) { } /* look for bad prefixes and try to split */ - for (NGHolder *h : seen_order) { + for (const auto &m : prefixes) { + NGHolder *h = m.first; + const auto &edges = m.second; depth max_width = findMaxWidth(*h); if (willBeTransient(max_width, cc) || willBeAnchoredTable(max_width, cc.grey)) { continue; } - changed = improvePrefix(*h, vg, prefixes[h], cc); + changed = improvePrefix(*h, vg, edges, cc); } } while (changed && gen++ < MAX_FIND_BETTER_PREFIX_GEN); } @@ -2149,24 +2144,25 @@ void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) { if (!cc.grey.violetExtractStrongLiterals) { return; } - STAGE_DEBUG_PRINTF("EXTRACT STRONG LITERALS\n"); - set stuck; + STAGE_DEBUG_PRINTF("EXTRACT STRONG LITERALS\n"); + + unordered_set stuck; + insertion_ordered_map> edges_by_graph; bool changed; + do { changed = false; - vector seen_order; - map > edges_by_graph; + edges_by_graph.clear(); for (const RoseInEdge &ve : edges_range(vg)) { if (vg[source(ve, vg)].type != RIV_LITERAL) { continue; } + if (vg[ve].graph) { - if (!contains(edges_by_graph, vg[ve].graph.get())) { - seen_order.push_back(vg[ve].graph.get()); - } - edges_by_graph[vg[ve].graph.get()].push_back(ve); + NGHolder *h = vg[ve].graph.get(); + edges_by_graph[h].push_back(ve); } } @@ -2175,12 +2171,14 @@ void extractStrongLiterals(RoseInGraph &vg, const CompileContext &cc) { return; } - for (NGHolder *g : seen_order) { + for (const auto &m : edges_by_graph) { + NGHolder *g = m.first; + const auto &edges = m.second; if (contains(stuck, g)) { DEBUG_PRINTF("already known to be bad\n"); continue; } - bool rv = extractStrongLiteral(*g, vg, edges_by_graph[g], cc); + bool rv = extractStrongLiteral(*g, vg, edges, cc); if (rv) { changed = true; } else { @@ -2228,8 +2226,7 @@ void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) { RoseInVertex start = getStart(vg); - set weak; - vector ordered_weak; + unordered_set weak; for (RoseInVertex vv 
: adjacent_vertices_range(start, vg)) { /* outfixes shouldn't have made it this far */ @@ -2245,22 +2242,22 @@ void improveWeakInfixes(RoseInGraph &vg, const CompileContext &cc) { NGHolder *h = vg[e].graph.get(); DEBUG_PRINTF("'%s' guards %p\n", dumpString(vg[vv].s).c_str(), h); - if (!contains(weak, h)) { - weak.insert(h); - ordered_weak.push_back(h); - } + weak.insert(h); } } - map > weak_edges; + insertion_ordered_map> weak_edges; for (const RoseInEdge &ve : edges_range(vg)) { - if (contains(weak, vg[ve].graph.get())) { - weak_edges[vg[ve].graph.get()].push_back(ve); + NGHolder *h = vg[ve].graph.get(); + if (contains(weak, h)) { + weak_edges[h].push_back(ve); } } - for (NGHolder *h : ordered_weak) { - improveInfix(*h, vg, weak_edges[h], cc); + for (const auto &m : weak_edges) { + NGHolder *h = m.first; + const auto &edges = m.second; + improveInfix(*h, vg, edges, cc); } } @@ -2416,8 +2413,8 @@ void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) { STAGE_DEBUG_PRINTF("AVOID SUFFIXES\n"); RoseInVertex accept = getPrimaryAccept(vg); - map > suffixes; - vector ordered_suffixes; + + insertion_ordered_map> suffixes; /* find suffixes */ for (const RoseInEdge &e : in_edges_range(accept, vg)) { @@ -2426,15 +2423,14 @@ void avoidSuffixes(RoseInGraph &vg, const CompileContext &cc) { assert(vg[e].graph); /* non suffix paths should be wired to other accepts */ const NGHolder *h = vg[e].graph.get(); - if (!contains(suffixes, h)) { - ordered_suffixes.push_back(h); - } suffixes[h].push_back(e); } /* look at suffixes and try to split */ - for (const NGHolder *h : ordered_suffixes) { - replaceSuffixWithInfix(*h, vg, suffixes[h], cc); + for (const auto &m : suffixes) { + const NGHolder *h = m.first; + const auto &edges = m.second; + replaceSuffixWithInfix(*h, vg, edges, cc); } } @@ -2518,20 +2514,18 @@ void lookForDoubleCut(RoseInGraph &vg, const CompileContext &cc) { return; } - map > right_edges; - vector ordered_graphs; + insertion_ordered_map> right_edges; for (const 
RoseInEdge &ve : edges_range(vg)) { if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { const NGHolder *h = vg[ve].graph.get(); - if (!contains(right_edges, h)) { - ordered_graphs.push_back(h); - } right_edges[h].push_back(ve); } } - for (const NGHolder *h : ordered_graphs) { - lookForDoubleCut(*h, right_edges[h], vg, cc.grey); + for (const auto &m : right_edges) { + const NGHolder *h = m.first; + const auto &edges = m.second; + lookForDoubleCut(*h, edges, vg, cc.grey); } } @@ -2656,24 +2650,22 @@ void decomposeLiteralChains(RoseInGraph &vg, const CompileContext &cc) { return; } + insertion_ordered_map> right_edges; bool changed; do { changed = false; - map > right_edges; - vector ordered_graphs; + right_edges.clear(); for (const RoseInEdge &ve : edges_range(vg)) { if (vg[ve].graph && vg[source(ve, vg)].type == RIV_LITERAL) { const NGHolder *h = vg[ve].graph.get(); - if (!contains(right_edges, h)) { - ordered_graphs.push_back(h); - } right_edges[h].push_back(ve); } } - for (const NGHolder *h : ordered_graphs) { - const vector &ee = right_edges[h]; + for (const auto &m : right_edges) { + const NGHolder *h = m.first; + const vector &ee = m.second; bool rv = lookForDoubleCut(*h, ee, vg, cc.grey); if (!rv && h->kind != NFA_SUFFIX) { rv = lookForTrailingLiteralDotStar(*h, ee, vg, cc.grey); @@ -2701,39 +2693,34 @@ static void lookForCleanEarlySplits(RoseInGraph &vg, const CompileContext &cc) { u32 gen = 0; - vector prev = {getStart(vg)}; + insertion_ordered_set prev({getStart(vg)}); + insertion_ordered_set curr; while (gen < MAX_DESIRED_CLEAN_SPLIT_DEPTH) { - /* collect vertices in edge order for determinism */ - vector curr; - set curr_seen; + curr.clear(); for (RoseInVertex u : prev) { for (auto v : adjacent_vertices_range(u, vg)) { - if (curr_seen.insert(v).second) { - curr.push_back(v); - } + curr.insert(v); } } - map> rightfixes; - vector ordered_graphs; + insertion_ordered_map> rightfixes; for (RoseInVertex v : curr) { for (const RoseInEdge &e : 
out_edges_range(v, vg)) { if (vg[e].graph) { NGHolder *h = vg[e].graph.get(); - if (!contains(rightfixes, h)) { - ordered_graphs.push_back(h); - } rightfixes[h].push_back(e); } } } - for (const NGHolder *h : ordered_graphs) { - lookForCleanSplit(*h, rightfixes[h], vg, cc); + for (const auto &m : rightfixes) { + const NGHolder *h = m.first; + const auto &edges = m.second; + lookForCleanSplit(*h, edges, vg, cc); } - prev = curr; + prev = std::move(curr); gen++; } } @@ -2907,18 +2894,16 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, do { changed = false; DEBUG_PRINTF("added %u\n", added_count); - map > edges_by_graph; - vector> graphs; + insertion_ordered_map, + vector> edges_by_graph; for (const RoseInEdge &ve : edges_range(vg)) { if (vg[ve].graph && !vg[ve].dfa) { auto &h = vg[ve].graph; - if (!contains(edges_by_graph, h.get())) { - graphs.push_back(h); - } - edges_by_graph[h.get()].push_back(ve); + edges_by_graph[h].push_back(ve); } } - for (auto &h : graphs) { + for (auto &m : edges_by_graph) { + auto &h = m.first; if (contains(good, h)) { continue; } @@ -2928,9 +2913,10 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, continue; } - if (tryForEarlyDfa(*h, cc) - && doEarlyDfa(rose, vg, *h, edges_by_graph[h.get()], - final_chance, rm, cc)) { + const auto &edges = m.second; + + if (tryForEarlyDfa(*h, cc) && + doEarlyDfa(rose, vg, *h, edges, final_chance, rm, cc)) { continue; } @@ -2939,7 +2925,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, return false; } - if (splitForImplementability(vg, *h, edges_by_graph[h.get()], cc)) { + if (splitForImplementability(vg, *h, edges, cc)) { added_count++; if (added_count > MAX_IMPLEMENTABLE_SPLITS) { DEBUG_PRINTF("added_count hit limit\n"); diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index b003336a..71f1667d 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -55,6 +55,7 @@ 
#include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" +#include "util/insertion_ordered.h" #include "util/make_unique.h" #include "util/noncopyable.h" #include "util/order_check.h" @@ -1525,8 +1526,7 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { renumber_vertices(in); assert(validateKinds(in)); - map > graphs; - vector ordered_graphs; // Stored in first-encounter order. + insertion_ordered_map> graphs; for (const auto &e : edges_range(in)) { if (!in[e].graph) { @@ -1544,21 +1544,17 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { NGHolder *h = in[e].graph.get(); assert(isCorrectlyTopped(*h)); - if (!contains(graphs, h)) { - ordered_graphs.push_back(h); - } graphs[h].push_back(e); } - assert(ordered_graphs.size() == graphs.size()); - vector graph_edges; - for (auto h : ordered_graphs) { + for (const auto &m : graphs) { + NGHolder *h = m.first; if (!canImplementGraph(*h, prefilter, rm, cc)) { return false; } - insert(&graph_edges, graph_edges.end(), graphs[h]); + insert(&graph_edges, graph_edges.end(), m.second); } /* we are now past the point of no return. 
We can start making irreversible diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 0ae5bb4f..d3ae52bf 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -86,6 +86,7 @@ #include "util/container.h" #include "util/fatbit_build.h" #include "util/graph_range.h" +#include "util/insertion_ordered.h" #include "util/make_unique.h" #include "util/multibit_build.h" #include "util/noncopyable.h" @@ -1474,11 +1475,11 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, RoseGraph &g = tbi.g; const CompileContext &cc = tbi.cc; - map > infixTriggers; - vector order; - unordered_map> succs; + map> infixTriggers; findInfixTriggers(tbi, &infixTriggers); + insertion_ordered_map> succs; + if (cc.grey.allowTamarama && cc.streaming && !do_prefix) { findExclusiveInfixes(tbi, bc, qif, infixTriggers, no_retrigger_queues); } @@ -1517,10 +1518,6 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, } } - if (!contains(succs, leftfix)) { - order.push_back(leftfix); - } - succs[leftfix].push_back(v); } @@ -1529,8 +1526,9 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, map eager; - for (const left_id &leftfix : order) { - const auto &left_succs = succs[leftfix]; + for (const auto &m : succs) { + const left_id &leftfix = m.first; + const auto &left_succs = m.second; rose_group squash_mask = tbi.rose_squash_masks.at(leftfix); eager_info ei; @@ -1549,9 +1547,11 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, eager.clear(); } - for (const left_id &leftfix : order) { + for (const auto &m : succs) { + const left_id &leftfix = m.first; + const auto &left_succs = m.second; buildLeftfix(tbi, bc, do_prefix, qif.get_queue(), infixTriggers, - no_retrigger_queues, eager_queues, eager, succs[leftfix], + no_retrigger_queues, eager_queues, eager, left_succs, leftfix); } diff --git a/src/util/insertion_ordered.h b/src/util/insertion_ordered.h new file mode 100644 index 00000000..2067d350 --- 
/dev/null +++ b/src/util/insertion_ordered.h @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTIL_INSERTION_ORDERED_H +#define UTIL_INSERTION_ORDERED_H + +/** + * \file + * \brief Insertion-ordered associative containers (set, map). 
+ */ + +#include "util/operators.h" +#include "util/unordered.h" + +#include +#include +#include +#include +#include + +#include + +namespace ue2 { + +namespace insertion_ordered_detail { + +// Iterator facade that wraps an underlying iterator, so that we get our +// own iterator types. +template +class iter_wrapper + : public boost::iterator_facade, Value, + boost::random_access_traversal_tag> { +public: + iter_wrapper() = default; + explicit iter_wrapper(WrappedIter it_in) : it(std::move(it_in)) {} + + // Templated copy-constructor to allow for interoperable iterator and + // const_iterator. + template friend class iter_wrapper; + + template + iter_wrapper(iter_wrapper other, + typename std::enable_if::value>::type * = nullptr) + : it(std::move(other.it)) {} + + WrappedIter get() const { return it; } + +private: + friend class boost::iterator_core_access; + + WrappedIter it; + + void increment() { ++it; } + void decrement() { --it; } + void advance(size_t n) { it += n; } + typename std::iterator_traits::difference_type + distance_to(const iter_wrapper &other) const { + return other.it - it; + } + bool equal(const iter_wrapper &other) const { return it == other.it; } + Value &dereference() const { return *it; } +}; + +template +class element_store { + std::vector data; + ue2_unordered_map map; + +public: + bool empty() const { + return data.empty(); + } + + size_t size() const { + assert(data.size() == map.size()); + return data.size(); + } + + void clear() { + data.clear(); + map.clear(); + } + + void reserve(size_t n) { + data.reserve(n); + map.reserve(n); + } + + // Iteration. 
+ + using const_iterator = + iter_wrapper::const_iterator, + const Element>; + using iterator = + iter_wrapper::iterator, Element>; + + const_iterator begin() const { + return const_iterator(data.begin()); + } + + const_iterator end() const { + return const_iterator(data.end()); + } + + iterator begin() { + return iterator(data.begin()); + } + + iterator end() { + return iterator(data.end()); + } + + // Search. + + const_iterator find(const Key &key) const { + auto map_it = map.find(key); + if (map_it == map.end()) { + return end(); + } + auto idx = map_it->second; + assert(idx < data.size()); + return begin() + idx; + } + + iterator find(const Key &key) { + auto map_it = map.find(key); + if (map_it == map.end()) { + return end(); + } + auto idx = map_it->second; + assert(idx < data.size()); + return begin() + idx; + } + + // Insert. + + std::pair insert(const Key &key, const Element &element) { + const auto idx = data.size(); + if (map.emplace(key, idx).second) { + data.push_back(element); + return {begin() + idx, true}; + } + return {end(), false}; + } + + bool operator==(const element_store &a) const { + return data == a.data; + } + + bool operator<(const element_store &a) const { + return data < a.data; + } + + void swap(element_store &a) { + using std::swap; + swap(data, a.data); + swap(map, a.map); + } +}; + +} // namespace insertion_ordered_detail + +template +class insertion_ordered_map + : public totally_ordered> { +public: + using key_type = Key; + using mapped_type = Value; + using value_type = std::pair; + +private: + using store_type = insertion_ordered_detail::element_store; + store_type store; + +public: + using const_iterator = typename store_type::const_iterator; + using iterator = typename store_type::iterator; + + insertion_ordered_map() = default; + + template + insertion_ordered_map(Iter it, Iter it_end) { + insert(it, it_end); + } + + explicit insertion_ordered_map(std::initializer_list init) { + insert(init.begin(), init.end()); + } + + 
const_iterator begin() const { return store.begin(); } + const_iterator end() const { return store.end(); } + iterator begin() { return store.begin(); } + iterator end() { return store.end(); } + + const_iterator find(const Key &key) const { + return store.find(key); + } + + iterator find(const Key &key) { + return store.find(key); + } + + std::pair insert(const std::pair &p) { + return store.insert(p.first, p); + } + + template + void insert(Iter it, Iter it_end) { + for (; it != it_end; ++it) { + insert(*it); + } + } + + Value &operator[](const Key &key) { + auto it = find(key); + if (it == end()) { + it = insert({key, Value{}}).first; + } + return it->second; + } + + const Value &at(const Key &key) const { + return find(key)->second; + } + + Value &at(const Key &key) { + return find(key)->second; + } + + bool empty() const { + return store.empty(); + } + + size_t size() const { + return store.size(); + } + + void clear() { + store.clear(); + } + + void reserve(size_t n) { + store.reserve(n); + } + + bool operator==(const insertion_ordered_map &a) const { + return store == a.store; + } + + bool operator<(const insertion_ordered_map &a) const { + return store < a.store; + } + + void swap(insertion_ordered_map &a) { + store.swap(a.store); + } + + friend void swap(insertion_ordered_map &a, insertion_ordered_map &b) { + a.swap(b); + } +}; + +template +class insertion_ordered_set + : public totally_ordered> { +public: + using key_type = Key; + using value_type = Key; + +private: + using store_type = insertion_ordered_detail::element_store; + store_type store; + +public: + using const_iterator = typename store_type::const_iterator; + using iterator = typename store_type::iterator; + + insertion_ordered_set() = default; + + template + insertion_ordered_set(Iter it, Iter it_end) { + insert(it, it_end); + } + + explicit insertion_ordered_set(std::initializer_list init) { + insert(init.begin(), init.end()); + } + + const_iterator begin() const { return store.begin(); } + 
const_iterator end() const { return store.end(); } + + const_iterator find(const Key &key) const { + return store.find(key); + } + + std::pair insert(const Key &key) { + return store.insert(key, key); + } + + template + void insert(Iter it, Iter it_end) { + for (; it != it_end; ++it) { + insert(*it); + } + } + + bool empty() const { + return store.empty(); + } + + size_t size() const { + return store.size(); + } + + void clear() { + store.clear(); + } + + void reserve(size_t n) { + store.reserve(n); + } + + bool operator==(const insertion_ordered_set &a) const { + return store == a.store; + } + + bool operator<(const insertion_ordered_set &a) const { + return store < a.store; + } + + void swap(insertion_ordered_set &a) { + store.swap(a.store); + } + + friend void swap(insertion_ordered_set &a, insertion_ordered_set &b) { + a.swap(b); + } +}; + +} // namespace ue2 + +#endif // UTIL_INSERTION_ORDERED_H diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index fad8633d..6f8a8bf4 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -78,6 +78,7 @@ set(unit_internal_SOURCES internal/flat_set.cpp internal/flat_map.cpp internal/graph.cpp + internal/insertion_ordered.cpp internal/lbr.cpp internal/limex_nfa.cpp internal/masked_move.cpp diff --git a/unit/internal/insertion_ordered.cpp b/unit/internal/insertion_ordered.cpp new file mode 100644 index 00000000..6026ce1d --- /dev/null +++ b/unit/internal/insertion_ordered.cpp @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include "ue2common.h" +#include "util/insertion_ordered.h" + +#include "gtest/gtest.h" + +using namespace std; +using namespace ue2; + +template +std::ostream &operator<<(std::ostream &os, + const insertion_ordered_map &m) { + os << "{"; + for (auto it = begin(m); it != end(m); ++it) { + os << "{" << it->first << ", " << it->second << "}"; + if (it != end(m)) { + os << ", "; + } + } + os << "}"; + return os; +} + +TEST(insertion_ordered_map, empty) { + insertion_ordered_map m; + EXPECT_TRUE(m.empty()); + EXPECT_TRUE(m.begin() == m.end()); + EXPECT_EQ(0, m.size()); + + m.insert({10, 10}); + EXPECT_FALSE(m.empty()); + EXPECT_EQ(1, m.size()); + + m.clear(); + EXPECT_TRUE(m.empty()); + EXPECT_TRUE(m.begin() == m.end()); + EXPECT_EQ(0, m.size()); +} + +TEST(insertion_ordered_map, insert) { + const vector> v = {{7, 1}, {1, 2}, {3, 4}, + {10, 5}, {99, 6}, {12, 7}}; + insertion_ordered_map m; + for (const auto &e : v) { + m.insert(e); + } + + EXPECT_FALSE(m.empty()); + EXPECT_EQ(v.size(), m.size()); + vector> v2(m.begin(), m.end()); + EXPECT_EQ(v, v2); +} + +TEST(insertion_ordered_map, insert_iter) { + const vector> v = {{7, 1}, {1, 2}, {3, 4}, + {10, 5}, {99, 6}, {12, 7}}; + insertion_ordered_map m; + m.insert(v.begin(), v.end()); + + EXPECT_FALSE(m.empty()); + EXPECT_EQ(v.size(), m.size()); + vector> v2(m.begin(), m.end()); + EXPECT_EQ(v, v2); +} + +TEST(insertion_ordered_map, find_const) { + const vector> v = {{7, 1}, {1, 2}, {3, 4}, + {10, 5}, {99, 6}, {12, 7}}; + const insertion_ordered_map m(v.begin(), v.end()); + + for (const auto &e : v) { + auto it = m.find(e.first); + ASSERT_NE(m.end(), it); + EXPECT_EQ(e.first, it->first); + EXPECT_EQ(e.second, it->second); + } +} + +TEST(insertion_ordered_map, find_mutable) { + const vector> v = {{7, 1}, {1, 2}, {3, 4}, + {10, 5}, {99, 6}, {12, 7}}; + insertion_ordered_map m(v.begin(), v.end()); + + for (const auto &e : v) { + auto it = m.find(e.first); + ASSERT_NE(m.end(), it); + 
EXPECT_EQ(e.first, it->first); + EXPECT_EQ(e.second, it->second); + auto &mut = it->second; + ++mut; + EXPECT_EQ(e.second + 1, m.at(e.first)); + } +} + +TEST(insertion_ordered_map, operator_brackets) { + insertion_ordered_map m; + + u32 val = 1000; + for (u32 i = 10; i > 0; i--) { + m[i] = val++; + } + + EXPECT_EQ(10, m.size()); + + val = 1000; + auto it = m.begin(); + for (u32 i = 10; i > 0; i--) { + ASSERT_NE(m.end(), it); + EXPECT_EQ(i, it->first); + EXPECT_EQ(val, it->second); + ++val; + ++it; + } + + ASSERT_EQ(m.end(), it); +} + +template +std::ostream &operator<<(std::ostream &os, const insertion_ordered_set &s) { + os << "{"; + for (auto it = begin(s); it != end(s); ++it) { + os << *it; + if (it != end(s)) { + os << ", "; + } + } + os << "}"; + return os; +} + +TEST(insertion_ordered_set, empty) { + insertion_ordered_set m; + EXPECT_TRUE(m.empty()); + EXPECT_TRUE(m.begin() == m.end()); + EXPECT_EQ(0, m.size()); + + m.insert(10); + EXPECT_FALSE(m.empty()); + EXPECT_EQ(1, m.size()); + + m.clear(); + EXPECT_TRUE(m.empty()); + EXPECT_TRUE(m.begin() == m.end()); + EXPECT_EQ(0, m.size()); +} + +TEST(insertion_ordered_set, insert) { + const vector v = {7, 1, 3, 10, 99, 12}; + insertion_ordered_set s; + for (const auto &e : v) { + s.insert(e); + } + + EXPECT_FALSE(s.empty()); + EXPECT_EQ(v.size(), s.size()); + vector v2(s.begin(), s.end()); + EXPECT_EQ(v, v2); +} + +TEST(insertion_ordered_set, insert_iter) { + const vector v = {7, 1, 3, 10, 99, 12}; + insertion_ordered_set s; + s.insert(v.begin(), v.end()); + + EXPECT_FALSE(s.empty()); + EXPECT_EQ(v.size(), s.size()); + vector v2(s.begin(), s.end()); + EXPECT_EQ(v, v2); +} + +TEST(insertion_ordered_set, find_const) { + const vector v = {7, 1, 3, 10, 99, 12}; + const insertion_ordered_set s(v.begin(), v.end()); + + for (const auto &e : v) { + auto it = s.find(e); + ASSERT_NE(s.end(), it); + EXPECT_EQ(e, *it); + } +} From 164e5a929f140e1457c75e4127fcdc6093065aa1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 
9 Aug 2017 14:04:28 +1000 Subject: [PATCH 147/190] fdr_compile: faster scoring code --- src/fdr/fdr_compile.cpp | 91 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 11 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 210729a7..770f30fe 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -52,6 +52,7 @@ #include "util/verify_types.h" #include +#include #include #include #include @@ -212,11 +213,77 @@ bytecode_ptr FDRCompiler::setupFDR() { //#define DEBUG_ASSIGNMENT -static -double getScoreUtil(u32 len, u32 count) { - return len == 0 ? numeric_limits::max() - : our_pow(count, 1.05) * our_pow(len, -3.0); -} +/** + * Utility class for computing: + * + * score(count, len) = pow(count, 1.05) * pow(len, -3) + * + * Calling pow() is expensive. This is mitigated by using pre-computed LUTs for + * small inputs and a cache for larger ones. + */ +class Scorer { + unordered_map count_factor_cache; + + // LUT: pow(count, 1.05) for small values of count. + static const array count_lut; + + double count_factor(u32 count) { + if (count < count_lut.size()) { + return count_lut[count]; + } + + auto it = count_factor_cache.find(count); + if (it != count_factor_cache.end()) { + return it->second; + } + double r = our_pow(count, 1.05); + count_factor_cache.emplace(count, r); + return r; + } + + // LUT: pow(len, -3) for len in range [0,8]. 
+ static const array len_lut; + + double len_factor(u32 len) { + assert(len <= len_lut.size()); + return len_lut[len]; + } + +public: + double operator()(u32 len, u32 count) { + if (len == 0) { + return numeric_limits::max(); + } + return count_factor(count) * len_factor(len); + } +}; + +const array Scorer::count_lut{{ + pow(0, 1.05), pow(1, 1.05), pow(2, 1.05), pow(3, 1.05), pow(4, 1.05), + pow(5, 1.05), pow(6, 1.05), pow(7, 1.05), pow(8, 1.05), pow(9, 1.05), + pow(10, 1.05), pow(11, 1.05), pow(12, 1.05), pow(13, 1.05), pow(14, 1.05), + pow(15, 1.05), pow(16, 1.05), pow(17, 1.05), pow(18, 1.05), pow(19, 1.05), + pow(20, 1.05), pow(21, 1.05), pow(22, 1.05), pow(23, 1.05), pow(24, 1.05), + pow(25, 1.05), pow(26, 1.05), pow(27, 1.05), pow(28, 1.05), pow(29, 1.05), + pow(30, 1.05), pow(31, 1.05), pow(32, 1.05), pow(33, 1.05), pow(34, 1.05), + pow(35, 1.05), pow(36, 1.05), pow(37, 1.05), pow(38, 1.05), pow(39, 1.05), + pow(40, 1.05), pow(41, 1.05), pow(42, 1.05), pow(43, 1.05), pow(44, 1.05), + pow(45, 1.05), pow(46, 1.05), pow(47, 1.05), pow(48, 1.05), pow(49, 1.05), + pow(50, 1.05), pow(51, 1.05), pow(52, 1.05), pow(53, 1.05), pow(54, 1.05), + pow(55, 1.05), pow(56, 1.05), pow(57, 1.05), pow(58, 1.05), pow(59, 1.05), + pow(60, 1.05), pow(61, 1.05), pow(62, 1.05), pow(63, 1.05), pow(64, 1.05), + pow(65, 1.05), pow(66, 1.05), pow(67, 1.05), pow(68, 1.05), pow(69, 1.05), + pow(70, 1.05), pow(71, 1.05), pow(72, 1.05), pow(73, 1.05), pow(74, 1.05), + pow(75, 1.05), pow(76, 1.05), pow(77, 1.05), pow(78, 1.05), pow(79, 1.05), + pow(80, 1.05), pow(81, 1.05), pow(82, 1.05), pow(83, 1.05), pow(84, 1.05), + pow(85, 1.05), pow(86, 1.05), pow(87, 1.05), pow(88, 1.05), pow(89, 1.05), + pow(90, 1.05), pow(91, 1.05), pow(92, 1.05), pow(93, 1.05), pow(94, 1.05), + pow(95, 1.05), pow(96, 1.05), pow(97, 1.05), pow(98, 1.05), pow(99, 1.05), +}}; + +const array Scorer::len_lut{{ + pow(0, -3.0), pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0), + pow(5, -3.0), pow(6, -3.0), pow(7, 
-3.0), pow(8, -3.0)}}; /** * Returns true if the two given literals should be placed in the same chunk as @@ -361,12 +428,14 @@ map> assignStringsToBuckets( boost::multi_array, 2> t( boost::extents[numChunks][numBuckets]); + Scorer scorer; + for (u32 j = 0; j < numChunks; j++) { u32 cnt = 0; for (u32 k = j; k < numChunks; ++k) { cnt += chunks[k].count; } - t[j][0] = {getScoreUtil(chunks[j].length, cnt), 0}; + t[j][0] = {scorer(chunks[j].length, cnt), 0}; } for (u32 i = 1; i < numBuckets; i++) { @@ -374,7 +443,7 @@ map> assignStringsToBuckets( pair best = {MAX_SCORE, 0}; u32 cnt = chunks[j].count; for (u32 k = j + 1; k < numChunks - 1; k++) { - auto score = getScoreUtil(chunks[j].length, cnt); + auto score = scorer(chunks[j].length, cnt); if (score > best.first) { break; // now worse locally than our best score, give up } @@ -416,10 +485,10 @@ map> assignStringsToBuckets( UNUSED const auto &first_lit = lits[first_id]; UNUSED const auto &last_lit = lits[last_id - 1]; DEBUG_PRINTF("placing [%u-%u) in one bucket (%u lits, len %zu-%zu, " - "score %0.4f)\n", - first_id, last_id, last_id - first_id, - first_lit.s.length(), last_lit.s.length(), - getScoreUtil(first_lit.s.length(), last_id - first_id)); + "score %0.4f)\n", + first_id, last_id, last_id - first_id, + first_lit.s.length(), last_lit.s.length(), + scorer(first_lit.s.length(), last_id - first_id)); vector litIds; u32 cnt = last_id - first_id; From 4e9b0ad3fdd79b2111d732a3c99518ec70b97626 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 10 Aug 2017 13:28:00 +1000 Subject: [PATCH 148/190] rework mergeLeftfixesVariableLag() --- src/rose/rose_build_merge.cpp | 564 ++++++++++++++++++++++------------ 1 file changed, 370 insertions(+), 194 deletions(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 4001b118..0d73c101 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -64,6 +64,7 @@ #include "util/dump_charclass.h" #include "util/graph_range.h" #include 
"util/hash.h" +#include "util/insertion_ordered.h" #include "util/order_check.h" #include "util/report_manager.h" #include "util/ue2string.h" @@ -886,6 +887,18 @@ bool hasSameEngineType(const RoseVertexProps &u_prop, return true; } +/** + * Verifies that merging the leftfix of vertices does not cause conflicts due + * to the literals on the right. + * + * The main concern is that the lags of the literals and overlap between them + * allow the engine check offset to potentially regress. + * + * Parameters are vectors of literals + lag pairs. + * + * Note: if more constaints of when the leftfixes were going to be checked + * (mandatory lookarounds passing, offset checks), more merges may be allowed. + */ static bool compatibleLiteralsForMerge( const vector> &ulits, @@ -899,6 +912,21 @@ bool compatibleLiteralsForMerge( return false; } + // We don't handle delayed cases yet. + for (const auto &ue : ulits) { + const rose_literal_id &ul = *ue.first; + if (ul.delay) { + return false; + } + } + + for (const auto &ve : vlits) { + const rose_literal_id &vl = *ve.first; + if (vl.delay) { + return false; + } + } + /* An engine requires that all accesses to it are ordered by offsets. (ie, we can not check an engine's state at offset Y, if we have already checked its status at offset X and X > Y). If we can not establish that @@ -908,18 +936,10 @@ bool compatibleLiteralsForMerge( const rose_literal_id &ul = *ue.first; u32 ulag = ue.second; - if (ul.delay) { - return false; // We don't handle delayed cases yet. - } - for (const auto &ve : vlits) { const rose_literal_id &vl = *ve.first; u32 vlag = ve.second; - if (vl.delay) { - return false; // We don't handle delayed cases yet. 
- } - if (!checkPrefix(ul, ulag, vl, vlag) || !checkPrefix(vl, vlag, ul, ulag)) { DEBUG_PRINTF("prefix check failed\n"); @@ -944,8 +964,8 @@ bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) { } /** - * In block mode, we want to be a little more selective, We will only merge - * prefix engines when the literal sets are the same, or if the merged graph + * In block mode, we want to be a little more selective -- We will only merge + * prefix engines when the literal sets are the same or if the merged graph * has only grown by a small amount. */ static @@ -1101,12 +1121,13 @@ bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, return true; } +template static never_inline -bool checkPredDelays(const RoseBuildImpl &tbi, const deque &v1, - const deque &v2) { +bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, + const VertexCont &v2) { flat_set preds; for (auto v : v1) { - insert(&preds, inv_adjacent_vertices(v, tbi.g)); + insert(&preds, inv_adjacent_vertices(v, build.g)); } flat_set pred_lits; @@ -1118,29 +1139,29 @@ bool checkPredDelays(const RoseBuildImpl &tbi, const deque &v1, * the literal is no longer available. 
*/ flat_set known_good_preds; for (auto v : v2) { - insert(&known_good_preds, inv_adjacent_vertices(v, tbi.g)); + insert(&known_good_preds, inv_adjacent_vertices(v, build.g)); } for (auto u : preds) { if (!contains(known_good_preds, u)) { - insert(&pred_lits, tbi.g[u].literals); + insert(&pred_lits, build.g[u].literals); } } vector pred_rose_lits; pred_rose_lits.reserve(pred_lits.size()); for (const auto &p : pred_lits) { - pred_rose_lits.push_back(&tbi.literals.at(p)); + pred_rose_lits.push_back(&build.literals.at(p)); } for (auto v : v2) { - u32 vlag = tbi.g[v].left.lag; + u32 vlag = build.g[v].left.lag; if (!vlag) { continue; } - for (const u32 vlit : tbi.g[v].literals) { - const rose_literal_id &vl = tbi.literals.at(vlit); + for (const u32 vlit : build.g[v].literals) { + const rose_literal_id &vl = build.literals.at(vlit); assert(!vl.delay); // this should never have got this far? for (const auto &ul : pred_rose_lits) { assert(!ul->delay); // this should never have got this far? @@ -1189,7 +1210,7 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, vector> ulits; /* lit + lag pairs */ for (auto a : verts1) { - if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) { return false; } @@ -1278,23 +1299,23 @@ struct RoseMergeCandidate { } static -bool mergeRosePair(RoseBuildImpl &tbi, left_id &r1, left_id &r2, - const deque &verts1, - const deque &verts2) { +bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, + const vector &verts1, + const vector &verts2) { assert(!verts1.empty() && !verts2.empty()); - DEBUG_PRINTF("merging rose pair:\n"); + DEBUG_PRINTF("merging pair of leftfixes:\n"); DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(), as_string_list(all_tops(r1)).c_str()); DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(), as_string_list(all_tops(r2)).c_str()); - RoseGraph &g = tbi.g; + RoseGraph &g = build.g; if (r1.graph()) { assert(r2.graph()); assert(r1.graph()->kind 
== r2.graph()->kind); - if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, tbi.cc)) { + if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) { DEBUG_PRINTF("nfa merge failed\n"); return false; } @@ -1315,7 +1336,7 @@ bool mergeRosePair(RoseBuildImpl &tbi, left_id &r1, left_id &r2, return true; } else if (r1.castle()) { assert(r2.castle()); - assert(tbi.cc.grey.allowCastle); + assert(build.cc.grey.allowCastle); map top_map; if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) { @@ -1340,58 +1361,184 @@ bool mergeRosePair(RoseBuildImpl &tbi, left_id &r1, left_id &r2, } static -void processMergeQueue(RoseBuildImpl &tbi, RoseBouquet &roses, - priority_queue &pq) { - unordered_set dead; +bool mergeLeftVL_checkTargetsCompatible(const RoseBuildImpl &build, + const vector &targets_1, + const vector &targets_2) { + assert(!targets_1.empty()); + assert(!targets_2.empty()); - DEBUG_PRINTF("merge queue has %zu entries\n", pq.size()); - - while (!pq.empty()) { - DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n", - pq.top().r1.graph(), pq.top().r2.graph(), pq.top().cpl, - pq.top().states); - - left_id r1 = pq.top().r1, r2 = pq.top().r2; - pq.pop(); - - if (contains(dead, r1) || contains(dead, r2)) { - continue; + vector> ulits; /* lit + lag pairs */ + for (auto a : targets_1) { + u32 ulag = build.g[a].left.lag; + for (u32 id : build.g[a].literals) { + ulits.emplace_back(&build.literals.at(id), ulag); } - - if (r1.graph() && r2.graph()) { - NGHolder *h1 = r1.graph(), *h2 = r2.graph(); - CharReach stop1 = findStopAlphabet(*h1, SOM_NONE); - CharReach stop2 = findStopAlphabet(*h2, SOM_NONE); - CharReach stopboth(stop1 & stop2); - DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(), - stop2.count(), stopboth.count()); - if (stopboth.count() < 10 && - (stop1.count() > 10 || stop2.count() > 10)) { - DEBUG_PRINTF("skip merge, would kill stop alphabet\n"); - continue; - } - size_t maxstop = max(stop1.count(), stop2.count()); - if (maxstop > 200 && 
stopboth.count() < 200) { - DEBUG_PRINTF("skip merge, would reduce stop alphabet\n"); - continue; - } - } - - const deque &verts1 = roses.vertices(r1); - const deque &verts2 = roses.vertices(r2); - - if (!mergeableRoseVertices(tbi, verts1, verts2)) { - continue; - } - - if (!mergeRosePair(tbi, r1, r2, verts1, verts2)) { - continue; - } - - roses.insert(r2, verts1); - roses.erase(r1); - dead.insert(r1); } + + vector> vlits; + for (auto a : targets_2) { + u32 vlag = build.g[a].left.lag; + for (u32 id : build.g[a].literals) { + vlits.emplace_back(&build.literals.at(id), vlag); + } + } + + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; + } + + // Check preds are compatible as well. + if (!checkPredDelays(build, targets_1, targets_2) + || !checkPredDelays(build, targets_2, targets_1)) { + return false; + } + + DEBUG_PRINTF("vertex sets are mergeable\n"); + return true; +} + +/** + * In block mode, we want to be a little more selective -- we will only merge + * prefix engines when the literal sets are the same or if the merged graph + * has only grown by a small amount. + */ +static +bool goodBlockModeMerge(const RoseBuildImpl &build, + const vector &u_verts, const left_id &u_eng, + const vector &v_verts, + const left_id &v_eng) { + assert(!build.cc.streaming); + + // Always merge infixes if we can (subject to the other criteria in + // mergeableRoseVertices). + if (!build.isRootSuccessor(u_verts.front())) { + return true; + } + + const RoseGraph &g = build.g; + + flat_set u_lits; + for (RoseVertex u : u_verts) { + insert(&u_lits, g[u].literals); + } + + flat_set v_lits; + for (RoseVertex v : v_verts) { + insert(&v_lits, g[v].literals); + } + + // Merge prefixes with identical literal sets (as we'd have to run them + // both when we see those literals anyway). + if (u_lits == v_lits) { + return true; + } + + // The rest of this function only deals with the case when have graph + // leftfixes. 
+ if (!u_eng.graph()) { + return false; + } + assert(v_eng.graph()); + const NGHolder &ug = *u_eng.graph(); + const NGHolder &vg = *v_eng.graph(); + + size_t u_count = num_vertices(ug); + size_t v_count = num_vertices(vg); + DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", + u_count, v_count); + if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || + v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("prefixes too big already\n"); + return false; + } + + DEBUG_PRINTF("trying merge\n"); + NGHolder h; + cloneHolder(h, vg); + if (!mergeNfaPair(ug, h, nullptr, build.cc)) { + DEBUG_PRINTF("couldn't merge\n"); + return false; + } + + const size_t merged_count = num_vertices(h); + DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); + if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("exceeded limit\n"); + return false; + } + + // We want to only perform merges that take advantage of some + // commonality in the two input graphs, so we check that the number of + // vertices has only grown a small amount: somewhere between the sum + // (no commonality) and the max (no growth at all) of the vertex counts + // of the input graphs. + size_t max_size = u_count + v_count; + size_t min_size = max(u_count, v_count); + size_t max_growth = ((max_size - min_size) * 25) / 100; + if (merged_count > min_size + max_growth) { + DEBUG_PRINTF("grew too much\n"); + return false; + } + + // We don't want to squander any chances at accelerating. + if (!isAccelerableLeftfix(build, h) + && (isAccelerableLeftfix(build, ug) + || isAccelerableLeftfix(build, vg))) { + DEBUG_PRINTF("would lose accel property\n"); + return false; + } + + DEBUG_PRINTF("safe to merge\n"); + return true; +} + +/** + * Merge r1 into r2 if safe and appropriate. Returns true on success. 
+ */ +static +bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, + const vector &targets_1, + left_id &r2, + const vector &targets_2) { + if (targets_1.empty() || targets_2.empty()) { + /* one of the engines has already been merged away */ + return false; + } + + assert(!r1.graph() == !r2.graph()); + if (r1.graph()) { + NGHolder *h1 = r1.graph(); + NGHolder *h2 = r2.graph(); + CharReach stop1 = findStopAlphabet(*h1, SOM_NONE); + CharReach stop2 = findStopAlphabet(*h2, SOM_NONE); + CharReach stopboth = stop1 & stop2; + DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(), + stop2.count(), stopboth.count()); + if (stopboth.count() < 10 + && (stop1.count() > 10 || stop2.count() > 10)) { + DEBUG_PRINTF("skip merge, would kill stop alphabet\n"); + return false; + } + size_t maxstop = max(stop1.count(), stop2.count()); + if (maxstop > 200 && stopboth.count() < 200) { + DEBUG_PRINTF("skip merge, would reduce stop alphabet\n"); + return false; + } + } + + /* Rechecking that the targets are compatible, as we may have already + * merged new states into r1 or r2 and we need to verify that this + * candidate is still ok. 
*/ + if (!mergeLeftVL_checkTargetsCompatible(build, targets_1, targets_2)) { + return false; + } + + if (!build.cc.streaming + && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) { + return false; + } + + return mergeLeftfixPair(build, r1, r2, targets_1, targets_2); } static @@ -1416,30 +1563,6 @@ bool nfaHasFiniteMaxWidth(const NGHolder &g) { return findMaxWidth(g).is_finite(); } -namespace { -struct RoseMergeKey { - RoseMergeKey(const set &parents_in, - bool narrowStart_in, bool hasMaxWidth_in) : - narrowStart(narrowStart_in), - hasMaxWidth(hasMaxWidth_in), - parents(parents_in) {} - bool operator<(const RoseMergeKey &b) const { - const RoseMergeKey &a = *this; - ORDER_CHECK(narrowStart); - ORDER_CHECK(hasMaxWidth); - ORDER_CHECK(parents); - return false; - } - - // NOTE: these two bool discriminators are only used for prefixes, not - // infixes. - bool narrowStart; - bool hasMaxWidth; - - set parents; -}; -} - static bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { if (!proper_out_degree(h.startDs, h)) { @@ -1472,14 +1595,73 @@ u32 commonPrefixLength(left_id &r1, left_id &r2) { return 0; } +namespace { +struct MergeKey { + MergeKey(const left_id &left, flat_set parents_in) : + parents(std::move(parents_in)) { + + // We want to distinguish prefixes (but not infixes) on whether they + // have a narrow start or max width. 
+ if (left.graph() && !is_triggered(*left.graph())) { + const NGHolder &h = *left.graph(); + narrowStart = nfaHasNarrowStart(h); + hasMaxWidth = nfaHasFiniteMaxWidth(h); + } else { + narrowStart = false; + hasMaxWidth = false; + } + + if (left.castle()) { + /* castles should have a non-empty reach */ + assert(left.castle()->reach().any()); + castle_cr = left.castle()->reach(); + } else { + assert(left.graph()); + } + } + + bool operator<(const MergeKey &b) const { + const MergeKey &a = *this; + ORDER_CHECK(narrowStart); + ORDER_CHECK(hasMaxWidth); + ORDER_CHECK(castle_cr); + ORDER_CHECK(parents); + return false; + } + + // NOTE: these two bool discriminators are only used for prefixes, not + // infixes. + bool narrowStart; + bool hasMaxWidth; + CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. */ + + flat_set parents; +}; +} + +template +static +void chunk(vector in, vector> *out, size_t chunk_size) { + if (in.size() <= chunk_size) { + out->push_back(std::move(in)); + return; + } + + out->push_back(vector()); + out->back().reserve(chunk_size); + for (const auto &t : in) { + if (out->back().size() >= chunk_size) { + out->push_back(vector()); + out->back().reserve(chunk_size); + } + out->back().push_back(std::move(t)); + } +} + /** * This pass attempts to merge prefix/infix engines which share a common set of * parent vertices. * - * TODO: this function should be rewritten as it assumes all instances of an - * engine have the same set of parent vertices. This can cause the same set of - * merges to be attempted multiple times. - * * Engines are greedily merged pairwise by this process based on a priority * queue keyed off the common prefix length. * @@ -1487,13 +1669,9 @@ u32 commonPrefixLength(left_id &r1, left_id &r2) { * the stop alphabet. * * Infixes: - * - LBR candidates are not considered. However, LBRs which have already been - * converted to castles are considered for merging with other castles. 
- * TODO: Check if we can still have LBR candidates at this stage and if these - * criteria still makes sense and then add explanation as to why there are - * both castles and graphs which are LBR candidates at this stage. * - It is expected that when this is run all infixes are still at the single - * top stage. + * top stage as we have not yet merged unrelated infixes together. After + * execution, castles may have multiple (but equivalent) tops. * * Prefixes: * - transient prefixes are not considered. @@ -1503,142 +1681,140 @@ u32 commonPrefixLength(left_id &r1, left_id &r2) { * - merges are not considered in cases where dot star start state will be * reformed to optimise a leading repeat. */ -void mergeLeftfixesVariableLag(RoseBuildImpl &tbi) { - if (!tbi.cc.grey.mergeRose) { +void mergeLeftfixesVariableLag(RoseBuildImpl &build) { + if (!build.cc.grey.mergeRose) { return; } - assert(!hasOrphanedTops(tbi)); + assert(!hasOrphanedTops(build)); - map rosesByParent; - RoseGraph &g = tbi.g; - set parents; + RoseGraph &g = build.g; + + insertion_ordered_map> eng_verts; DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("entry\n"); DEBUG_PRINTF("-----\n"); for (auto v : vertices_range(g)) { - if (!g[v].left) { + const auto &left = g[v].left; + if (!left) { continue; } + eng_verts[left].push_back(v); + } - const bool is_prefix = tbi.isRootSuccessor(v); - + map> engine_groups; + for (const auto &e : eng_verts) { + const left_id &left = e.first; + const auto &verts = e.second; // Only non-transient for the moment. - if (contains(tbi.transient, g[v].left)) { + if (contains(build.transient, left)) { continue; } // No forced McClellan or Haig infix merges. 
- if (g[v].left.dfa || (!is_prefix && g[v].left.haig)) { + if (left.dfa() || left.haig()) { continue; } + assert(left.graph() || left.castle()); - if (g[v].left.graph) { - NGHolder &h = *g[v].left.graph; + if (left.graph()) { + const NGHolder &h = *left.graph(); + /* we should not have merged yet */ + assert(!is_triggered(h) || onlyOneTop(h)); - /* Ensure that kind on the graph is correct */ - assert(h.kind == (is_prefix ? NFA_PREFIX : NFA_INFIX)); - - if (hasReformedStartDotStar(h, tbi.cc.grey)) { + if (hasReformedStartDotStar(h, build.cc.grey)) { continue; // preserve the optimisation of the leading repeat } + } else { + assert(left.castle()); - if (!is_prefix && isLargeLBR(h, tbi.cc.grey)) { + if (!build.cc.grey.allowCastle) { + DEBUG_PRINTF("castle merging disallowed by greybox\n"); continue; } } - if (g[v].left.castle && !tbi.cc.grey.allowCastle) { - DEBUG_PRINTF("castle merging disallowed by greybox\n"); - continue; - } - // We collapse the anchored root into the root vertex when calculating // parents, so that we can merge differently-anchored prefix roses // together. (Prompted by UE-2100) - /* TODO: check this if this still does anything given that - * mergeableRoseVertices() does a strict check. - */ - parents.clear(); - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (tbi.isAnyStart(u)) { - parents.insert(tbi.root); - } else { - parents.insert(u); - } + flat_set parents; + for (RoseVertex v : verts) { + insert(&parents, inv_adjacent_vertices_range(v, g)); } - if (parents.empty()) { - assert(0); - continue; + if (contains(parents, build.anchored_root)) { + parents.erase(build.anchored_root); + parents.insert(build.root); } - // We want to distinguish prefixes (but not infixes) on whether they - // have a narrow start or max width. 
- bool narrowStart = false, hasMaxWidth = false; - if (is_prefix && g[v].left.graph) { - const NGHolder &h = *g[v].left.graph; - narrowStart = nfaHasNarrowStart(h); - hasMaxWidth = nfaHasFiniteMaxWidth(h); - } + assert(!parents.empty()); - RoseMergeKey key(parents, narrowStart, hasMaxWidth); - rosesByParent[key].insert(g[v].left, v); + engine_groups[MergeKey(left, parents)].push_back(left); } - for (auto &m : rosesByParent) { - if (m.second.size() < 2) { + vector> chunks; + for (auto &raw_group : engine_groups | map_values) { + chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX); + } + engine_groups.clear(); + + DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size()); + + for (auto &roses : chunks) { + if (roses.size() < 2) { continue; } + // All pairs on the prio queue. + u32 tie_breaker = 0; + priority_queue pq; + for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) { + left_id r1 = *it; + const vector &targets_1 = eng_verts[r1]; - deque rose_groups; - chunkBouquets(m.second, rose_groups, MERGE_GROUP_SIZE_MAX); - m.second.clear(); - DEBUG_PRINTF("chunked roses into %zu groups\n", rose_groups.size()); + for (auto jt = next(it); jt != ite; ++jt) { + left_id r2 = *jt; - for (auto &roses : rose_groups) { - // All pairs on the prio queue. - u32 tie_breaker = 0; - priority_queue pq; - for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) { - left_id r1 = *it; - const deque &verts1 = roses.vertices(r1); + /* we should have already split on engine types and reach */ + assert(!r1.castle() == !r2.castle()); + assert(!r1.graph() == !r2.graph()); + assert(!r1.castle() + || r1.castle()->reach() == r2.castle()->reach()); - for (auto jt = next(it); jt != ite; ++jt) { - left_id r2 = *jt; - - // Roses must be of the same engine type to be mergeable. - if ((!r1.graph() != !r2.graph()) || - (!r1.castle() != !r2.castle())) { - continue; - } - - // Castles must have the same reach to be mergeable. 
- if (r1.castle()) { - if (r1.castle()->reach() != r2.castle()->reach()) { - continue; - } - } - - const deque &verts2 = roses.vertices(r2); - if (!mergeableRoseVertices(tbi, verts1, verts2)) { - continue; // No point queueing unmergeable cases. - } - - u32 cpl = commonPrefixLength(r1, r2); - pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++)); + const vector &targets_2 = eng_verts[r2]; + if (!mergeLeftVL_checkTargetsCompatible(build, targets_1, + targets_2)) { + continue; // No point queueing unmergeable cases. } + + u32 cpl = commonPrefixLength(r1, r2); + pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++)); + } + } + + DEBUG_PRINTF("merge queue has %zu entries\n", pq.size()); + + while (!pq.empty()) { + left_id r1 = pq.top().r1; + left_id r2 = pq.top().r2; + DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n", + r1.graph(), r2.graph(), pq.top().cpl, pq.top().states); + pq.pop(); + vector &targets_1 = eng_verts[r1]; + vector &targets_2 = eng_verts[r2]; + if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2, + targets_2)) { + insert(&targets_2, targets_2.end(), targets_1); + targets_1.clear(); } - processMergeQueue(tbi, roses, pq); } } DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("exit\n"); DEBUG_PRINTF("-----\n"); - assert(!hasOrphanedTops(tbi)); + assert(!hasOrphanedTops(build)); } namespace { From d9030805de7e260c5ad9d7a66a08220b7366a0cd Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 10 Aug 2017 11:33:15 +1000 Subject: [PATCH 149/190] cmake: use abi-version on older gcc --- CMakeLists.txt | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 398c5d0c..71f8a6d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,7 +248,13 @@ else() endif() if(CMAKE_COMPILER_IS_GNUCXX) - set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0 -Wno-unused-local-typedefs -Wno-maybe-uninitialized") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-local-typedefs -Wno-maybe-uninitialized") + if 
(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0") + endif () + # don't complain about abi + set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi") endif() if (NOT(ARCH_IA32 AND RELEASE_BUILD)) @@ -256,11 +262,6 @@ else() set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer") endif() - if (RELEASE_BUILD) - # we don't need the noise of ABI warnings in a release build - set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Wno-abi") - set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi") - endif () if (CMAKE_C_COMPILER_ID MATCHES "Intel") set(SKYLAKE_FLAG "-xCORE-AVX512") From 96f57a9c2e565c303faac3e82b1fa567ec9a99b5 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 2 Aug 2017 11:38:14 +1000 Subject: [PATCH 150/190] icc: disable more warnings --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 71f8a6d2..7962b29b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -420,10 +420,10 @@ endif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") if(NOT WIN32) if(CMAKE_C_COMPILER_ID MATCHES "Intel") - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable=remark") endif() if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 177 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 -diag-disable 1170 -diag-disable 3373 -diag-disable=remark") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-error 10006 -diag-disable 68 -diag-disable 177 -diag-disable 186 -diag-disable 2304 -diag-disable 2305 -diag-disable 2338 -diag-disable 1418 
-diag-disable 1170 -diag-disable 3373 -diag-disable=remark") endif() endif() From b259283d6b86df9c0baed9d1f24dcb39bdc0dca9 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 10 Aug 2017 15:45:42 +1000 Subject: [PATCH 151/190] cmake: set isystem flag for older cmake --- CMakeLists.txt | 14 +++++++------- tools/hsbench/CMakeLists.txt | 7 +------ unit/CMakeLists.txt | 7 +------ 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7962b29b..fb9c7a4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,7 @@ endif() set(BINDIR "${PROJECT_BINARY_DIR}/bin") set(LIBDIR "${PROJECT_BINARY_DIR}/lib") + set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}) # First for the generic no-config case @@ -57,6 +58,11 @@ if(CMAKE_GENERATOR STREQUAL Xcode) set(XCODE TRUE) endif() +# older versions of cmake don't know things support isystem +if (XCODE OR CMAKE_CXX_COMPILER_ID MATCHES "Intel") + set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "-isystem") +endif () + set(CMAKE_INCLUDE_CURRENT_DIR 1) include_directories(${PROJECT_SOURCE_DIR}/src) include_directories(${PROJECT_BINARY_DIR}) @@ -402,13 +408,7 @@ CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE) endif() -if (NOT XCODE) - include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) -else() - # cmake doesn't think Xcode supports isystem - set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${Boost_INCLUDE_DIRS}") -endif() - +include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) if(CMAKE_SYSTEM_NAME MATCHES "Linux") set(LINUX TRUE) diff --git a/tools/hsbench/CMakeLists.txt b/tools/hsbench/CMakeLists.txt index 9b2cde4d..a8792cf7 100644 --- a/tools/hsbench/CMakeLists.txt +++ b/tools/hsbench/CMakeLists.txt @@ -4,12 +4,7 @@ if (NOT SQLITE3_FOUND) return() endif() -if (NOT XCODE) - include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS}) -else() - # cmake doesn't think Xcode supports isystem - set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${SQLITE3_INCLUDE_DIRS}") -endif() 
+include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS}) # BSD has the _np funcs in a _np header CHECK_INCLUDE_FILE_CXX(pthread_np.h HAVE_PTHREAD_NP_H) diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 6f8a8bf4..e9d1821b 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -2,12 +2,7 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") set(gtest_SOURCES gtest/gtest-all.cc gtest/gtest.h) -if(NOT XCODE) - include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}) -else() - set(CMAKE_CXX_FLAGS "-isystem ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CXX_FLAGS}") -endif() - +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR}) # remove some warnings From 2a492273b536c83bd9bb265a96a3102a76a30ac5 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 10 Aug 2017 13:43:20 +1000 Subject: [PATCH 152/190] remove !LBR constraints from merge passes we have either converted candidates to castles already or we have converted them back in the hope of merging them with other holders --- src/nfagraph/ng_lbr.cpp | 20 -------------------- src/nfagraph/ng_lbr.h | 3 --- src/rose/rose_build_merge.cpp | 33 ++++++--------------------------- unit/internal/lbr.cpp | 2 -- 4 files changed, 6 insertions(+), 52 deletions(-) diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index 9bf16efe..d8ba503c 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -346,24 +346,4 @@ bytecode_ptr constructLBR(const NGHolder &g, return constructLBR(proto, triggers, cc, rm); } -/** \brief True if graph \p g could be turned into an LBR engine. 
*/ -bool isLBR(const NGHolder &g, const Grey &grey) { - if (!grey.allowLbr) { - return false; - } - - PureRepeat repeat; - if (!isPureRepeat(g, repeat)) { - DEBUG_PRINTF("not pure bounded repeat\n"); - return false; - } - - if (repeat.reports.size() != 1) { - DEBUG_PRINTF("too many reports\n"); - return false; - } - - return true; -} - } // namespace ue2 diff --git a/src/nfagraph/ng_lbr.h b/src/nfagraph/ng_lbr.h index 1eec9653..c181dbb9 100644 --- a/src/nfagraph/ng_lbr.h +++ b/src/nfagraph/ng_lbr.h @@ -66,9 +66,6 @@ constructLBR(const CastleProto &proto, const std::vector> &triggers, const CompileContext &cc, const ReportManager &rm); -/** \brief True if graph \p g could be turned into an LBR engine. */ -bool isLBR(const NGHolder &g, const Grey &grey); - } // namespace ue2 #endif // NG_LBR_H diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 0d73c101..fb7b3a1f 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -101,10 +101,6 @@ static const size_t DFA_CHUNK_SIZE_MAX = 200; /** \brief Max DFA states in a merged DFA. */ static const size_t DFA_MERGE_MAX_STATES = 8000; -/** \brief An LBR must have at least this many vertices to be protected from - * merging with other graphs. */ -static const size_t LARGE_LBR_MIN_VERTICES = 32; - /** \brief In block mode, merge two prefixes even if they don't have identical * literal sets if they have fewer than this many states and the merged graph * is also small. 
*/ @@ -122,14 +118,6 @@ size_t small_rose_threshold(const CompileContext &cc) { : SMALL_ROSE_THRESHOLD_BLOCK; } -static -bool isLargeLBR(const NGHolder &g, const Grey &grey) { - if (num_vertices(g) < LARGE_LBR_MIN_VERTICES) { - return false; - } - return isLBR(g, grey); -} - namespace { struct DupeLeafKey { explicit DupeLeafKey(const RoseVertexProps &litv) @@ -1889,6 +1877,12 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi) { continue; } + if (leftfix.graph()) { + /* we should not have merged yet */ + assert(!is_triggered(*leftfix.graph()) + || onlyOneTop(*leftfix.graph())); + } + roseGrouping[DedupeLeftKey(tbi, v)].insert(leftfix, v); } @@ -2275,11 +2269,6 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) { continue; } - // Don't merge cases that will become LBRs or haigs. - if (isLargeLBR(h, tbi.cc.grey)) { - continue; - } - // Small roses only. if (num_vertices(h) > small_rose_threshold(tbi.cc)) { continue; @@ -2497,11 +2486,6 @@ void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { continue; } - if (isLargeLBR(*h, tbi.cc.grey)) { - DEBUG_PRINTF("not considering LBR suffix for merge\n"); - continue; - } - suffixes.insert(g[v].suffix, v); } @@ -2564,11 +2548,6 @@ void mergeSmallSuffixes(RoseBuildImpl &tbi) { continue; } - if (isLargeLBR(*h, tbi.cc.grey)) { - DEBUG_PRINTF("not considering LBR suffix for merge\n"); - continue; - } - suffixes.insert(g[v].suffix, v); } diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index d32f7e8f..2c585ae5 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -101,8 +101,6 @@ protected: ASSERT_TRUE(g != nullptr); clearReports(*g); - ASSERT_TRUE(isLBR(*g, grey)); - rm.setProgramOffset(0, MATCH_REPORT); /* LBR triggered by dot */ From a97cdba8cca6988c5ebce199148247c1d70768ee Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 9 Aug 2017 13:31:45 +1000 Subject: [PATCH 153/190] rose merges: why not dedupe transient engines? 
We avoid merging different transient engines as it may force us to run heavier engines and no stream state is consumed either way. However, there should be no harm in just removing duplicate instances of a transient engine. --- src/rose/rose_build_merge.cpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index fb7b3a1f..6bd76381 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -517,8 +517,8 @@ private: * * Note: only roles with a single predecessor vertex are considered for this * transform - it should probably be generalised to work for roles which share - * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or it - * should be retired entirely. + * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or + * it should be retired entirely. */ bool dedupeLeftfixes(RoseBuildImpl &tbi) { DEBUG_PRINTF("deduping leftfixes\n"); @@ -1812,7 +1812,8 @@ namespace { */ struct DedupeLeftKey { DedupeLeftKey(const RoseBuildImpl &build, RoseVertex v) - : left_hash(hashLeftfix(build.g[v].left)) { + : left_hash(hashLeftfix(build.g[v].left)), + transient(contains(build.transient, build.g[v].left)) { const auto &g = build.g; for (const auto &e : in_edges_range(v, g)) { preds.emplace(g[source(e, g)].index, g[e].rose_top); @@ -1820,7 +1821,8 @@ struct DedupeLeftKey { } bool operator<(const DedupeLeftKey &b) const { - return tie(left_hash, preds) < tie(b.left_hash, b.preds); + return tie(left_hash, preds, transient) + < tie(b.left_hash, b.preds, b.transient); } private: @@ -1830,6 +1832,9 @@ private: /** For each in-edge, the pair of (parent index, edge top). */ set> preds; + + /** We don't want to combine transient with non-transient. 
*/ + bool transient; }; } // namespace @@ -1851,15 +1856,18 @@ private: * successor may want to inspect it; the overlap relationships between the * involved literals are examined to ensure that this property holds. * + * Note: this is unable to dedupe when delayed literals are involved unlike + * dedupeLeftfixes. + * * Note: in block mode we restrict the dedupe of prefixes further as some of * logic checks are shared with the mergeLeftfix functions. */ -void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi) { +void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { map roseGrouping; DEBUG_PRINTF("entry\n"); - RoseGraph &g = tbi.g; + RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (!g[v].left) { continue; @@ -1867,11 +1875,6 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi) { const left_id leftfix(g[v].left); - // Only non-transient for the moment. - if (contains(tbi.transient, leftfix)) { - continue; - } - if (leftfix.haig()) { /* TODO: allow merging of identical haigs */ continue; @@ -1883,7 +1886,7 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi) { || onlyOneTop(*leftfix.graph())); } - roseGrouping[DedupeLeftKey(tbi, v)].insert(leftfix, v); + roseGrouping[DedupeLeftKey(build, v)].insert(leftfix, v); } for (RoseBouquet &roses : roseGrouping | map_values) { @@ -1907,7 +1910,7 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi) { continue; } - if (!mergeableRoseVertices(tbi, verts1, verts2)) { + if (!mergeableRoseVertices(build, verts1, verts2)) { continue; } @@ -1927,6 +1930,10 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi) { g[v].left.lag = orig_lag; } roses.insert(r2, verts1); + + /* remove stale entry from transient set, if present */ + build.transient.erase(r1); + // no need to erase h1 from roses, that would invalidate `it'. 
break; } From 47e64646b4d7d07ed7261eef742137f7e4a34b07 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 10 Aug 2017 15:42:26 +1000 Subject: [PATCH 154/190] move mergeDupeLeaves() and uncalcLeaves() to rose_build_role_aliasing Unlike the rest of rose_build_mergem, these functions relate to merging roles/vertices rather than merging engines. --- src/rose/rose_build_merge.cpp | 297 ------------------------- src/rose/rose_build_merge.h | 9 +- src/rose/rose_build_role_aliasing.cpp | 302 ++++++++++++++++++++++++++ src/rose/rose_build_role_aliasing.h | 14 +- unit/internal/rose_build_merge.cpp | 1 + 5 files changed, 317 insertions(+), 306 deletions(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 6bd76381..04d5e7d0 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -118,303 +118,6 @@ size_t small_rose_threshold(const CompileContext &cc) { : SMALL_ROSE_THRESHOLD_BLOCK; } -namespace { -struct DupeLeafKey { - explicit DupeLeafKey(const RoseVertexProps &litv) - : literals(litv.literals), reports(litv.reports), - eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left), - som_adjust(litv.som_adjust) { - DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept); - DEBUG_PRINTF("report %u\n", left.leftfix_report); - DEBUG_PRINTF("lag %u\n", left.lag); - } - - bool operator<(const DupeLeafKey &b) const { - const DupeLeafKey &a = *this; - ORDER_CHECK(literals); - ORDER_CHECK(eod_accept); - ORDER_CHECK(suffix); - ORDER_CHECK(reports); - ORDER_CHECK(som_adjust); - ORDER_CHECK(left.leftfix_report); - ORDER_CHECK(left.lag); - return false; - } - - flat_set literals; - flat_set reports; - bool eod_accept; - suffix_id suffix; - LeftEngInfo left; - u32 som_adjust; -}; - -struct UncalcLeafKey { - UncalcLeafKey(const RoseGraph &g, RoseVertex v) - : literals(g[v].literals), rose(g[v].left) { - for (const auto &e : in_edges_range(v, g)) { - RoseVertex u = source(e, g); - preds.insert(make_pair(u, g[e])); - } - } - - bool 
operator<(const UncalcLeafKey &b) const { - const UncalcLeafKey &a = *this; - ORDER_CHECK(literals); - ORDER_CHECK(preds); - ORDER_CHECK(rose); - return false; - } - - flat_set literals; - flat_set> preds; - LeftEngInfo rose; -}; -} // namespace - -/** - * This function merges leaf vertices with the same literals and report - * id/suffix. The leaf vertices of the graph are inspected and a mapping of - * leaf vertex properties to vertices is built. If the same set of leaf - * properties has already been seen when we inspect a vertex, we attempt to - * merge the vertex in with the previously seen vertex. This process can fail - * if the vertices share a common predecessor vertex but have a differing, - * incompatible relationship (different bounds or infix) with the predecessor. - * - * This takes place after \ref dedupeSuffixes to increase effectiveness as the - * same suffix is required for a merge to occur. - */ -void mergeDupeLeaves(RoseBuildImpl &tbi) { - map leaves; - vector changed; - - RoseGraph &g = tbi.g; - for (auto v : vertices_range(g)) { - if (in_degree(v, g) == 0) { - assert(tbi.isAnyStart(v)); - continue; - } - - DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu " - "out_degree %zu\n", g[v].index, in_degree(v, g), - out_degree(v, g)); - - // Vertex must be a reporting leaf node - if (g[v].reports.empty() || !isLeafNode(v, g)) { - continue; - } - - // At the moment, we ignore all successors of root or anchored_root, - // since many parts of our runtime assume that these have in-degree 1. 
- if (tbi.isRootSuccessor(v)) { - continue; - } - - DupeLeafKey dupe(g[v]); - if (leaves.find(dupe) == leaves.end()) { - leaves.insert(make_pair(dupe, v)); - continue; - } - - RoseVertex t = leaves.find(dupe)->second; - DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index, - g[t].index); - - vector deadEdges; - for (const auto &e : in_edges_range(v, g)) { - RoseVertex u = source(e, g); - DEBUG_PRINTF("u index=%zu\n", g[u].index); - if (RoseEdge et = edge(u, t, g)) { - if (g[et].minBound <= g[e].minBound - && g[et].maxBound >= g[e].maxBound) { - DEBUG_PRINTF("remove more constrained edge\n"); - deadEdges.push_back(e); - } - } else { - DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index, - g[t].index); - add_edge(u, t, g[e], g); - deadEdges.push_back(e); - } - } - - if (!deadEdges.empty()) { - for (auto &e : deadEdges) { - remove_edge(e, g); - } - changed.push_back(v); - g[t].min_offset = min(g[t].min_offset, g[v].min_offset); - g[t].max_offset = max(g[t].max_offset, g[v].max_offset); - } - } - DEBUG_PRINTF("find loop done\n"); - - // Remove any vertices that now have no in-edges. - size_t countRemovals = 0; - for (size_t i = 0; i < changed.size(); i++) { - RoseVertex v = changed[i]; - if (in_degree(v, g) == 0) { - DEBUG_PRINTF("remove vertex\n"); - if (!tbi.isVirtualVertex(v)) { - for (u32 lit_id : g[v].literals) { - tbi.literal_info[lit_id].vertices.erase(v); - } - } - remove_vertex(v, g); - countRemovals++; - } - } - - // if we've removed anything, we need to renumber vertices - if (countRemovals) { - renumber_vertices(g); - DEBUG_PRINTF("removed %zu vertices.\n", countRemovals); - } -} - -/** Merges the suffixes on the (identical) vertices in \a vcluster, used by - * \ref uncalcLeaves. */ -static -void mergeCluster(RoseGraph &g, const ReportManager &rm, - const vector &vcluster, - vector &dead, const CompileContext &cc) { - if (vcluster.size() <= 1) { - return; // No merge to perform. 
- } - - // Note that we batch merges up fairly crudely for performance reasons. - vector::const_iterator it = vcluster.begin(), it2; - while (it != vcluster.end()) { - vector cluster; - map rev; - - for (it2 = it; - it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX; - ++it2) { - RoseVertex v = *it2; - NGHolder *h = g[v].suffix.graph.get(); - assert(!g[v].suffix.haig); /* should not be here if haig */ - rev[h] = v; - cluster.push_back(h); - } - it = it2; - - DEBUG_PRINTF("merging cluster %zu\n", cluster.size()); - auto merged = mergeNfaCluster(cluster, &rm, cc); - DEBUG_PRINTF("done\n"); - - for (const auto &m : merged) { - NGHolder *h_victim = m.first; // mergee - NGHolder *h_winner = m.second; - RoseVertex victim = rev[h_victim]; - RoseVertex winner = rev[h_winner]; - - LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset); - ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset); - insert(&g[winner].reports, g[victim].reports); - - dead.push_back(victim); - } - } -} - -static -void findUncalcLeavesCandidates(RoseBuildImpl &tbi, - map > &clusters, - deque &ordered) { - const RoseGraph &g = tbi.g; - - vector suffix_vertices; // vertices with suffix graphs - unordered_map fcount; // ref count per graph - - for (auto v : vertices_range(g)) { - if (g[v].suffix) { - if (!g[v].suffix.graph) { - continue; /* cannot uncalc (haig/mcclellan); TODO */ - } - - assert(g[v].suffix.graph->kind == NFA_SUFFIX); - - // Ref count all suffixes, as we don't want to merge a suffix - // that happens to be shared with a non-leaf vertex somewhere. 
- DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index, - g[v].suffix.graph.get()); - fcount[g[v].suffix.graph.get()]++; - - // Vertex must be a reporting pseudo accept - if (!isLeafNode(v, g)) { - continue; - } - - suffix_vertices.push_back(v); - } - } - - for (auto v : suffix_vertices) { - if (in_degree(v, g) == 0) { - assert(tbi.isAnyStart(v)); - continue; - } - - const NGHolder *h = g[v].suffix.graph.get(); - assert(h); - DEBUG_PRINTF("suffix %p\n", h); - - // We can't easily merge suffixes shared with other vertices, and - // creating a unique copy to do so may just mean we end up tracking - // more NFAs. Better to leave shared suffixes alone. - if (fcount[h] != 1) { - DEBUG_PRINTF("skipping shared suffix\n"); - continue; - } - - UncalcLeafKey key(g, v); - vector &vec = clusters[key]; - if (vec.empty()) { - - ordered.push_back(key); - } - vec.push_back(v); - } - - DEBUG_PRINTF("find loop done\n"); -} - -/** - * This function attempts to combine identical roles (same literals, same - * predecessors, etc) with different suffixes into a single role which - * activates a larger suffix. The leaf vertices of the graph with a suffix are - * grouped into clusters which have members triggered by identical roles. The - * \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised - * to build a set of larger (and still implementable) suffixes. The graph is - * then updated to point to the new suffixes and any unneeded roles are - * removed. - * - * Note: suffixes which are shared amongst multiple roles are not considered - * for this pass as the individual suffixes would have to continue to exist for - * the other roles to trigger resulting in the transformation not producing any - * savings. - * - * Note: as \ref mergeNfaCluster is slow when the cluster sizes are large, - * clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller - * chunks for processing. 
- */ -void uncalcLeaves(RoseBuildImpl &tbi) { - DEBUG_PRINTF("uncalcing\n"); - - map > clusters; - deque ordered; - findUncalcLeavesCandidates(tbi, clusters, ordered); - - vector dead; - - for (const auto &key : ordered) { - DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size()); - mergeCluster(tbi.g, tbi.rm, clusters[key], dead, tbi.cc); - } - tbi.removeVertices(dead); -} - /** * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that * reports should not contribute to the hash. diff --git a/src/rose/rose_build_merge.h b/src/rose/rose_build_merge.h index 0f765bff..6de6c778 100644 --- a/src/rose/rose_build_merge.h +++ b/src/rose/rose_build_merge.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,8 +27,8 @@ */ /** \file - * \brief Rose Build: functions for reducing the size of the Rose graph - * through merging. + * \brief Rose Build: functions for reducing the number of engines in a Rose + * graph through merging or deduplicating engines. */ #ifndef ROSE_BUILD_MERGE_H @@ -44,9 +44,6 @@ namespace ue2 { class NGHolder; class RoseBuildImpl; -void mergeDupeLeaves(RoseBuildImpl &tbi); -void uncalcLeaves(RoseBuildImpl &tbi); - bool dedupeLeftfixes(RoseBuildImpl &tbi); void mergeLeftfixesVariableLag(RoseBuildImpl &tbi); void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi); diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index ba71a3ea..22581caf 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -62,6 +62,8 @@ using boost::adaptors::map_values; namespace ue2 { +static constexpr size_t MERGE_GROUP_SIZE_MAX = 200; + namespace { // Used for checking edge sets (both in- and out-) against each other. 
struct EdgeAndVertex { @@ -2026,4 +2028,304 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { assert(canImplementGraphs(build)); } +namespace { +struct DupeLeafKey { + explicit DupeLeafKey(const RoseVertexProps &litv) + : literals(litv.literals), reports(litv.reports), + eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left), + som_adjust(litv.som_adjust) { + DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept); + DEBUG_PRINTF("report %u\n", left.leftfix_report); + DEBUG_PRINTF("lag %u\n", left.lag); + } + + bool operator<(const DupeLeafKey &b) const { + const DupeLeafKey &a = *this; + ORDER_CHECK(literals); + ORDER_CHECK(eod_accept); + ORDER_CHECK(suffix); + ORDER_CHECK(reports); + ORDER_CHECK(som_adjust); + ORDER_CHECK(left.leftfix_report); + ORDER_CHECK(left.lag); + return false; + } + + flat_set literals; + flat_set reports; + bool eod_accept; + suffix_id suffix; + LeftEngInfo left; + u32 som_adjust; +}; + +struct UncalcLeafKey { + UncalcLeafKey(const RoseGraph &g, RoseVertex v) + : literals(g[v].literals), rose(g[v].left) { + for (const auto &e : in_edges_range(v, g)) { + RoseVertex u = source(e, g); + preds.insert(make_pair(u, g[e])); + } + } + + bool operator<(const UncalcLeafKey &b) const { + const UncalcLeafKey &a = *this; + ORDER_CHECK(literals); + ORDER_CHECK(preds); + ORDER_CHECK(rose); + return false; + } + + flat_set literals; + flat_set> preds; + LeftEngInfo rose; +}; +} // namespace + +/** + * This function merges leaf vertices with the same literals and report + * id/suffix. The leaf vertices of the graph are inspected and a mapping of + * leaf vertex properties to vertices is built. If the same set of leaf + * properties has already been seen when we inspect a vertex, we attempt to + * merge the vertex in with the previously seen vertex. This process can fail + * if the vertices share a common predecessor vertex but have a differing, + * incompatible relationship (different bounds or infix) with the predecessor. 
+ * + * This takes place after \ref dedupeSuffixes to increase effectiveness as the + * same suffix is required for a merge to occur. + * + * TODO: work if this is a subset of role aliasing (and if it can be eliminated) + * or clearly document cases that would not be covered by role aliasing. + */ +void mergeDupeLeaves(RoseBuildImpl &build) { + map leaves; + vector changed; + + RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (in_degree(v, g) == 0) { + assert(build.isAnyStart(v)); + continue; + } + + DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu " + "out_degree %zu\n", g[v].index, in_degree(v, g), + out_degree(v, g)); + + // Vertex must be a reporting leaf node + if (g[v].reports.empty() || !isLeafNode(v, g)) { + continue; + } + + // At the moment, we ignore all successors of root or anchored_root, + // since many parts of our runtime assume that these have in-degree 1. + if (build.isRootSuccessor(v)) { + continue; + } + + DupeLeafKey dupe(g[v]); + if (leaves.find(dupe) == leaves.end()) { + leaves.insert(make_pair(dupe, v)); + continue; + } + + RoseVertex t = leaves.find(dupe)->second; + DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index, + g[t].index); + + vector deadEdges; + for (const auto &e : in_edges_range(v, g)) { + RoseVertex u = source(e, g); + DEBUG_PRINTF("u index=%zu\n", g[u].index); + if (RoseEdge et = edge(u, t, g)) { + if (g[et].minBound <= g[e].minBound + && g[et].maxBound >= g[e].maxBound) { + DEBUG_PRINTF("remove more constrained edge\n"); + deadEdges.push_back(e); + } + } else { + DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index, + g[t].index); + add_edge(u, t, g[e], g); + deadEdges.push_back(e); + } + } + + if (!deadEdges.empty()) { + for (auto &e : deadEdges) { + remove_edge(e, g); + } + changed.push_back(v); + g[t].min_offset = min(g[t].min_offset, g[v].min_offset); + g[t].max_offset = max(g[t].max_offset, g[v].max_offset); + } + } + DEBUG_PRINTF("find loop done\n"); + + // Remove any 
vertices that now have no in-edges. + size_t countRemovals = 0; + for (size_t i = 0; i < changed.size(); i++) { + RoseVertex v = changed[i]; + if (in_degree(v, g) == 0) { + DEBUG_PRINTF("remove vertex\n"); + if (!build.isVirtualVertex(v)) { + for (u32 lit_id : g[v].literals) { + build.literal_info[lit_id].vertices.erase(v); + } + } + remove_vertex(v, g); + countRemovals++; + } + } + + // if we've removed anything, we need to renumber vertices + if (countRemovals) { + renumber_vertices(g); + DEBUG_PRINTF("removed %zu vertices.\n", countRemovals); + } +} + +/** Merges the suffixes on the (identical) vertices in \a vcluster, used by + * \ref uncalcLeaves. */ +static +void mergeCluster(RoseGraph &g, const ReportManager &rm, + const vector &vcluster, + vector &dead, const CompileContext &cc) { + if (vcluster.size() <= 1) { + return; // No merge to perform. + } + + // Note that we batch merges up fairly crudely for performance reasons. + vector::const_iterator it = vcluster.begin(), it2; + while (it != vcluster.end()) { + vector cluster; + map rev; + + for (it2 = it; + it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX; + ++it2) { + RoseVertex v = *it2; + NGHolder *h = g[v].suffix.graph.get(); + assert(!g[v].suffix.haig); /* should not be here if haig */ + rev[h] = v; + cluster.push_back(h); + } + it = it2; + + DEBUG_PRINTF("merging cluster %zu\n", cluster.size()); + auto merged = mergeNfaCluster(cluster, &rm, cc); + DEBUG_PRINTF("done\n"); + + for (const auto &m : merged) { + NGHolder *h_victim = m.first; // mergee + NGHolder *h_winner = m.second; + RoseVertex victim = rev[h_victim]; + RoseVertex winner = rev[h_winner]; + + LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset); + ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset); + insert(&g[winner].reports, g[victim].reports); + + dead.push_back(victim); + } + } +} + +static +void findUncalcLeavesCandidates(RoseBuildImpl &build, + map > &clusters, + deque &ordered) { + const RoseGraph &g 
= build.g; + + vector suffix_vertices; // vertices with suffix graphs + unordered_map fcount; // ref count per graph + + for (auto v : vertices_range(g)) { + if (g[v].suffix) { + if (!g[v].suffix.graph) { + continue; /* cannot uncalc (haig/mcclellan); TODO */ + } + + assert(g[v].suffix.graph->kind == NFA_SUFFIX); + + // Ref count all suffixes, as we don't want to merge a suffix + // that happens to be shared with a non-leaf vertex somewhere. + DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index, + g[v].suffix.graph.get()); + fcount[g[v].suffix.graph.get()]++; + + // Vertex must be a reporting pseudo accept + if (!isLeafNode(v, g)) { + continue; + } + + suffix_vertices.push_back(v); + } + } + + for (auto v : suffix_vertices) { + if (in_degree(v, g) == 0) { + assert(build.isAnyStart(v)); + continue; + } + + const NGHolder *h = g[v].suffix.graph.get(); + assert(h); + DEBUG_PRINTF("suffix %p\n", h); + + // We can't easily merge suffixes shared with other vertices, and + // creating a unique copy to do so may just mean we end up tracking + // more NFAs. Better to leave shared suffixes alone. + if (fcount[h] != 1) { + DEBUG_PRINTF("skipping shared suffix\n"); + continue; + } + + UncalcLeafKey key(g, v); + vector &vec = clusters[key]; + if (vec.empty()) { + + ordered.push_back(key); + } + vec.push_back(v); + } + + DEBUG_PRINTF("find loop done\n"); +} + +/** + * This function attempts to combine identical roles (same literals, same + * predecessors, etc) with different suffixes into a single role which + * activates a larger suffix. The leaf vertices of the graph with a suffix are + * grouped into clusters which have members triggered by identical roles. The + * \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised + * to build a set of larger (and still implementable) suffixes. The graph is + * then updated to point to the new suffixes and any unneeded roles are + * removed. 
+ * + * Note: suffixes which are shared amongst multiple roles are not considered + * for this pass as the individual suffixes would have to continue to exist for + * the other roles to trigger resulting in the transformation not producing any + * savings. + * + * Note: as \ref mergeNfaCluster is slow when the cluster sizes are large, + * clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller + * chunks for processing. + */ +void uncalcLeaves(RoseBuildImpl &build) { + DEBUG_PRINTF("uncalcing\n"); + + map > clusters; + deque ordered; + findUncalcLeavesCandidates(build, clusters, ordered); + + vector dead; + + for (const auto &key : ordered) { + DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size()); + mergeCluster(build.g, build.rm, clusters[key], dead, build.cc); + } + build.removeVertices(dead); +} + } // namespace ue2 diff --git a/src/rose/rose_build_role_aliasing.h b/src/rose/rose_build_role_aliasing.h index 274b76f9..4655f10d 100644 --- a/src/rose/rose_build_role_aliasing.h +++ b/src/rose/rose_build_role_aliasing.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,8 +26,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef ROSE_BUILD_ROLE_ALIASING -#define ROSE_BUILD_ROLE_ALIASING +#ifndef ROSE_BUILD_ROLE_ALIASING_H +#define ROSE_BUILD_ROLE_ALIASING_H + +/** \file + * \brief Rose Build: functions for reducing the size of the Rose graph + * through merging roles (RoseVertices) together. 
+ */ namespace ue2 { @@ -35,6 +40,9 @@ class RoseBuildImpl; void aliasRoles(RoseBuildImpl &build, bool mergeRoses); +void mergeDupeLeaves(RoseBuildImpl &build); +void uncalcLeaves(RoseBuildImpl &build); + } // namespace ue2 #endif diff --git a/unit/internal/rose_build_merge.cpp b/unit/internal/rose_build_merge.cpp index ed7c2bdc..5029f0a5 100644 --- a/unit/internal/rose_build_merge.cpp +++ b/unit/internal/rose_build_merge.cpp @@ -34,6 +34,7 @@ #include "rose/rose_build.h" #include "rose/rose_build_impl.h" #include "rose/rose_build_merge.h" +#include "rose/rose_build_role_aliasing.h" #include "util/report_manager.h" #include "util/boundary_reports.h" #include "util/compile_context.h" From 7192d47517b156384420bdc24ea69cfe79bfca25 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 14 Aug 2017 10:53:50 +1000 Subject: [PATCH 155/190] Update PCRE version to 8.41 --- doc/dev-reference/compilation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index 02b5c3f3..6b6d972a 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -64,7 +64,7 @@ libpcre are supported. The use of unsupported constructs will result in compilation errors. The version of PCRE used to validate Hyperscan's interpretation of this syntax -is 8.40. +is 8.41. 
==================== Supported Constructs From 84030aa0fc5d2d59969635b1506a0ef5258f1cd0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 8 Aug 2017 11:19:08 +1000 Subject: [PATCH 156/190] castlecompile: remove unused container --- src/nfa/castlecompile.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 661c9c2c..5884ebb2 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -153,13 +153,11 @@ static void getNeighborInfo(const CliqueGraph &g, vector &neighbor, const CliqueVertex &cv, const set &group) { u32 id = g[cv].stateId; - unordered_set neighborId; // find neighbors for cv for (const auto &v : adjacent_vertices_range(cv, g)) { - if (g[v].stateId != id && contains(group, g[v].stateId)){ + if (g[v].stateId != id && contains(group, g[v].stateId)) { neighbor.push_back(g[v].stateId); - neighborId.insert(g[v].stateId); DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId); } } From 14333f5b0b3f7ab790cbbcad63f1fa79440c6860 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 10 Aug 2017 10:53:39 +1000 Subject: [PATCH 157/190] rdfa: make getImplAlphaSize() inline --- src/nfa/rdfa.cpp | 4 ---- src/nfa/rdfa.h | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/nfa/rdfa.cpp b/src/nfa/rdfa.cpp index 077ff9ed..ae857b6a 100644 --- a/src/nfa/rdfa.cpp +++ b/src/nfa/rdfa.cpp @@ -33,10 +33,6 @@ namespace ue2 { // prevent weak vtables raw_dfa::~raw_dfa() {} -u16 raw_dfa::getImplAlphaSize() const { - return alpha_size - N_SPECIAL_SYMBOL; -} - void raw_dfa::stripExtraEodReports(void) { /* if a state generates a given report as a normal accept - then it does * not also need to generate an eod report for it */ diff --git a/src/nfa/rdfa.h b/src/nfa/rdfa.h index 0936fb15..6b994e4f 100644 --- a/src/nfa/rdfa.h +++ b/src/nfa/rdfa.h @@ -81,7 +81,7 @@ struct raw_dfa { explicit raw_dfa(nfa_kind k) : kind(k) {} virtual ~raw_dfa(); - u16 getImplAlphaSize() const; + u16 
getImplAlphaSize() const { return alpha_size - N_SPECIAL_SYMBOL; } virtual void stripExtraEodReports(void); bool hasEodReports(void) const; }; From d25740b615332ed32e1ccf2cf81b043b0539c065 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 10 Aug 2017 11:06:13 +1000 Subject: [PATCH 158/190] smallwrite_build: make failure_map unordered --- src/smallwrite/smallwrite_build.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index c041155b..345edfe9 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -395,7 +395,7 @@ namespace { */ struct ACVisitor : public boost::default_bfs_visitor { ACVisitor(LitTrie &trie_in, - map &failure_map_in, + unordered_map &failure_map_in, vector &ordering_in) : mutable_trie(trie_in), failure_map(failure_map_in), ordering(ordering_in) {} @@ -445,7 +445,7 @@ struct ACVisitor : public boost::default_bfs_visitor { private: LitTrie &mutable_trie; //!< For setting reports property. - map &failure_map; + unordered_map &failure_map; vector &ordering; //!< BFS ordering for vertices. }; } @@ -471,11 +471,13 @@ bool isSaneTrie(const LitTrie &trie) { */ static void buildAutomaton(LitTrie &trie, - map &failure_map, + unordered_map &failure_map, vector &ordering) { assert(isSaneTrie(trie)); // Find our failure transitions and reports. + failure_map.reserve(num_vertices(trie)); + ordering.reserve(num_vertices(trie)); ACVisitor ac_vis(trie, failure_map, ordering); boost::breadth_first_search(trie, trie.root, visitor(ac_vis)); @@ -672,7 +674,7 @@ unique_ptr buildDfa(LitTrie &trie, bool nocase) { DEBUG_PRINTF("trie has %zu states\n", num_vertices(trie)); vector ordering; - map failure_map; + unordered_map failure_map; buildAutomaton(trie, failure_map, ordering); // Construct DFA states in BFS order. 
From ba6f638c40ddf62d534dd949d91696bb229e907f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 10 Aug 2017 11:29:19 +1000 Subject: [PATCH 159/190] accel_dfa_build_strat: use flat_set --- src/nfa/accel_dfa_build_strat.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 928e078e..1f1b5e02 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -226,16 +226,16 @@ bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { } static -vector find_nonexit_symbols(const raw_dfa &rdfa, - const CharReach &escape) { - set rv; +flat_set find_nonexit_symbols(const raw_dfa &rdfa, + const CharReach &escape) { + flat_set rv; CharReach nonexit = ~escape; - for (auto i = nonexit.find_first(); i != CharReach::npos; + for (auto i = nonexit.find_first(); i != nonexit.npos; i = nonexit.find_next(i)) { rv.insert(rdfa.alpha_remap[i]); } - return vector(rv.begin(), rv.end()); + return rv; } static @@ -289,7 +289,7 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { static set find_region(const raw_dfa &rdfa, dstate_id_t base, - const AccelScheme &ei) { + const AccelScheme &ei) { DEBUG_PRINTF("looking for region around %hu\n", base); set region = {base}; From d5b3f2b50899ac3e515467174b364958598339a4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 10 Aug 2017 11:48:50 +1000 Subject: [PATCH 160/190] gatherReports: fewer map lookups --- src/nfa/mcclellancompile.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 8f73d077..45f30158 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -288,11 +288,12 @@ unique_ptr mcclellan_build_strat::gatherReports( raw_report_list rrl(s.reports, rm, remap_reports); DEBUG_PRINTF("non empty r\n"); - if (rev.find(rrl) != rev.end()) { - reports.push_back(rev[rrl]); + auto it = 
rev.find(rrl); + if (it != rev.end()) { + reports.push_back(it->second); } else { DEBUG_PRINTF("adding to rl %zu\n", ri->size()); - rev[rrl] = ri->size(); + rev.emplace(rrl, ri->size()); reports.push_back(ri->size()); ri->rl.push_back(rrl); } @@ -306,13 +307,14 @@ unique_ptr mcclellan_build_strat::gatherReports( DEBUG_PRINTF("non empty r eod\n"); raw_report_list rrl(s.reports_eod, rm, remap_reports); - if (rev.find(rrl) != rev.end()) { - reports_eod.push_back(rev[rrl]); + auto it = rev.find(rrl); + if (it != rev.end()) { + reports_eod.push_back(it->second); continue; } DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size()); - rev[rrl] = ri->size(); + rev.emplace(rrl, ri->size()); reports_eod.push_back(ri->size()); ri->rl.push_back(rrl); } @@ -325,10 +327,9 @@ unique_ptr mcclellan_build_strat::gatherReports( *arbReport = 0; } - /* if we have only a single report id generated from all accepts (not eod) * we can take some short cuts */ - set reps; + flat_set reps; for (u32 rl_index : reports) { if (rl_index == MO_INVALID_IDX) { From 36136f1003058d48feaadd6a3331a0a18ea708f9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 10 Aug 2017 13:19:26 +1000 Subject: [PATCH 161/190] fdr_compile: don't do string copies in isSuffix --- src/fdr/fdr_compile.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 770f30fe..e50245d7 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -638,16 +638,17 @@ bytecode_ptr FDRCompiler::build() { static bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) { - auto s1 = lit1.s; - auto s2 = lit2.s; - if (lit1.nocase || lit2.nocase) { - upperString(s1); - upperString(s2); - } + const auto &s1 = lit1.s; + const auto &s2 = lit2.s; size_t len1 = s1.length(); size_t len2 = s2.length(); assert(len1 >= len2); - return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2); + + auto lit_cmp = (lit1.nocase || lit2.nocase) + ? 
[](char a, char b) { return mytoupper(a) == mytoupper(b); } + : [](char a, char b) { return a == b; }; + + return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2, lit_cmp); } /* From 1aad3b0ed10eff9705659c8bbfa88c0561f4a817 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 10 Aug 2017 15:02:57 +1000 Subject: [PATCH 162/190] ue2_literal: make nocase member a dynamic_bitset We were previously using vector, but dynamic_bitset provides a faster any() impl --- src/util/ue2string.cpp | 49 +++++++++++++++++++------------------- src/util/ue2string.h | 53 ++++++++++++++++++++++-------------------- 2 files changed, 53 insertions(+), 49 deletions(-) diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index 02d7b713..b9cb67f4 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -237,12 +237,12 @@ ue2_literal::elem::operator CharReach () const { } ue2_literal::ue2_literal(const std::string &s_in, bool nc_in) - : s(nc_in ? toUpperString(s_in) : s_in), nocase(s_in.size(), nc_in) { + : s(nc_in ? toUpperString(s_in) : s_in), nocase(s_in.size()) { if (nc_in) { - // Quash nocase bit for non-alpha chars + // Switch on nocase bit for all alpha characters. 
for (size_t i = 0; i < s.length(); i++) { - if (!ourisalpha(s[i])) { - nocase[i] = false; + if (ourisalpha(s[i])) { + nocase.set(i); } } } @@ -255,21 +255,27 @@ ue2_literal ue2_literal::substr(size_type pos, size_type n) const { ue2_literal rv; rv.s = s.substr(pos, n); size_type upper = nocase.size(); - if (n != string::npos && n + pos < nocase.size()) { + if (n != npos && n + pos < nocase.size()) { upper = n + pos; } - rv.nocase.insert(rv.nocase.end(), nocase.begin() + pos, - nocase.begin() + upper); + + rv.nocase.resize(upper - pos, false); + for (size_t i = pos; i < upper; i++) { + rv.nocase.set(i - pos, nocase.test(i)); + } + assert(s.size() == nocase.size()); return rv; } ue2_literal &ue2_literal::erase(size_type pos, size_type n) { s.erase(pos, n); - size_type upper = nocase.size(); - if (n != string::npos && n + pos < nocase.size()) { - upper = n + pos; + + if (n != npos) { + for (size_type i = pos + n; i < nocase.size(); i++) { + nocase.set(i - n, nocase.test(i)); + } } - nocase.erase(nocase.begin() + pos, nocase.begin() + upper); + nocase.resize(s.size()); return *this; } @@ -306,29 +312,24 @@ bool ue2_literal::operator<(const ue2_literal &b) const { return nocase < b.nocase; } -ue2_literal operator+(const ue2_literal &a, const ue2_literal &b) { - ue2_literal rv; - rv.s = a.s + b.s; - rv.nocase = a.nocase; - rv.nocase.insert(rv.nocase.end(), b.nocase.begin(), b.nocase.end()); - return rv; -} - void ue2_literal::operator+=(const ue2_literal &b) { s += b.s; - nocase.insert(nocase.end(), b.nocase.begin(), b.nocase.end()); + size_t prefix = nocase.size(); + nocase.resize(prefix + b.nocase.size()); + for (size_t i = 0; i < b.nocase.size(); i++) { + nocase.set(prefix + i, b.nocase[i]); + } } bool ue2_literal::any_nocase() const { - return find(nocase.begin(), nocase.end(), true) != nocase.end(); + return nocase.any(); } void make_nocase(ue2_literal *lit) { ue2_literal rv; - for (ue2_literal::const_iterator it = lit->begin(); it != lit->end(); - ++it) { - 
rv.push_back(it->c, ourisalpha(it->c)); + for (const auto &elem: *lit) { + rv.push_back(elem.c, ourisalpha(elem.c)); } lit->swap(rv); diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 9eef65da..703faa2f 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -37,11 +37,13 @@ #include "util/charreach.h" #include "util/compare.h" #include "util/hash.h" +#include "util/operators.h" #include #include #include +#include #include namespace ue2 { @@ -80,7 +82,7 @@ struct ue2_case_string { bool nocase; }; -struct ue2_literal { +struct ue2_literal : totally_ordered { public: /// Single element proxy, pointed to by our const_iterator. struct elem { @@ -108,38 +110,37 @@ public: private: friend class boost::iterator_core_access; void increment() { - ++it; ++it_nc; + ++idx; } void decrement() { - --it; --it_nc; + --idx; } void advance(size_t n) { - it += n; it_nc += n; + idx += n; } difference_type distance_to(const const_iterator &other) const { - return other.it - it; + return other.idx - idx; } bool equal(const const_iterator &other) const { - return it == other.it; + return idx == other.idx && lit == other.lit; } const elem dereference() const { - return elem(*it, *it_nc); + return elem(lit->s[idx], lit->nocase[idx]); } friend struct ue2_literal; - const_iterator(const std::string::const_iterator &it_in, - const std::vector::const_iterator &it_nc_in) - : it(it_in), it_nc(it_nc_in) {} + const_iterator(const ue2_literal &lit_in, size_t idx_in) + : lit(&lit_in), idx(idx_in) {} - std::string::const_iterator it; - std::vector::const_iterator it_nc; + const ue2_literal *lit = nullptr; + size_t idx; }; using const_reverse_iterator = std::reverse_iterator; + using size_type = std::string::size_type; + static const size_type npos = std::string::npos; - typedef std::string::size_type size_type; - - ue2_literal() {} + ue2_literal() = default; ue2_literal(const std::string &s_in, bool nc_in); ue2_literal(char c, bool nc_in); ue2_literal(const ue2_literal &) = 
default; @@ -156,16 +157,16 @@ public: size_type length() const { return s.length(); } bool empty() const { return s.empty(); } - ue2_literal substr(size_type pos, size_type n = std::string::npos) const; + ue2_literal substr(size_type pos, size_type n = npos) const; const char *c_str() const { return s.c_str(); } bool any_nocase() const; const_iterator begin() const { - return const_iterator(s.begin(), nocase.begin()); + return const_iterator(*this, 0); } const_iterator end() const { - return const_iterator(s.end(), nocase.end()); + return const_iterator(*this, s.size()); } const_reverse_iterator rbegin() const { @@ -176,22 +177,23 @@ public: return const_reverse_iterator(begin()); } - ue2_literal &erase(size_type pos = 0, size_type n = std::string::npos); + ue2_literal &erase(size_type pos = 0, size_type n = npos); void push_back(const elem &e) { push_back(e.c, e.nocase); } void push_back(char c, bool nc); - const elem back() const { return elem(*s.rbegin(), nocase.back()); } - friend ue2_literal operator+(const ue2_literal &a, const ue2_literal &b); + const elem back() const { return *rbegin(); } + + friend ue2_literal operator+(ue2_literal a, const ue2_literal &b) { + a += b; + return a; + } void operator+=(const ue2_literal &b); bool operator==(const ue2_literal &b) const { return s == b.s && nocase == b.nocase; } - bool operator!=(const ue2_literal &b) const { - return !(*this == b); - } bool operator<(const ue2_literal &b) const; void clear(void) { s.clear(); nocase.clear(); } @@ -204,8 +206,9 @@ public: } private: + friend const_iterator; std::string s; - std::vector nocase; /* for trolling value */ + boost::dynamic_bitset<> nocase; }; /// Return a reversed copy of this literal. 
From 25170b32eb7b0bf2c51e609392b6893de5fc94a4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 10 Aug 2017 15:12:28 +1000 Subject: [PATCH 163/190] ue2_literal: better hash function --- src/util/ue2string.cpp | 5 +++++ src/util/ue2string.h | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index b9cb67f4..40076056 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -34,6 +34,7 @@ #include "charreach.h" #include "compare.h" +#include "hash_dynamic_bitset.h" #include #include @@ -325,6 +326,10 @@ bool ue2_literal::any_nocase() const { return nocase.any(); } +size_t ue2_literal::hash() const { + return hash_all(s, hash_dynamic_bitset()(nocase)); +} + void make_nocase(ue2_literal *lit) { ue2_literal rv; diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 703faa2f..3519207c 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -205,6 +205,8 @@ public: nocase.swap(other.nocase); } + size_t hash() const; + private: friend const_iterator; std::string s; @@ -321,7 +323,7 @@ struct hash { template<> struct hash { size_t operator()(const ue2::ue2_literal &lit) const { - return ue2::ue2_hasher()(lit); + return lit.hash(); } }; From fe31b387e8af8584846d51a7ff5b05fbef761162 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 10 Aug 2017 16:58:48 +1000 Subject: [PATCH 164/190] hash: use std::hash for string hashing --- src/util/hash.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/util/hash.h b/src/util/hash.h index 1c35d20c..60bc670a 100644 --- a/src/util/hash.h +++ b/src/util/hash.h @@ -35,6 +35,7 @@ #define UTIL_HASH_H #include +#include #include #include @@ -123,10 +124,16 @@ struct ue2_hash::value>::type> { } }; -/** \brief Hash for any container type that supports std::begin(). */ +/** + * \brief Hash for any container type that supports std::begin(). + * + * We exempt std::string as std::hash is provided and quicker. 
+ */ template -struct ue2_hash::value && - !has_hash_member::value>::type> { +struct ue2_hash::value && + !std::is_same::type, std::string>::value && + !has_hash_member::value>::type> { size_t operator()(const T &obj) const { size_t v = 0; for (const auto &elem : obj) { From 3f36665e390f2014449be470f11865736fbfc7d2 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 11 Aug 2017 10:35:19 +1000 Subject: [PATCH 165/190] unit: add PrintTo for ue2_literal --- unit/internal/util_string.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/unit/internal/util_string.cpp b/unit/internal/util_string.cpp index d6f7285a..f501f66b 100644 --- a/unit/internal/util_string.cpp +++ b/unit/internal/util_string.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,18 @@ using namespace ue2; +#if defined(DUMP_SUPPORT) + +namespace ue2 { + +static void PrintTo(const ue2_literal &lit, ::std::ostream *os) { + *os << dumpString(lit); +} + +} // namespace ue2 + +#endif // DUMP_SUPPORT + TEST(string, case_iter1) { const char * const expected[] = { "3FOO-BAR", From 3b392d6b70be9bd6686c0f2769bb58d405e2643b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 11 Aug 2017 13:29:07 +1000 Subject: [PATCH 166/190] accel_dfa_build_strat: make extend() faster --- src/nfa/accel_dfa_build_strat.cpp | 41 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 1f1b5e02..4508d4f1 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -41,6 +41,7 @@ #include "util/verify_types.h" #include +#include #include #include @@ -66,6 +67,17 @@ void dump_paths(const Container &paths) { DEBUG_PRINTF("%zu paths\n", 
paths.size()); } +static +vector reverse_alpha_remapping(const raw_dfa &rdfa) { + vector rv(rdfa.alpha_size - 1); /* TOP not required */ + + for (u32 i = 0; i < N_CHARS; i++) { + rv.at(rdfa.alpha_remap[i]).set(i); + } + + return rv; +} + static bool is_useful_path(const vector &good, const path &p) { for (const auto &g : good) { @@ -99,9 +111,10 @@ path append(const path &orig, const CharReach &cr, u32 new_dest) { } static -void extend(const raw_dfa &rdfa, const path &p, - map> &all, vector &out) { - dstate s = rdfa.states[p.dest]; +void extend(const raw_dfa &rdfa, const vector &rev_map, + const path &p, unordered_map> &all, + vector &out) { + const dstate &s = rdfa.states[p.dest]; if (!p.reach.empty() && p.reach.back().none()) { out.push_back(p); @@ -126,9 +139,9 @@ void extend(const raw_dfa &rdfa, const path &p, } flat_map dest; - for (unsigned i = 0; i < N_CHARS; i++) { - u32 succ = s.next[rdfa.alpha_remap[i]]; - dest[succ].set(i); + for (u32 i = 0; i < rev_map.size(); i++) { + u32 succ = s.next[i]; + dest[succ] |= rev_map[i]; } for (const auto &e : dest) { @@ -149,13 +162,14 @@ void extend(const raw_dfa &rdfa, const path &p, static vector> generate_paths(const raw_dfa &rdfa, dstate_id_t base, u32 len) { + const vector rev_map = reverse_alpha_remapping(rdfa); vector paths{path(base)}; - map> all; + unordered_map> all; all[base].push_back(path(base)); for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) { vector next_gen; for (const auto &p : paths) { - extend(rdfa, p, all, next_gen); + extend(rdfa, rev_map, p, all, next_gen); } paths = move(next_gen); @@ -196,17 +210,6 @@ bool better(const AccelScheme &a, const AccelScheme &b) { return a.cr.count() < b.cr.count(); } -static -vector reverse_alpha_remapping(const raw_dfa &rdfa) { - vector rv(rdfa.alpha_size - 1); /* TOP not required */ - - for (u32 i = 0; i < N_CHARS; i++) { - rv.at(rdfa.alpha_remap[i]).set(i); - } - - return rv; -} - static bool double_byte_ok(const AccelScheme &info) { return 
!info.double_byte.empty() && From 58c3de0d33f48abde990d2bce0416c28e60ff60e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 11 Aug 2017 15:37:55 +1000 Subject: [PATCH 167/190] mcclellancompile: don't copy dstate unnecessarily --- src/nfa/mcclellancompile.cpp | 2 +- src/nfa/mcsheng_compile.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 45f30158..ff18a68a 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -898,7 +898,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, } u32 self_loop_width = 0; - const dstate curr_raw = info.states[curr_id]; + const dstate &curr_raw = info.states[curr_id]; for (unsigned i = 0; i < N_CHARS; i++) { if (curr_raw.next[info.alpha_remap[i]] == curr_id) { self_loop_width++; diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index 728f03be..871ca4fb 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -740,7 +740,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, assert(info.is_normal(currState.daddy)); u32 self_loop_width = 0; - const dstate curr_raw = info.states[curr_id]; + const dstate &curr_raw = info.states[curr_id]; for (unsigned i = 0; i < N_CHARS; i++) { if (curr_raw.next[info.alpha_remap[i]] == curr_id) { self_loop_width++; From b694fed727d6bf0c539ff2985baed27e12865409 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 11 Aug 2017 16:08:05 +1000 Subject: [PATCH 168/190] mcclellancompile: simplify calc_min_dist_from_bob --- src/nfa/mcclellancompile_util.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index 977cf3d5..3e299b81 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -126,13 +126,11 @@ u32 remove_leading_dots(raw_dfa &raw) { static never_inline u32 
calc_min_dist_from_bob(raw_dfa &raw, vector *dist_in) { vector &dist = *dist_in; - dist.clear(); - dist.resize(raw.states.size(), ~0U); + dist.assign(raw.states.size(), ~0U); assert(raw.start_anchored != DEAD_STATE); - deque to_visit; - to_visit.push_back(raw.start_anchored); + deque to_visit = { raw.start_anchored }; dist[raw.start_anchored] = 0; u32 last_d = 0; @@ -147,8 +145,7 @@ u32 calc_min_dist_from_bob(raw_dfa &raw, vector *dist_in) { assert(d >= last_d); assert(d != ~0U); - for (u32 j = 0; j < raw.alpha_size; j++) { - dstate_id_t t = raw.states[s].next[j]; + for (dstate_id_t t : raw.states[s].next) { if (t == DEAD_STATE) { continue; } From 85c8822dd1ca60d82c7b82f5caf177303c1d84eb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 16 Aug 2017 13:05:24 +1000 Subject: [PATCH 169/190] fdr_compile: simplify lambda use This was failing to compile on MSVC. --- src/fdr/fdr_compile.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index e50245d7..5e3c6a4e 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -644,11 +644,12 @@ bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) { size_t len2 = s2.length(); assert(len1 >= len2); - auto lit_cmp = (lit1.nocase || lit2.nocase) - ? 
[](char a, char b) { return mytoupper(a) == mytoupper(b); } - : [](char a, char b) { return a == b; }; - - return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2, lit_cmp); + if (lit1.nocase || lit2.nocase) { + return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2, + [](char a, char b) { return mytoupper(a) == mytoupper(b); }); + } else { + return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2); + } } /* From 012b3472844ff30f899e1dfc458ba90a4f994d72 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 16 Aug 2017 13:13:06 +1000 Subject: [PATCH 170/190] ue2_literal: define npos in ue2string.cpp --- src/util/ue2string.cpp | 2 ++ src/util/ue2string.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index 40076056..39e1edbd 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -237,6 +237,8 @@ ue2_literal::elem::operator CharReach () const { } } +const ue2_literal::size_type ue2_literal::npos = std::string::npos; + ue2_literal::ue2_literal(const std::string &s_in, bool nc_in) : s(nc_in ? 
toUpperString(s_in) : s_in), nocase(s_in.size()) { if (nc_in) { diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 3519207c..44f1f53f 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -138,7 +138,8 @@ public: using const_reverse_iterator = std::reverse_iterator; using size_type = std::string::size_type; - static const size_type npos = std::string::npos; + + static const size_type npos; ue2_literal() = default; ue2_literal(const std::string &s_in, bool nc_in); From 29e1aae3fbfbd5cad117994d2ac792ddef49f809 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 16 Aug 2017 15:01:42 +1000 Subject: [PATCH 171/190] Use an unsigned byte, not char --- src/nfa/limex_compile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 2010728d..75b7c72b 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -242,7 +242,7 @@ bool isLimitedTransition(int from, int to, int maxshift) { // Fill a bit mask template -void maskFill(Mask &m, char c) { +void maskFill(Mask &m, u8 c) { memset(&m, c, sizeof(m)); } From bc27d6ae4bb7e2e51bdbbfdc7a7ad6b67004575f Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 16 Aug 2017 15:02:26 +1000 Subject: [PATCH 172/190] msvc: disable more warnings --- CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb9c7a4e..2520832b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,8 +154,9 @@ if(MSVC OR MSVC_IDE) # todo: change these as required set(ARCH_C_FLAGS "/arch:AVX2") set(ARCH_CXX_FLAGS "/arch:AVX2") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /wd4244 /wd4267") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") + set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 -D_CRT_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 
${MSVC_WARNS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 ${MSVC_WARNS} /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD") endif() string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") From 7fe53fec1070d2b3dc40721e1f54ddce23b4d73d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 16 Aug 2017 10:24:15 +1000 Subject: [PATCH 173/190] partitioned_set: use lower_bound(), not scan --- src/util/partitioned_set.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/util/partitioned_set.h b/src/util/partitioned_set.h index 313c08e1..8a4d3dd9 100644 --- a/src/util/partitioned_set.h +++ b/src/util/partitioned_set.h @@ -128,12 +128,10 @@ public: } for (auto it = orig.members.begin(); it != orig.members.end(); ++it) { - T member = *it; + const auto &member = *it; assert(member < member_to_subset.size()); - while (sp_it != sp_e && *sp_it < member) { - ++sp_it; - } + sp_it = std::lower_bound(sp_it, sp_e, member); if (sp_it == sp_e) { split_temp_diff.insert(split_temp_diff.end(), it, orig.members.end()); From cee0b722a3030961690ca86f7344e4987f891732 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 16 Aug 2017 14:37:05 +1000 Subject: [PATCH 174/190] reimplement hasSameEngineType() --- src/rose/rose_build_merge.cpp | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 04d5e7d0..94bba13e 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -555,27 +555,13 @@ bool checkPrefix(const rose_literal_id &ul, const u32 ulag, static bool hasSameEngineType(const RoseVertexProps &u_prop, const RoseVertexProps &v_prop) { - const left_id u_left(u_prop.left), v_left(v_prop.left); + const left_id u_left = u_prop.left; + const left_id v_left = v_prop.left; - if (u_left.haig() || v_left.haig()) { - if (u_left.graph() != v_left.graph()) { - return 
false; - } - } - - if (u_left.dfa() || v_left.dfa()) { - if (u_left.graph() != v_left.graph()) { - return false; - } - } - - if (u_left.castle() || v_left.castle()) { - if (!u_left.castle() || !v_left.castle()) { - return false; // Must both be castles. - } - } - - return true; + return !u_left.haig() == !v_left.haig() + && !u_left.dfa() == !v_left.dfa() + && !u_left.castle() == !v_left.castle() + && !u_left.graph() == !v_left.graph(); } /** From 6f452668ec41390a655d36a3072f0ce7b435708e Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 11 Aug 2017 14:59:07 +1000 Subject: [PATCH 175/190] refactor mergeCastleLeftfixes --- src/rose/rose_build_merge.cpp | 202 +++++++++++++++++----------------- 1 file changed, 104 insertions(+), 98 deletions(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 94bba13e..38202932 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -84,6 +84,7 @@ using namespace std; using boost::adaptors::map_values; +using boost::adaptors::map_keys; namespace ue2 { @@ -1037,10 +1038,20 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, return false; } +/** + * Checks that there is no problem due to the involved vertices if we merge two + * leftfix engines. + * + * This functions takes the vertices on the right of the two engines. + * + * Unlike mergeableRoseVertices(), this does not: + * - check that engines themselves can be merged + * - use heuristics to find out if merging the engines is wise. 
+ */ static -bool mergeLeftVL_checkTargetsCompatible(const RoseBuildImpl &build, - const vector &targets_1, - const vector &targets_2) { +bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, + const vector &targets_1, + const vector &targets_2) { assert(!targets_1.empty()); assert(!targets_2.empty()); @@ -1206,7 +1217,7 @@ bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, /* Rechecking that the targets are compatible, as we may have already * merged new states into r1 or r2 and we need to verify that this * candidate is still ok. */ - if (!mergeLeftVL_checkTargetsCompatible(build, targets_1, targets_2)) { + if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) { return false; } @@ -1460,8 +1471,8 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { || r1.castle()->reach() == r2.castle()->reach()); const vector &targets_2 = eng_verts[r2]; - if (!mergeLeftVL_checkTargetsCompatible(build, targets_1, - targets_2)) { + if (!checkVerticesOkForLeftfixMerge(build, targets_1, + targets_2)) { continue; // No point queueing unmergeable cases. 
} @@ -1847,66 +1858,6 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) { } } -static -void mergeCastleChunk(RoseBuildImpl &tbi, RoseBouquet &cands) { - /* caller must have already ensured that candidates have the same reach */ - RoseGraph &g = tbi.g; - DEBUG_PRINTF("%zu castle rose merge candidates\n", cands.size()); - - deque merged; - - for (auto it = cands.begin(); it != cands.end(); ++it) { - left_id r1 = *it; - CastleProto &castle1 = *r1.castle(); - const deque &verts1 = cands.vertices(r1); - - merged.clear(); - - for (auto jt = next(it); jt != cands.end(); ++jt) { - left_id r2 = *jt; - CastleProto &castle2 = *r2.castle(); - const deque &verts2 = cands.vertices(r2); - - if (castle1.repeats.size() == castle1.max_occupancy) { - DEBUG_PRINTF("castle1 has hit max occupancy\n"); - break; // next castle1 - } - - assert(castle1.reach() == castle2.reach()); - - if (!mergeableRoseVertices(tbi, verts1, verts2)) { - DEBUG_PRINTF("not mergeable\n"); - continue; // next castle2 - } - - DEBUG_PRINTF("castle1=%p (size %zu), castle2=%p (size %zu)\n", - &castle1, castle1.repeats.size(), &castle2, - castle2.repeats.size()); - - map top_map; - if (!mergeCastle(castle1, castle2, top_map)) { - DEBUG_PRINTF("couldn't merge\n"); - continue; // next castle2 - } - - // Update castle2's roses to point to castle1 now. - shared_ptr winner = g[verts1.front()].left.castle; - for (auto v : verts2) { - g[v].left.castle = winner; - for (const auto &e : in_edges_range(v, g)) { - g[e].rose_top = top_map.at(g[e].rose_top); - } - } - - cands.insert(r1, verts2); - merged.push_back(r2); - } - - DEBUG_PRINTF("%zu roses merged\n", merged.size()); - cands.erase_all(merged.begin(), merged.end()); - } -} - /** * This pass attempts to merge prefix/infix engines with a small number of * vertices together into larger engines. 
The engines must not be have a @@ -1983,55 +1934,110 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) { } } -void mergeCastleLeftfixes(RoseBuildImpl &tbi) { + +static +void mergeCastleChunk(RoseBuildImpl &build, vector &cands, + insertion_ordered_map> &eng_verts) { + /* caller must have already ensured that candidates have the same reach */ + RoseGraph &g = build.g; + DEBUG_PRINTF("%zu castle leftfix merge candidates\n", cands.size()); + + for (auto it = cands.begin(); it != cands.end(); ++it) { + left_id &cand_1 = *it; + vector &verts_1 = eng_verts[cand_1]; + if (verts_1.empty()) { + continue; + } + + for (auto jt = next(it); jt != cands.end(); ++jt) { + const left_id &cand_2 = *jt; + vector &verts_2 = eng_verts[cand_2]; + if (verts_2.empty()) { + continue; + } + + assert(cand_1.castle()->reach() == cand_2.castle()->reach()); + + if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) { + DEBUG_PRINTF("not mergeable\n"); + continue; // next cand_2 + } + + DEBUG_PRINTF("castle1=%p (size %zu)\n", cand_1.castle(), + cand_1.castle()->repeats.size()); + DEBUG_PRINTF("castle2=%p (size %zu)\n", cand_2.castle(), + cand_2.castle()->repeats.size()); + + map top_map; + if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) { + DEBUG_PRINTF("couldn't merge\n"); + continue; // next cand_2 + } + + // Update castle2's roses to point to castle1 now. + shared_ptr winner = g[verts_1.front()].left.castle; + for (auto v : verts_2) { + assert(g[v].left.castle.get() == cand_2.castle()); + g[v].left.castle = winner; + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_map.at(g[e].rose_top); + } + } + + insert(&verts_1, verts_1.end(), verts_2); + verts_2.clear(); + } + } +} + +/** + * Merges castles with the same reach together regardless of where in the rose + * graph they are. Note: there is no requirement for the castles to have common + * parent or target vertices. 
+ * + * There are no heuristics for reducing block mode merges as castle speed + * mainly depends on the reach being scanned. + */ +void mergeCastleLeftfixes(RoseBuildImpl &build) { DEBUG_PRINTF("entry\n"); - if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses || - !tbi.cc.grey.allowCastle) { + if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses + || !build.cc.grey.allowCastle) { return; } - RoseGraph &g = tbi.g; + RoseGraph &g = build.g; - map by_reach; + insertion_ordered_map> eng_verts; for (auto v : vertices_range(g)) { - if (!g[v].left) { + if (!g[v].left.castle) { continue; } - // Handle single-parent infixes only. - if (tbi.isRootSuccessor(v)) { + // Handle infixes only. + if (build.isRootSuccessor(v)) { continue; } - const left_id left(g[v].left); - - // Only non-transient for the moment. - if (contains(tbi.transient, left)) { - continue; - } - - if (!left.castle()) { - continue; - } - - const CastleProto &castle = *left.castle(); - const CharReach &cr = castle.reach(); - by_reach[cr].insert(left, v); + eng_verts[g[v].left].push_back(v); } - for (auto &m : by_reach) { - DEBUG_PRINTF("%zu castles for reach: %s\n", m.second.size(), - describeClass(m.first).c_str()); - RoseBouquet &candidates = m.second; - deque cand_groups; - chunkBouquets(candidates, cand_groups, MERGE_CASTLE_GROUP_SIZE_MAX); - candidates.clear(); + map> by_reach; + for (const auto &left : eng_verts | map_keys) { + by_reach[left.castle()->reach()].push_back(left); + } - for (auto &group : cand_groups) { - mergeCastleChunk(tbi, group); - } + vector> chunks; + for (auto &raw_group : by_reach | map_values) { + chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); + } + by_reach.clear(); + + DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size()); + + for (auto &chunk : chunks) { + mergeCastleChunk(build, chunk, eng_verts); } } From ace592e247991aeca0080675b65bb06ddd63fb74 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 14 Aug 2017 10:02:47 +1000 
Subject: [PATCH 176/190] tidy mergeCastleSuffixes --- src/rose/rose_build_merge.cpp | 77 ++++++++++++----------------------- 1 file changed, 27 insertions(+), 50 deletions(-) diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 38202932..d50a7474 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -2706,8 +2706,8 @@ void mergePuffixes(RoseBuildImpl &tbi) { static void updateCastleSuffix(RoseGraph &g, const shared_ptr &m, u32 top, const vector &verts) { - DEBUG_PRINTF("merged in as top %u, updating %zu vertices\n", top, - verts.size()); + DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top, + m.get(), verts.size()); for (auto v : verts) { assert(g[v].suffix.castle); @@ -2717,77 +2717,56 @@ void updateCastleSuffix(RoseGraph &g, const shared_ptr &m, } static -void mergeCastleSuffixes(RoseBuildImpl &tbi, - vector > &castles, - map, vector > &castle_map) { +void mergeCastleSuffixChunk(RoseGraph &g, const vector &castles, + const unordered_map> &eng_verts) { if (castles.size() <= 1) { return; } - RoseGraph &g = tbi.g; - const size_t max_size = CastleProto::max_occupancy; + DEBUG_PRINTF("merging reach %s, %zu elements\n", + describeClass(castles[0]->reach()).c_str(), castles.size()); - shared_ptr m = castles.front(); - assert(m->repeats.size() == 1); // Not yet merged. + CastleProto *m = nullptr; - // Cache repeats we've already merged, mapped to (prototype, top). That - // way, we can ensure that we don't construct more than one completely - // identical repeat. - typedef map, u32> > RepeatCache; - RepeatCache cache; - { - // Initial entry in cache. - const u32 top = m->repeats.begin()->first; - const PureRepeat &pr = m->repeats.begin()->second; - cache[pr] = make_pair(m, top); - } - - for (size_t i = 1; i < castles.size(); i++) { - shared_ptr c = castles[i]; + for (CastleProto *c : castles) { assert(c->repeats.size() == 1); // Not yet merged. 
- const PureRepeat &pr = c->repeats.begin()->second; - RepeatCache::const_iterator it = cache.find(pr); - if (it != cache.end()) { - DEBUG_PRINTF("reusing cached merge, top=%u, proto=%p\n", - it->second.second, it->second.first.get()); - updateCastleSuffix(g, it->second.first, it->second.second, - castle_map[c]); + assert(g[eng_verts.at(c).front()].suffix.castle.get() == c); + if (!m) { + m = c; continue; } - if (m->repeats.size() == max_size) { + u32 top = m->merge(c->repeats[0]); + if (top == CastleProto::max_occupancy) { // No room left to merge into 'm'. This one becomes the new 'm'. DEBUG_PRINTF("next mergee\n"); m = c; - u32 top = m->repeats.begin()->first; - cache[pr] = make_pair(m, top); - } else { - u32 top = m->add(pr); - updateCastleSuffix(g, m, top, castle_map[c]); - DEBUG_PRINTF("added to %p, top %u\n", m.get(), top); - cache[pr] = make_pair(m, top); + continue; } + updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top, + eng_verts.at(c)); + DEBUG_PRINTF("added to %p, top %u\n", m, top); } } -void mergeCastleSuffixes(RoseBuildImpl &tbi) { +void mergeCastleSuffixes(RoseBuildImpl &build) { DEBUG_PRINTF("entry\n"); - if (!(tbi.cc.grey.allowCastle && tbi.cc.grey.mergeSuffixes)) { + if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) { return; } - map, vector> castles; - map>> by_reach; + unordered_map> eng_verts; + map> by_reach; - RoseGraph &g = tbi.g; + RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (!g[v].suffix.castle) { continue; } - shared_ptr c = g[v].suffix.castle; + CastleProto *c = g[v].suffix.castle.get(); if (c->repeats.size() != 1) { // This code assumes it's the only place merging is being done. 
@@ -2795,16 +2774,14 @@ void mergeCastleSuffixes(RoseBuildImpl &tbi) { continue; } - if (!contains(castles, c)) { + if (!contains(eng_verts, c)) { by_reach[c->reach()].push_back(c); } - castles[c].push_back(v); + eng_verts[c].push_back(v); } - for (auto &m : by_reach) { - DEBUG_PRINTF("reach %s, %zu elements\n", describeClass(m.first).c_str(), - m.second.size()); - mergeCastleSuffixes(tbi, m.second, castles); + for (auto &chunk : by_reach | map_values) { + mergeCastleSuffixChunk(g, chunk, eng_verts); } } From ea2e85ac87818d5452a4eb6fc5453eb034725f73 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 14 Aug 2017 12:55:28 +1000 Subject: [PATCH 177/190] ng_squash: switch to using unordered_map Also some cleaning up, small performance improvements. --- src/nfa/limex_compile.cpp | 46 +++++++++++++++--------------- src/nfa/limex_compile.h | 34 +++++++++++----------- src/nfagraph/ng_limex.cpp | 23 +++++++-------- src/nfagraph/ng_squash.cpp | 58 ++++++++++++++++++++++---------------- src/nfagraph/ng_squash.h | 12 ++++---- 5 files changed, 93 insertions(+), 80 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 75b7c72b..6053b56f 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -105,11 +105,13 @@ struct limex_accel_info { }; static -map -reindexByStateId(const map &in, const NGHolder &g, +unordered_map +reindexByStateId(const unordered_map &in, + const NGHolder &g, const unordered_map &state_ids, const u32 num_states) { - map out; + unordered_map out; + out.reserve(in.size()); vector indexToState(num_vertices(g), NO_STATE); for (const auto &m : state_ids) { @@ -141,8 +143,8 @@ struct build_info { build_info(NGHolder &hi, const unordered_map &states_in, const vector &ri, - const map &rsmi, - const map &smi, + const unordered_map &rsmi, + const unordered_map &smi, const map> &ti, const set &zi, bool dai, bool sci, const CompileContext &cci, u32 nsi) : h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi), 
@@ -168,8 +170,8 @@ struct build_info { const vector &repeats; // Squash maps; state sets are indexed by state_id. - map reportSquashMap; - map squashMap; + unordered_map reportSquashMap; + unordered_map squashMap; const map> &tops; NFAStateSet tugs; @@ -2434,14 +2436,14 @@ u32 max_state(const unordered_map &state_ids) { } bytecode_ptr generate(NGHolder &h, - const unordered_map &states, - const vector &repeats, - const map &reportSquashMap, - const map &squashMap, - const map> &tops, - const set &zombies, bool do_accel, - bool stateCompression, u32 hint, - const CompileContext &cc) { + const unordered_map &states, + const vector &repeats, + const unordered_map &reportSquashMap, + const unordered_map &squashMap, + const map> &tops, + const set &zombies, bool do_accel, + bool stateCompression, u32 hint, + const CompileContext &cc) { const u32 num_states = max_state(states) + 1; DEBUG_PRINTF("total states: %u\n", num_states); @@ -2504,13 +2506,13 @@ bytecode_ptr generate(NGHolder &h, } u32 countAccelStates(NGHolder &h, - const unordered_map &states, - const vector &repeats, - const map &reportSquashMap, - const map &squashMap, - const map> &tops, - const set &zombies, - const CompileContext &cc) { + const unordered_map &states, + const vector &repeats, + const unordered_map &reportSquashMap, + const unordered_map &squashMap, + const map> &tops, + const set &zombies, + const CompileContext &cc) { const u32 num_states = max_state(states) + 1; DEBUG_PRINTF("total states: %u\n", num_states); diff --git a/src/nfa/limex_compile.h b/src/nfa/limex_compile.h index 3b819739..a08e0ae5 100644 --- a/src/nfa/limex_compile.h +++ b/src/nfa/limex_compile.h @@ -70,16 +70,16 @@ struct CompileContext; * graph. 
*/ bytecode_ptr generate(NGHolder &g, - const std::unordered_map &states, - const std::vector &repeats, - const std::map &reportSquashMap, - const std::map &squashMap, - const std::map> &tops, - const std::set &zombies, - bool do_accel, - bool stateCompression, - u32 hint, - const CompileContext &cc); + const std::unordered_map &states, + const std::vector &repeats, + const std::unordered_map &reportSquashMap, + const std::unordered_map &squashMap, + const std::map> &tops, + const std::set &zombies, + bool do_accel, + bool stateCompression, + u32 hint, + const CompileContext &cc); /** * \brief For a given graph, count the number of accelerable states it has. @@ -88,13 +88,13 @@ bytecode_ptr generate(NGHolder &g, * implementable. */ u32 countAccelStates(NGHolder &h, - const std::unordered_map &states, - const std::vector &repeats, - const std::map &reportSquashMap, - const std::map &squashMap, - const std::map> &tops, - const std::set &zombies, - const CompileContext &cc); + const std::unordered_map &states, + const std::vector &repeats, + const std::unordered_map &reportSquashMap, + const std::unordered_map &squashMap, + const std::map> &tops, + const std::set &zombies, + const CompileContext &cc); } // namespace ue2 diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 1daec578..c4147a30 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -117,10 +117,9 @@ bool sanityCheckGraph(const NGHolder &g, #endif static -void findSquashStates(const NGHolder &g, - const vector &repeats, - map &squashMap) { - squashMap = findSquashers(g); +unordered_map findSquashStates(const NGHolder &g, + const vector &repeats) { + auto squashMap = findSquashers(g); filterSquashers(g, squashMap); /* We also filter out the cyclic states representing bounded repeats, as @@ -130,6 +129,8 @@ void findSquashStates(const NGHolder &g, squashMap.erase(br.cyclic); } } + + return squashMap; } /** @@ -659,12 +660,12 @@ constructNFA(const NGHolder &h_in, const 
ReportManager *rm, br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax); } - map reportSquashMap; - map squashMap; + unordered_map reportSquashMap; + unordered_map squashMap; // build map of squashed and squashers if (cc.grey.squashNFA) { - findSquashStates(*h, repeats, squashMap); + squashMap = findSquashStates(*h, repeats); if (rm && cc.grey.highlanderSquash) { reportSquashMap = findHighlanderSquashers(*h, *rm); @@ -736,8 +737,8 @@ bytecode_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, map> tops; /* only the standards tops for nfas */ set zombies; vector repeats; - map reportSquashMap; - map squashMap; + unordered_map reportSquashMap; + unordered_map squashMap; return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops, zombies, false, false, hint, cc); @@ -850,8 +851,8 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, // Should have no bearing on accel calculation, so we leave these empty. const set zombies; - const map reportSquashMap; - const map squashMap; + const unordered_map reportSquashMap; + const unordered_map squashMap; return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap, tops, zombies, cc); diff --git a/src/nfagraph/ng_squash.cpp b/src/nfagraph/ng_squash.cpp index df77668e..03495d14 100644 --- a/src/nfagraph/ng_squash.cpp +++ b/src/nfagraph/ng_squash.cpp @@ -121,10 +121,13 @@ using namespace std; namespace ue2 { -typedef unordered_map> PostDomTree; +using PostDomTree = unordered_map>; static -void buildPDomTree(const NGHolder &g, PostDomTree &tree) { +PostDomTree buildPDomTree(const NGHolder &g) { + PostDomTree tree; + tree.reserve(num_vertices(g)); + auto postdominators = findPostDominators(g); for (auto v : vertices_range(g)) { @@ -137,6 +140,7 @@ void buildPDomTree(const NGHolder &g, PostDomTree &tree) { tree[pdom].insert(v); } } + return tree; } /** @@ -155,7 +159,7 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, vector q; - 
PostDomTree::const_iterator it = tree.find(v); + auto it = tree.find(v); if (it != tree.end()) { q.insert(q.end(), it->second.begin(), it->second.end()); } @@ -271,8 +275,8 @@ void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) { static void findDerivedSquashers(const NGHolder &g, const vector &vByIndex, const PostDomTree &pdom_tree, const NFAStateSet &init, - map *squash, som_type som, - const vector &som_depths, + unordered_map *squash, + som_type som, const vector &som_depths, const unordered_map ®ion_map, smgb_cache &cache) { deque remaining; @@ -315,37 +319,41 @@ void findDerivedSquashers(const NGHolder &g, const vector &vByIndex, } } -/* If there are redundant states in the graph, it may be possible for two sibling - * .* states to try to squash each other -- which should be prevented +/* If there are redundant states in the graph, it may be possible for two + * sibling .* states to try to squash each other -- which should be prevented. * * Note: this situation should only happen if ng_equivalence has not been run. 
*/ static void clearMutualSquashers(const NGHolder &g, const vector &vByIndex, - map &squash) { + unordered_map &squash) { for (auto it = squash.begin(); it != squash.end();) { NFAVertex a = it->first; u32 a_index = g[a].index; NFAStateSet a_squash = ~it->second; /* default is mask of survivors */ - for (NFAStateSet::size_type b_index = a_squash.find_first(); - b_index != a_squash.npos; b_index = a_squash.find_next(b_index)) { + for (auto b_index = a_squash.find_first(); b_index != a_squash.npos; + b_index = a_squash.find_next(b_index)) { assert(b_index != a_index); NFAVertex b = vByIndex[b_index]; - if (!contains(squash, b)) { + + auto b_it = squash.find(b); + if (b_it == squash.end()) { continue; } - if (!squash[b].test(a_index)) { + auto &b_squash = b_it->second; + if (!b_squash.test(a_index)) { /* b and a squash each other, prevent this */ DEBUG_PRINTF("removing mutual squash %u %zu\n", a_index, b_index); - squash[b].set(a_index); + b_squash.set(a_index); it->second.set(b_index); } } if (it->second.all()) { - DEBUG_PRINTF("%u is no longer an effictive squash state\n", a_index); + DEBUG_PRINTF("%u is no longer an effective squash state\n", + a_index); it = squash.erase(it); } else { ++it; @@ -353,16 +361,16 @@ void clearMutualSquashers(const NGHolder &g, const vector &vByIndex, } } -map findSquashers(const NGHolder &g, som_type som) { - map squash; +unordered_map findSquashers(const NGHolder &g, + som_type som) { + unordered_map squash; // Number of bits to use for all our masks. If we're a triggered graph, // tops have already been assigned, so we don't have to account for them. const u32 numStates = num_vertices(g); // Build post-dominator tree. - PostDomTree pdom_tree; - buildPDomTree(g, pdom_tree); + auto pdom_tree = buildPDomTree(g); // Build list of vertices by state ID and a set of init states. 
vector vByIndex(numStates, NGHolder::null_vertex()); @@ -507,9 +515,11 @@ map findSquashers(const NGHolder &g, som_type som) { * -# squash only a few acyclic states */ void filterSquashers(const NGHolder &g, - map &squash) { + unordered_map &squash) { + assert(hasCorrectlyNumberedVertices(g)); + DEBUG_PRINTF("filtering\n"); - map rev; /* vertex_index -> vertex */ + vector rev(num_vertices(g)); /* vertex_index -> vertex */ for (auto v : vertices_range(g)) { rev[g[v].index] = v; } @@ -528,8 +538,8 @@ void filterSquashers(const NGHolder &g, NFAStateSet squashed = squash[v]; squashed.flip(); /* default sense for mask of survivors */ - for (NFAStateSet::size_type sq = squashed.find_first(); - sq != squashed.npos; sq = squashed.find_next(sq)) { + for (auto sq = squashed.find_first(); sq != squashed.npos; + sq = squashed.find_next(sq)) { NFAVertex u = rev[sq]; if (hasSelfLoop(u, g)) { DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq); @@ -637,9 +647,9 @@ vector findUnreachable(const NGHolder &g) { /** Populates squash masks for states that can be switched off by highlander * (single match) reporters. */ -map +unordered_map findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { - map squash; + unordered_map squash; set verts; getHighlanderReporters(g, g.accept, rm, verts); diff --git a/src/nfagraph/ng_squash.h b/src/nfagraph/ng_squash.h index 51ce245a..489f541e 100644 --- a/src/nfagraph/ng_squash.h +++ b/src/nfagraph/ng_squash.h @@ -36,7 +36,7 @@ #include "som/som.h" #include "ue2common.h" -#include +#include #include namespace ue2 { @@ -47,7 +47,7 @@ class ReportManager; /** * Dynamically-sized bitset, as an NFA can have an arbitrary number of states. */ -typedef boost::dynamic_bitset<> NFAStateSet; +using NFAStateSet = boost::dynamic_bitset<>; /** * Populates the squash mask for each vertex (i.e. 
the set of states to be left @@ -55,16 +55,16 @@ typedef boost::dynamic_bitset<> NFAStateSet; * * The NFAStateSet in the output map is indexed by vertex_index. */ -std::map findSquashers(const NGHolder &g, - som_type som = SOM_NONE); +std::unordered_map +findSquashers(const NGHolder &g, som_type som = SOM_NONE); /** Filters out squash states intended only for use in DFA construction. */ void filterSquashers(const NGHolder &g, - std::map &squash); + std::unordered_map &squash); /** Populates squash masks for states that can be switched off by highlander * (single match) reporters. */ -std::map +std::unordered_map findHighlanderSquashers(const NGHolder &g, const ReportManager &rm); } // namespace ue2 From d6c050abd65f0651c2cf8ce6f4b655456fe37fc7 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 16 Aug 2017 16:36:12 +1000 Subject: [PATCH 178/190] maintain castle report information --- src/rose/rose_build_role_aliasing.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 22581caf..359550e1 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -772,9 +772,13 @@ void pruneCastle(CastleProto &castle, ReportID report) { /** \brief Set all reports to the given one. 
*/ static void setReports(CastleProto &castle, ReportID report) { - for (auto &repeat : castle.repeats | map_values) { + castle.report_map.clear(); + for (auto &e : castle.repeats) { + u32 top = e.first; + auto &repeat = e.second; repeat.reports.clear(); repeat.reports.insert(report); + castle.report_map[report].insert(top); } } From a1fdc3afcf154371713cbcf1cc474ccee8428311 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 14 Aug 2017 16:27:48 +1000 Subject: [PATCH 179/190] dedupeLeftfixesVariableLag: refactor, more blockmode deduping --- src/rose/rose_build_impl.h | 1 + src/rose/rose_build_merge.cpp | 190 +++++++++++++++++++--------------- src/rose/rose_build_misc.cpp | 13 +++ 3 files changed, 118 insertions(+), 86 deletions(-) diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 42ae054a..900aee6c 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -254,6 +254,7 @@ private: }; std::set all_tops(const left_id &r); +std::set all_reports(const left_id &left); bool isAnchored(const left_id &r); depth findMinWidth(const left_id &r); depth findMaxWidth(const left_id &r); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index d50a7474..c0eba22b 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -124,17 +124,17 @@ size_t small_rose_threshold(const CompileContext &cc) { * reports should not contribute to the hash. 
*/ static -size_t hashLeftfix(const LeftEngInfo &left) { +size_t hashLeftfix(const left_id &left) { size_t val = 0; - if (left.castle) { - hash_combine(val, left.castle->reach()); - for (const auto &pr : left.castle->repeats) { + if (left.castle()) { + hash_combine(val, left.castle()->reach()); + for (const auto &pr : left.castle()->repeats) { hash_combine(val, pr.first); // top hash_combine(val, pr.second.bounds); } - } else if (left.graph) { - hash_combine(val, hash_holder(*left.graph)); + } else if (left.graph()) { + hash_combine(val, hash_holder(*left.graph())); } return val; @@ -180,33 +180,24 @@ private: }; /** - * Trivial Rose comparator intended to find graphs that are identical except - * for their report IDs. Relies on vertex and edge indices to pick up graphs - * that have been messily put together in different orderings... + * Intended to find graphs that are identical except for their report + * IDs. Relies on vertex and edge indices to pick up graphs that have been + * messily put together in different orderings. Only implemented for castles and + * holders. 
*/ -struct RoseComparator { - explicit RoseComparator(const RoseGraph &g_in) : g(g_in) {} - - bool operator()(const RoseVertex u, const RoseVertex v) const { - const LeftEngInfo &u_left = g[u].left; - const LeftEngInfo &v_left = g[v].left; - - if (u_left.castle && v_left.castle) { - return is_equal(*u_left.castle, u_left.leftfix_report, - *v_left.castle, v_left.leftfix_report); - } - - if (!u_left.graph || !v_left.graph) { - return false; - } - - return is_equal(*u_left.graph, u_left.leftfix_report, *v_left.graph, - v_left.leftfix_report); +static +bool is_equal(const left_id &u_left, ReportID u_report, + const left_id &v_left, ReportID v_report) { + if (u_left.castle() && v_left.castle()) { + return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report); } -private: - const RoseGraph &g; -}; + if (!u_left.graph() || !v_left.graph()) { + return false; + } + + return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report); +} } // namespace @@ -253,8 +244,6 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { DEBUG_PRINTF("collected %zu rose groups\n", roses.size()); - const RoseComparator rosecmp(g); - // Walk groups and dedupe the roses therein. for (deque &verts : roses | map_values) { DEBUG_PRINTF("group has %zu vertices\n", verts.size()); @@ -272,7 +261,9 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { // Scan the rest of the list for dupes. 
for (auto kt = std::next(jt); kt != jte; ++kt) { - if (g[v].left == g[*kt].left || !rosecmp(v, *kt)) { + if (g[v].left == g[*kt].left + || !is_equal(g[v].left, g[v].left.leftfix_report, + g[*kt].left, g[*kt].left.leftfix_report)) { continue; } @@ -1346,6 +1337,21 @@ void chunk(vector in, vector> *out, size_t chunk_size) { } } +static +insertion_ordered_map> get_eng_verts(RoseGraph &g) { + insertion_ordered_map> eng_verts; + for (auto v : vertices_range(g)) { + const auto &left = g[v].left; + if (!left) { + continue; + } + assert(contains(all_reports(left), left.leftfix_report)); + eng_verts[left].push_back(v); + } + + return eng_verts; +} + /** * This pass attempts to merge prefix/infix engines which share a common set of * parent vertices. @@ -1377,19 +1383,11 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { RoseGraph &g = build.g; - insertion_ordered_map> eng_verts; - DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("entry\n"); DEBUG_PRINTF("-----\n"); - for (auto v : vertices_range(g)) { - const auto &left = g[v].left; - if (!left) { - continue; - } - eng_verts[left].push_back(v); - } + auto eng_verts = get_eng_verts(g); map> engine_groups; for (const auto &e : eng_verts) { @@ -1511,13 +1509,10 @@ namespace { * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path. */ struct DedupeLeftKey { - DedupeLeftKey(const RoseBuildImpl &build, RoseVertex v) - : left_hash(hashLeftfix(build.g[v].left)), - transient(contains(build.transient, build.g[v].left)) { - const auto &g = build.g; - for (const auto &e : in_edges_range(v, g)) { - preds.emplace(g[source(e, g)].index, g[e].rose_top); - } + DedupeLeftKey(const RoseBuildImpl &build, + flat_set> preds_in, const left_id &left) + : left_hash(hashLeftfix(left)), preds(move(preds_in)), + transient(contains(build.transient, left)) { } bool operator<(const DedupeLeftKey &b) const { @@ -1531,7 +1526,7 @@ private: size_t left_hash; /** For each in-edge, the pair of (parent index, edge top). 
*/ - set> preds; + flat_set> preds; /** We don't want to combine transient with non-transient. */ bool transient; @@ -1539,6 +1534,15 @@ private: } // namespace +static +flat_set> get_pred_tops(RoseVertex v, const RoseGraph &g) { + flat_set> preds; + for (const auto &e : in_edges_range(v, g)) { + preds.emplace(g[source(e, g)].index, g[e].rose_top); + } + return preds; +} + /** * This is a generalisation of \ref dedupeLeftfixes which relaxes two * restrictions: multiple predecessor roles are allowed and the delay used by @@ -1558,83 +1562,97 @@ private: * * Note: this is unable to dedupe when delayed literals are involved unlike * dedupeLeftfixes. - * - * Note: in block mode we restrict the dedupe of prefixes further as some of - * logic checks are shared with the mergeLeftfix functions. */ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { - map roseGrouping; - DEBUG_PRINTF("entry\n"); RoseGraph &g = build.g; - for (auto v : vertices_range(g)) { - if (!g[v].left) { + auto eng_verts = get_eng_verts(g); + + map> engine_groups; + for (const auto &e : eng_verts) { + const left_id &left = e.first; + const auto &verts = e.second; + + /* There should only be one report on an engine as no merges have + * happened yet. 
(aside from eod prefixes) */ + if (all_reports(left).size() != 1) { + assert(any_of_in(adjacent_vertices_range(verts.front(), g), + [&](RoseVertex w) { return g[w].eod_accept; })); continue; } - const left_id leftfix(g[v].left); - - if (leftfix.haig()) { - /* TODO: allow merging of identical haigs */ + if (left.haig()) { + /* TODO: allow deduping of identical haigs */ continue; } - if (leftfix.graph()) { + if (left.graph()) { /* we should not have merged yet */ - assert(!is_triggered(*leftfix.graph()) - || onlyOneTop(*leftfix.graph())); + assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph())); } - roseGrouping[DedupeLeftKey(build, v)].insert(leftfix, v); + auto preds = get_pred_tops(verts.front(), g); + for (RoseVertex v : verts) { + if (preds != get_pred_tops(v, g)) { + DEBUG_PRINTF("distinct pred sets\n"); + continue; + } + } + engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left); } - for (RoseBouquet &roses : roseGrouping | map_values) { - DEBUG_PRINTF("group of %zu roses\n", roses.size()); + /* We don't bother chunking as we expect deduping to be successful if the + * hashes match */ - if (roses.size() < 2) { + for (auto &group : engine_groups | map_values) { + DEBUG_PRINTF("group of %zu roses\n", group.size()); + + if (group.size() < 2) { continue; } - const RoseComparator rosecmp(g); - - for (auto it = roses.begin(); it != roses.end(); ++it) { + for (auto it = group.begin(); it != group.end(); ++it) { left_id r1 = *it; - const deque &verts1 = roses.vertices(r1); + vector &verts1 = eng_verts[r1]; + assert(!verts1.empty()); /* cleared engines should be behind us */ - for (auto jt = next(it); jt != roses.end(); ++jt) { + assert(all_reports(r1).size() == 1); + ReportID r1_report = *all_reports(r1).begin(); + + for (auto jt = next(it); jt != group.end(); ++jt) { left_id r2 = *jt; - const deque &verts2 = roses.vertices(r2); + vector &verts2 = eng_verts[r2]; + assert(!verts2.empty()); + assert(all_reports(r2).size() == 1); + ReportID 
r2_report = *all_reports(r2).begin(); - if (!rosecmp(verts1.front(), verts2.front())) { + if (!is_equal(r1, r1_report, r2, r2_report)) { continue; } - if (!mergeableRoseVertices(build, verts1, verts2)) { + if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) { continue; } DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph()); - // Replace h1 with h2. - - const LeftEngInfo &v2_left = g[verts2.front()].left; - assert(v2_left.graph.get() == r2.graph()); + // Replace r1 with r2. for (auto v : verts1) { DEBUG_PRINTF("replacing report %u with %u on %zu\n", - g[v].left.leftfix_report, - v2_left.leftfix_report, g[v].index); + r2_report, r1_report, g[v].index); u32 orig_lag = g[v].left.lag; - g[v].left = v2_left; + g[v].left = g[verts2.front()].left; g[v].left.lag = orig_lag; } - roses.insert(r2, verts1); - /* remove stale entry from transient set, if present */ + insert(&verts2, verts2.end(), verts1); + verts1.clear(); + + /* remove stale entry from transient set, if present */ build.transient.erase(r1); - // no need to erase h1 from roses, that would invalidate `it'. 
break; } } diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index af2af5de..a7332df7 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -750,6 +750,19 @@ set all_tops(const left_id &r) { return {0}; } +set all_reports(const left_id &left) { + assert(left.graph() || left.castle() || left.haig() || left.dfa()); + if (left.graph()) { + return all_reports(*left.graph()); + } else if (left.castle()) { + return all_reports(*left.castle()); + } else if (left.dfa()) { + return all_reports(*left.dfa()); + } else { + return all_reports(*left.haig()); + } +} + u32 num_tops(const left_id &r) { return all_tops(r).size(); } From 19e95b0314cf7712e9e1b68b085366b558121b30 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 21 Aug 2017 15:25:04 +1000 Subject: [PATCH 180/190] rose_build_matchers: init LitFragment fields Silences Coverity warning about squash, delay_squash. --- src/rose/rose_build_matchers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 9668ebc9..ef8999ed 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -68,13 +68,13 @@ struct LitFragment { /** * \brief FDR confirm squash mask for included literals. */ - u8 squash; + u8 squash = 0; /** * \brief FDR confirm squash mask for included literals (Delayed * literals only). */ - u8 delay_squash; + u8 delay_squash = 0; /** * \brief Fragment id of included literal. From af519f319066fe480426a5b9cb0602766296719e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 21 Aug 2017 15:26:59 +1000 Subject: [PATCH 181/190] hwlm_build: default for HWLMProto::make_small Silences Coverity warning. 
--- src/hwlm/hwlm_build.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hwlm/hwlm_build.h b/src/hwlm/hwlm_build.h index 4aefc364..91f227dc 100644 --- a/src/hwlm/hwlm_build.h +++ b/src/hwlm/hwlm_build.h @@ -81,7 +81,7 @@ struct HWLMProto { /** * \brief Flag to optimise matcher for small size from Rose. */ - bool make_small; + bool make_small = false; HWLMProto(u8 engType_in, std::vector lits_in); From 5fc2c803a29220173e871eb4c07cf2987971f28f Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 21 Aug 2017 15:12:36 +1000 Subject: [PATCH 182/190] teddy: alignment decl should match defn Spotted by coverity. #174512 --- src/fdr/teddy_runtime_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index 5332423e..1dbeb097 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -42,7 +42,7 @@ extern const u8 ALIGN_DIRECTIVE p_mask_arr[17][32]; #if defined(HAVE_AVX2) -extern const u8 ALIGN_DIRECTIVE p_mask_arr256[33][64]; +extern const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64]; #endif #ifdef ARCH_64_BIT From 3b63a95f016d462ba01a2acb615ad65b3c7f1c86 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 21 Aug 2017 15:23:21 +1000 Subject: [PATCH 183/190] Handle any exceptions while constructing compiler elements Specifically, NG has a Rose which has a LeftEng which has a depth, which can throw an error on construction. If we put these in the try-catch we don't have to worry so much in future. --- src/hs.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hs.cpp b/src/hs.cpp index 9305c924..c2143fe3 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -227,10 +227,10 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, target_t target_info = platform ? 
target_t(*platform) : get_current_target(); - CompileContext cc(isStreaming, isVectored, target_info, g); - NG ng(cc, elements, somPrecision); - try { + CompileContext cc(isStreaming, isVectored, target_info, g); + NG ng(cc, elements, somPrecision); + for (unsigned int i = 0; i < elements; i++) { // Add this expression to the compiler try { From ba0bf0c991075f75e6ea017fa8bc2e73902f4940 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 15 Aug 2017 13:57:43 +1000 Subject: [PATCH 184/190] rose_build_add_mask: improve findMaskLiteral perf --- src/rose/rose_build_add_mask.cpp | 57 ++++++++++++++++++-------------- src/util/ue2string.cpp | 26 +++++++++------ src/util/ue2string.h | 3 ++ 3 files changed, 52 insertions(+), 34 deletions(-) diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index c60c053e..0a7e44c3 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -144,7 +144,7 @@ void findMaskLiteral(const vector &mask, bool streaming, } static -bool initFmlCandidates(const CharReach &cr, vector *cand) { +bool initFmlCandidates(const CharReach &cr, vector &cand) { for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) { char c = (char)i; bool nocase = myisupper(c) && cr.test(mytolower(c)); @@ -152,24 +152,25 @@ bool initFmlCandidates(const CharReach &cr, vector *cand) { continue; } - if (cand->size() >= MAX_MASK_LITS) { + if (cand.size() >= MAX_MASK_LITS) { DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS); return false; } - cand->emplace_back(c, nocase); + cand.emplace_back(c, nocase); } - assert(cand->size() <= MAX_MASK_LITS); - return !cand->empty(); + assert(cand.size() <= MAX_MASK_LITS); + return !cand.empty(); } static -bool expandFmlCandidates(const CharReach &cr, vector *cand) { +bool expandFmlCandidates(const CharReach &cr, vector &curr, + vector &cand) { DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count()); - DEBUG_PRINTF(" current cand list size %zu\n", cand->size()); 
+ DEBUG_PRINTF(" current cand list size %zu\n", cand.size()); - vector curr; + curr.clear(); for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) { char c = (char)i; @@ -178,14 +179,14 @@ bool expandFmlCandidates(const CharReach &cr, vector *cand) { continue; } - for (const auto &lit : *cand) { + for (const auto &lit : cand) { if (curr.size() >= MAX_MASK_LITS) { DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS); return false; } - curr.emplace_back(c, nocase); - curr.back() += lit; + curr.push_back(lit); + curr.back().push_back(c, nocase); } } @@ -196,7 +197,7 @@ bool expandFmlCandidates(const CharReach &cr, vector *cand) { } assert(curr.size() <= MAX_MASK_LITS); - cand->swap(curr); + cand.swap(curr); return true; } @@ -213,6 +214,7 @@ u32 scoreFmlCandidates(const vector &cand) { u32 min_period = len; for (const auto &lit : cand) { + DEBUG_PRINTF("candidate: %s\n", dumpString(lit).c_str()); u32 period = lit.length() - maxStringSelfOverlap(lit); min_period = min(min_period, period); } @@ -238,31 +240,37 @@ bool findMaskLiterals(const vector &mask, vector *lit, *minBound = 0; *length = 0; - vector candidates, best_candidates; + vector candidates, best_candidates, curr_candidates; u32 best_score = 0; u32 best_minOffset = 0; - vector::const_iterator it, itb, ite; - for (it = itb = mask.begin(), ite = mask.end(); it != ite; ++it) { + + for (auto it = mask.begin(); it != mask.end(); ++it) { candidates.clear(); - if (!initFmlCandidates(*it, &candidates)) { + if (!initFmlCandidates(*it, candidates)) { DEBUG_PRINTF("failed to init\n"); continue; } DEBUG_PRINTF("++\n"); - vector::const_iterator jt = it; - while (jt != itb) { + auto jt = it; + while (jt != mask.begin()) { --jt; DEBUG_PRINTF("--\n"); - if (!expandFmlCandidates(*jt, &candidates)) { + if (!expandFmlCandidates(*jt, curr_candidates, candidates)) { DEBUG_PRINTF("expansion stopped\n"); break; } } + + // Candidates have been expanded in reverse order. 
+ for (auto &cand : candidates) { + cand = reverse_literal(cand); + } + u32 score = scoreFmlCandidates(candidates); DEBUG_PRINTF("scored %u for literal set of size %zu\n", score, candidates.size()); if (!candidates.empty() && score >= best_score) { - best_minOffset = it - itb - candidates.back().length() + 1; + best_minOffset = it - mask.begin() - candidates.back().length() + 1; best_candidates.swap(candidates); best_score = score; } @@ -277,11 +285,12 @@ bool findMaskLiterals(const vector &mask, vector *lit, *length = best_candidates.back().length(); DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length); - for (const auto &cand : best_candidates) { - assert(cand.length() == *length); - lit->push_back(cand); - } + assert(all_of_in(best_candidates, [&](const ue2_literal &s) { + return s.length() == *length; + })); + + *lit = std::move(best_candidates); return true; } diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index 39e1edbd..98b007d4 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -291,18 +291,24 @@ void ue2_literal::push_back(char c, bool nc) { s.push_back(c); } +void ue2_literal::reverse() { + std::reverse(s.begin(), s.end()); + + const size_t len = nocase.size(); + for (size_t i = 0; i < len / 2; i++) { + size_t j = len - i - 1; + bool a = nocase.test(i); + bool b = nocase.test(j); + nocase.set(i, b); + nocase.set(j, a); + } +} + // Return a copy of this literal in reverse order. 
ue2_literal reverse_literal(const ue2_literal &in) { - ue2_literal rv; - if (in.empty()) { - return rv; - } - - for (ue2_literal::const_iterator it = in.end(); it != in.begin();) { - --it; - rv.push_back(it->c, it->nocase); - } - return rv; + auto out = in; + out.reverse(); + return out; } bool ue2_literal::operator<(const ue2_literal &b) const { diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 44f1f53f..0fa76c3a 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -191,6 +191,9 @@ public: return a; } + /// Reverse this literal in-place. + void reverse(); + void operator+=(const ue2_literal &b); bool operator==(const ue2_literal &b) const { return s == b.s && nocase == b.nocase; From dd286323a99ddc4d9da5ddb294dc05955f194499 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 23 Aug 2017 11:54:31 +1000 Subject: [PATCH 185/190] Don't let haigs into Tamarama --- src/rose/rose_build_bytecode.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index d3ae52bf..9a546ae4 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1445,6 +1445,10 @@ void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc, continue; } + if (leftfix.haig()) { + continue; + } + if (leftfix.graph() || leftfix.castle()) { leftfixes.emplace(leftfix, role_id); vertex_map[role_id].push_back(v); @@ -1881,6 +1885,10 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, continue; } + if (s.haig()) { + continue; + } + // Currently disable eod suffixes for exclusive analysis if (!tbi.isInETable(v) && (s.graph() || s.castle())) { DEBUG_PRINTF("assigning %p to id %u\n", s.graph(), role_id); From ae918116ab1c7e27613c674d2301076417d34f68 Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Tue, 29 Aug 2017 03:42:46 +0800 Subject: [PATCH 186/190] find_better_daddy: position change --- src/nfa/mcclellancompile.cpp | 28 +++++++++++++++------------- 1 
file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index ff18a68a..d705ddf9 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -940,7 +940,6 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, set *accel_states) { assert(!is_dead(raw)); - u16 total_daddy = 0; dfa_info info(strat); bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; @@ -950,21 +949,24 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, } bool has_eod_reports = raw.hasEodReports(); - bool any_cyclic_near_anchored_state = is_cyclic_near(raw, - raw.start_anchored); - - for (u32 i = 0; i < info.size(); i++) { - find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state, - trust_daddy_states, cc.grey); - total_daddy += info.extra[i].daddytaken; - } - - DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, - info.size() * info.impl_alpha_size, info.size(), - info.impl_alpha_size); bytecode_ptr nfa; if (!using8bit) { + u16 total_daddy = 0; + bool any_cyclic_near_anchored_state + = is_cyclic_near(raw, raw.start_anchored); + + for (u32 i = 0; i < info.size(); i++) { + find_better_daddy(info, i, using8bit, + any_cyclic_near_anchored_state, + trust_daddy_states, cc.grey); + total_daddy += info.extra[i].daddytaken; + } + + DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, + info.size() * info.impl_alpha_size, info.size(), + info.impl_alpha_size); + nfa = mcclellanCompile16(info, cc, accel_states); } else { nfa = mcclellanCompile8(info, cc, accel_states); From 1a812637443772a188a52827e88b63350b663efc Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 18 Sep 2017 15:07:53 +1000 Subject: [PATCH 187/190] Check for unused typedef warning and disable This affects older versions of Boost (1.58), and we were only disabling the warning for g++. Fixes #62. 
--- CMakeLists.txt | 4 +++- unit/CMakeLists.txt | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2520832b..b07aa103 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -255,7 +255,7 @@ else() endif() if(CMAKE_COMPILER_IS_GNUCXX) - set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-unused-local-typedefs -Wno-maybe-uninitialized") + set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-maybe-uninitialized") if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fabi-version=0") endif () @@ -404,6 +404,8 @@ if (CXX_MISSING_DECLARATIONS) set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wmissing-declarations") endif() +CHECK_CXX_COMPILER_FLAG("-Wunused-local-typedefs" CXX_UNUSED_LOCAL_TYPEDEFS) + # gcc5 complains about this CHECK_CXX_COMPILER_FLAG("-Wunused-variable" CXX_WUNUSED_VARIABLE) diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index e9d1821b..06cddebd 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -20,6 +20,10 @@ if(CXX_WUNUSED_VARIABLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable") endif() +if (CXX_UNUSED_LOCAL_TYPEDEFS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs") +endif() + if(CMAKE_COMPILER_IS_GNUCC) # spurious warnings? set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-array-bounds") From 5021d7ab78c5867131608b8b06bf84afa035c15d Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 22 Sep 2017 14:23:05 +1000 Subject: [PATCH 188/190] De-const the empty maps. Clang 3.8 complains about attempting to default init const objects without a user-provided constructor. 
--- src/nfagraph/ng_limex.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index c4147a30..922100e7 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -851,8 +851,8 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, // Should have no bearing on accel calculation, so we leave these empty. const set zombies; - const unordered_map reportSquashMap; - const unordered_map squashMap; + unordered_map reportSquashMap; + unordered_map squashMap; return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap, tops, zombies, cc); From b2e09d78b0914efedfd61feb64bd196ebcf967a0 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 20 Sep 2017 14:57:36 +1000 Subject: [PATCH 189/190] changelog: updates for 4.6.0 release --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e28e3b1..97b311e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,17 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. +## [4.6.0] 2017-09-22 +- New API feature: stream state compression. This allows the user to compress + and restore state for streams to reduce memory usage. +- Many improvements to literal matching performance, including more support + for Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512). +- Compile time improvements, mainly reducing compiler memory allocation. + Also results in reduced compile time for some pattern sets. +- Bugfix for issue #62: fix error building Hyperscan using older versions of + Boost. +- Small updates to fix warnings identified by Coverity. + ## [4.5.2] 2017-07-26 - Bugfix for issue #57: Treat characters between `\Q.\E` as codepoints in UTF8 mode. 
From 3fa1236f09813925c4ff93fdf08df721b32b83c5 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 20 Sep 2017 14:57:46 +1000 Subject: [PATCH 190/190] Bump version number for release --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b07aa103..59a3292b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11) project (hyperscan C CXX) set (HS_MAJOR_VERSION 4) -set (HS_MINOR_VERSION 5) -set (HS_PATCH_VERSION 2) +set (HS_MINOR_VERSION 6) +set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)