From e58a33c9cbdcccae3940137ff63bde734939b76d Mon Sep 17 00:00:00 2001
From: EaseTheWorld
Date: Thu, 26 Jan 2017 08:42:53 +0900
Subject: [PATCH 001/326] Change sqlite3_errstr to sqlite3_errmsg

sqlite3_errstr was introduced in SQLite 3.7.15 (http://sqlite.org/changes.html),
which breaks the build on RedHat 6 (sqlite3 3.6.x); sqlite3_errmsg is already
used at line 77.
---
 tools/hsbench/data_corpus.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/hsbench/data_corpus.cpp b/tools/hsbench/data_corpus.cpp
index 55bfe93a..8e761ec3 100644
--- a/tools/hsbench/data_corpus.cpp
+++ b/tools/hsbench/data_corpus.cpp
@@ -110,7 +110,7 @@ vector<DataBlock> readCorpus(const string &filename) {
     if (status != SQLITE_DONE) {
         ostringstream oss;
         oss << "Error retrieving blocks from corpus: "
-            << sqlite3_errstr(status);
+            << sqlite3_errmsg(db);
 
         status = sqlite3_finalize(statement);
         assert(status == SQLITE_OK);

From 22edaad1dd2a77633da864578bf564610551f72b Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 6 Dec 2016 15:56:27 +1100
Subject: [PATCH 002/326] fdr: compile algo/heuristics improvements

These changes fix a small bug in the algorithm used for bucket assignment
in FDR's compile process, and also tweak a few of the heuristics governing
it.
---
 src/fdr/fdr_compile.cpp | 282 +++++++++++++++++++++++-----------------
 1 file changed, 165 insertions(+), 117 deletions(-)

diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp
index 937513a8..f7451492 100644
--- a/src/fdr/fdr_compile.cpp
+++ b/src/fdr/fdr_compile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -53,13 +53,16 @@
 #include <algorithm>
 #include <cassert>
 #include <cctype>
+#include <cmath>
 #include <cstdio>
 #include <cstdlib>
+#include <limits>
 #include <map>
 #include <memory>
 #include <set>
 #include <vector>
+#include <boost/multi_array.hpp>
 
 using namespace std;
 
@@ -71,12 +74,11 @@ class FDRCompiler : boost::noncopyable {
 private:
     const FDREngineDescription &eng;
     vector<u8> tab;
-    const vector<hwlmLiteral> &lits;
+    vector<hwlmLiteral> lits;
     map<BucketIndex, vector<LiteralIndex> > bucketToLits;
     bool make_small;
 
     u8 *tabIndexToMask(u32 indexInTable);
-    void assignStringToBucket(LiteralIndex l, BucketIndex b);
     void assignStringsToBuckets();
 #ifdef DEBUG
     void dumpMasks(const u8 *defaultMask);
@@ -86,9 +88,9 @@ private:
     void createInitialState(FDR *fdr);
 
 public:
-    FDRCompiler(const vector<hwlmLiteral> &lits_in,
-                const FDREngineDescription &eng_in, bool make_small_in)
-        : eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
+    FDRCompiler(vector<hwlmLiteral> lits_in, const FDREngineDescription &eng_in,
+                bool make_small_in)
+        : eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(move(lits_in)),
           make_small(make_small_in) {}
 
     aligned_unique_ptr<FDR> build(pair<aligned_unique_ptr<u8>, size_t> &link);
@@ -197,66 +199,121 @@ FDRCompiler::setupFDR(pair<aligned_unique_ptr<u8>, size_t> &link) {
     return fdr;
 }
 
-void FDRCompiler::assignStringToBucket(LiteralIndex l, BucketIndex b) {
-    bucketToLits[b].push_back(l);
+//#define DEBUG_ASSIGNMENT
+
+static
+double getScoreUtil(u32 len, u32 count) {
+    return len == 0 ? numeric_limits<double>::max()
+                    : pow(count, 1.05) * pow(len, -3.0);
 }
 
-struct LitOrder {
-    explicit LitOrder(const vector<hwlmLiteral> &vl_) : vl(vl_) {}
-    bool operator()(const u32 &i1, const u32 &i2) const {
-        const string &i1s = vl[i1].s;
-        const string &i2s = vl[i2].s;
+/**
+ * Returns true if the two given literals should be placed in the same chunk as
+ * they are identical except for a difference in caselessness.
+ */ +static +bool isEquivLit(const hwlmLiteral &a, const hwlmLiteral &b, + const hwlmLiteral *last_nocase_lit) { + const size_t a_len = a.s.size(); + const size_t b_len = b.s.size(); - size_t len1 = i1s.size(), len2 = i2s.size(); + if (a_len != b_len) { + return false; + } - if (len1 != len2) { - return len1 < len2; - } else { - auto p = std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin()); - if (p.first == i1s.rend()) { - return false; + bool nocase = last_nocase_lit && a_len == last_nocase_lit->s.size() && + !cmp(a.s.c_str(), last_nocase_lit->s.c_str(), a_len, true); + return !cmp(a.s.c_str(), b.s.c_str(), a.s.size(), nocase); +} + +struct Chunk { + Chunk(u32 first_id_in, u32 count_in, u32 length_in) + : first_id(first_id_in), count(count_in), length(length_in) {} + u32 first_id; //!< first id in this chunk + u32 count; //!< how many are in this chunk + u32 length; //!< how long things in the chunk are +}; + +static +vector assignChunks(const vector &lits, + const map &lenCounts) { + const u32 CHUNK_MAX = 512; + const u32 MAX_CONSIDERED_LENGTH = 16; + + // TODO: detailed early stage literal analysis for v. small cases (actually + // look at lits) yes - after we factor this out and merge in the Teddy + // style of building we can look at this, although the teddy merge + // modelling is quite different. It's still probably adaptable to some + // extent for this class of problem. + + vector chunks; + chunks.reserve(CHUNK_MAX); + + const u32 maxPerChunk = lits.size() / + (CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1; + + u32 currentSize = 0; + u32 chunkStartID = 0; + const hwlmLiteral *last_nocase_lit = nullptr; + + for (u32 i = 0; i < lits.size() && chunks.size() < CHUNK_MAX - 1; i++) { + const auto &lit = lits[i]; + + DEBUG_PRINTF("i=%u, lit=%s%s\n", i, escapeString(lit.s).c_str(), + lit.nocase ? " (nocase)" : ""); + + // If this literal is identical to the last one (aside from differences + // in caselessness), keep going even if we will "overfill" a chunk; we + // don't want to split identical literals into different buckets. + if (i != 0 && isEquivLit(lit, lits[i - 1], last_nocase_lit)) { + DEBUG_PRINTF("identical lit\n"); + goto next_literal; + } + + if ((currentSize < MAX_CONSIDERED_LENGTH && + (lit.s.size() != currentSize)) || + (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) { + currentSize = lit.s.size(); + if (!chunks.empty()) { + chunks.back().count = i - chunkStartID; } - return *p.first < *p.second; + chunkStartID = i; + chunks.emplace_back(i, 0, currentSize); + } +next_literal: + if (lit.nocase) { + last_nocase_lit = &lit; } } -private: - const vector &vl; -}; + assert(!chunks.empty()); + chunks.back().count = lits.size() - chunkStartID; + // close off chunks with an empty row + chunks.emplace_back(lits.size(), 0, 0); -static u64a getScoreUtil(u32 len, u32 count) { - if (len == 0) { - return (u64a)-1; +#ifdef DEBUG_ASSIGNMENT + for (size_t j = 0; j < chunks.size(); j++) { + const auto &chunk = chunks[j]; + printf("chunk %zu first_id=%u count=%u length=%u\n", j, chunk.first_id, + chunk.count, chunk.length); } - const u32 LEN_THRESH = 128; - const u32 elen = (len > LEN_THRESH) ? 
LEN_THRESH : len;
-    const u64a lenScore =
-        (LEN_THRESH * LEN_THRESH * LEN_THRESH) / (elen * elen * elen);
-    return count * lenScore; // deemphasize count - possibly more than needed
-    // this might be overkill in the other direction
+#endif
+
+    DEBUG_PRINTF("built %zu chunks (%zu lits)\n", chunks.size(), lits.size());
+    assert(chunks.size() <= CHUNK_MAX);
+    return chunks;
 }
 
-//#define DEBUG_ASSIGNMENT
 void FDRCompiler::assignStringsToBuckets() {
-    typedef u64a SCORE; // 'Score' type
-    const SCORE MAX_SCORE = (SCORE)-1;
-    const u32 CHUNK_MAX = 512;
-    const u32 BUCKET_MAX = 16;
-    typedef pair<SCORE, u32> SCORE_INDEX_PAIR;
+    const double MAX_SCORE = numeric_limits<double>::max();
 
-    u32 ls = verify_u32(lits.size());
-    assert(ls); // Shouldn't be called with no literals.
+    assert(!lits.empty()); // Shouldn't be called with no literals.
 
-    // make a vector that contains our literals as pointers or u32 LiteralIndex values
-    vector<LiteralIndex> vli;
-    vli.resize(ls);
+    // Count the number of literals for each length.
     map<u32, u32> lenCounts;
-    for (LiteralIndex l = 0; l < ls; l++) {
-        vli[l] = l;
-        lenCounts[lits[l].s.size()]++;
+    for (const auto &lit : lits) {
+        lenCounts[lit.s.size()]++;
     }
-    // sort vector by literal length + if tied on length, 'magic' criteria of some kind (tbd)
-    stable_sort(vli.begin(), vli.end(), LitOrder(lits));
 
 #ifdef DEBUG_ASSIGNMENT
     for (const auto &m : lenCounts) {
@@ -265,103 +322,94 @@ void FDRCompiler::assignStringsToBuckets() {
     printf("\n");
 #endif
 
-    // TODO: detailed early stage literal analysis for v. small cases (actually look at lits)
-    // yes - after we factor this out and merge in the Teddy style of building we can look
-    // at this, although the teddy merge modelling is quite different. It's still probably
-    // adaptable to some extent for this class of problem
+    // Sort literals by literal length. If tied on length, use lexicographic
+    // ordering (of the reversed literals).
+    stable_sort(lits.begin(), lits.end(),
+                [](const hwlmLiteral &a, const hwlmLiteral &b) {
+                    if (a.s.size() != b.s.size()) {
+                        return a.s.size() < b.s.size();
+                    }
+                    auto p = mismatch(a.s.rbegin(), a.s.rend(), b.s.rbegin());
+                    if (p.first != a.s.rend()) {
+                        return *p.first < *p.second;
+                    }
+                    // Sort caseless variants first.
+                    return a.nocase > b.nocase;
+                });
 
-    u32 firstIds[CHUNK_MAX]; // how many are in this chunk (CHUNK_MAX - 1 contains 'last' bound)
-    u32 count[CHUNK_MAX]; // how many are in this chunk
-    u32 length[CHUNK_MAX]; // how long things in the chunk are
+    vector<Chunk> chunks = assignChunks(lits, lenCounts);
 
-    const u32 MAX_CONSIDERED_LENGTH = 16;
-    u32 currentChunk = 0;
-    u32 currentSize = 0;
-    u32 chunkStartID = 0;
-    u32 maxPerChunk = ls/(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;
+    const u32 numChunks = chunks.size();
+    const u32 numBuckets = eng.getNumBuckets();
 
-    for (u32 i = 0; i < ls && currentChunk < CHUNK_MAX - 1; i++) {
-        LiteralIndex l = vli[i];
-        if ((currentSize < MAX_CONSIDERED_LENGTH && (lits[l].s.size() != currentSize)) ||
-            (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
-            currentSize = lits[l].s.size();
-            if (currentChunk) {
-                count[currentChunk - 1 ] = i - chunkStartID;
-            }
-            chunkStartID = firstIds[currentChunk] = i;
-            length[currentChunk] = currentSize;
-            currentChunk++;
-        }
-    }
-
-    assert(currentChunk > 0);
-    count[currentChunk - 1] = ls - chunkStartID;
-    // close off chunks with an empty row
-    firstIds[currentChunk] = ls;
-    length[currentChunk] = 0;
-    count[currentChunk] = 0;
-    u32 nChunks = currentChunk + 1;
-
-#ifdef DEBUG_ASSIGNMENT
-    for (u32 j = 0; j < nChunks; j++) {
-        printf("%d %d %d %d\n", j, firstIds[j], count[j], length[j]);
-    }
-#endif
-
-    SCORE_INDEX_PAIR t[CHUNK_MAX][BUCKET_MAX]; // pair of score, index
-    u32 nb = eng.getNumBuckets();
-
-    for (u32 j = 0; j < nChunks; j++) {
+    // 2D array of (score, chunk index) pairs, indexed by
+    // [chunk_index][bucket_index].
    boost::multi_array<pair<double, u32>, 2> t(
        boost::extents[numChunks][numBuckets]);

    for (u32 j = 0; j < numChunks; j++) {
        u32 cnt = 0;
-        for (u32 k = j; k < nChunks; ++k) {
-            cnt += count[k];
+        for (u32 k = j; k < numChunks; ++k) {
+            cnt += chunks[k].count;
         }
-        t[j][0] = {getScoreUtil(length[j], cnt), 0};
+        t[j][0] = {getScoreUtil(chunks[j].length, cnt), 0};
     }
 
-    for (u32 i = 1; i < nb; i++) {
-        for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
-            SCORE_INDEX_PAIR best = {MAX_SCORE, 0};
-            u32 cnt = count[j];
-            for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
-                SCORE score = getScoreUtil(length[j], cnt);
+    for (u32 i = 1; i < numBuckets; i++) {
+        for (u32 j = 0; j < numChunks - 1; j++) { // don't do last, empty row
+            pair<double, u32> best = {MAX_SCORE, 0};
+            u32 cnt = chunks[j].count;
+            for (u32 k = j + 1; k < numChunks - 1; k++) {
+                auto score = getScoreUtil(chunks[j].length, cnt);
                 if (score > best.first) {
-                    break; // if we're now worse locally than our best score, give up
+                    break; // now worse locally than our best score, give up
                 }
                 score += t[k][i-1].first;
                 if (score < best.first) {
                     best = {score, k};
                 }
+                cnt += chunks[k].count;
             }
             t[j][i] = best;
         }
-        t[nChunks - 1][i] = {0,0}; // fill in empty final row for next iteration
+        t[numChunks - 1][i] = {0,0}; // fill in empty final row for next iter
     }
 
 #ifdef DEBUG_ASSIGNMENT
-    for (u32 j = 0; j < nChunks; j++) {
-        for (u32 i = 0; i < nb; i++) {
-            SCORE_INDEX_PAIR v = t[j][i];
-            printf("<%7lld,%3d>", v.first, v.second);
+    for (u32 j = 0; j < numChunks; j++) {
+        printf("%03u: ", j);
+        for (u32 i = 0; i < numBuckets; i++) {
+            const auto &v = t[j][i];
+            printf("<%0.3f,%3d> ", v.first, v.second);
         }
         printf("\n");
     }
 #endif
 
-    // our best score is in best[0][N_BUCKETS-1] and we can follow the links
+    // our best score is in t[0][N_BUCKETS-1] and we can follow the links
     // to find where our buckets should start and what goes into them
-    for (u32 i = 0, n = nb; n && (i != nChunks - 1); n--) {
+    for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) {
         u32 j = t[i][n - 1].second;
         if (j == 0) {
-            j = nChunks - 1;
+            j = numChunks - 1;
         }
-        // put chunks between i - j into bucket (NBUCKETS-1) - n
-#ifdef DEBUG_ASSIGNMENT
-        printf("placing from %d to %d in bucket %d\n", firstIds[i], firstIds[j],
-               nb - n);
-#endif
-        for (u32 k = firstIds[i]; k < firstIds[j]; k++) {
-            assignStringToBucket((LiteralIndex)vli[k], nb - n);
+
+        // put chunks between i - j into bucket (numBuckets - n).
        u32 first_id = chunks[i].first_id;
        u32 last_id = chunks[j].first_id;
        assert(first_id < last_id);
        u32 bucket = numBuckets - n;
        UNUSED const auto &first_lit = lits[first_id];
        UNUSED const auto &last_lit = lits[last_id - 1];
        DEBUG_PRINTF("placing [%u-%u) in bucket %u (%u lits, len %zu-%zu, "
                     "score %0.4f)\n",
                     first_id, last_id, bucket, last_id - first_id,
                     first_lit.s.length(), last_lit.s.length(),
                     getScoreUtil(first_lit.s.length(), last_id - first_id));

        auto &bucket_lits = bucketToLits[bucket];
        for (u32 k = first_id; k < last_id; k++) {
            bucket_lits.push_back(k);
        }
        i = j;
    }

From 68bdc800fc10b217a36bedd9ce34d54cd052b9d2 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 9 Jan 2017 11:05:02 +1100
Subject: [PATCH 003/326] dump: render literals as regexes (with comments)

---
 src/rose/rose_build_dump.cpp | 75 ++++++++++++++++++++++++++++++++----
 1 file changed, 67 insertions(+), 8 deletions(-)

diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp
index 105ee338..67740312 100644
--- a/src/rose/rose_build_dump.cpp
+++ b/src/rose/rose_build_dump.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -419,22 +419,81 @@ string toHex(Iter i, const Iter &end) {
     return oss.str();
 }
 
+static
+bool isMetaChar(char c) {
+    switch (c) {
+    case '#':
+    case '$':
+    case '(':
+    case ')':
+    case '*':
+    case '+':
+    case '.':
+    case '/':
+    case '?':
+    case '[':
+    case '\\':
+    case ']':
+    case '^':
+    case '{':
+    case '|':
+    case '}':
+        return true;
+    default:
+        return false;
+    }
+}
+
+static
+string toRegex(const string &lit) {
+    ostringstream os;
+    for (char c : lit) {
+        if (0x20 <= c && c <= 0x7e) {
+            if (isMetaChar(c)) {
+                os << "\\" << c;
+            } else {
+                os << c;
+            }
+        } else if (c == '\n') {
+            os << "\\n";
+        } else if (c == '\r') {
+            os << "\\r";
+        } else if (c == '\t') {
+            os << "\\t";
+        } else {
+            os << "\\x" << hex << setw(2) << setfill('0')
+               << (unsigned)(c & 0xff) << dec;
+        }
+    }
+    return os.str();
+}
+
 static
 void dumpTestLiterals(const string &filename,
                       const vector<hwlmLiteral> &lits) {
     ofstream of(filename.c_str());
 
+    // Unique regex index, as literals may share an ID.
+    u32 i = 0;
+
     for (const hwlmLiteral &lit : lits) {
-        of << lit.id << "=";
-        if (lit.nocase) {
-            of << "!";
-        }
-        of << toHex(lit.s.begin(), lit.s.end());
+        // First, detail in a comment.
+        of << "# id=" << lit.id;
         if (!lit.msk.empty()) {
-            of << " " << toHex(lit.msk.begin(), lit.msk.end());
-            of << " " << toHex(lit.cmp.begin(), lit.cmp.end());
+            of << " msk=0x" << toHex(lit.msk.begin(), lit.msk.end());
+            of << " cmp=0x" << toHex(lit.cmp.begin(), lit.cmp.end());
         }
+        of << " groups=0x" << hex << setfill('0') << lit.groups << dec;
+        if (lit.noruns) {
+            of << " noruns";
+        }
+        of << endl;
+
+        // Second, literal rendered as a regex.
+        of << i << ":/" << toRegex(lit.s) << (lit.nocase ? "/i" : "/");
         of << endl;
+
+        i++;
     }
 
     of.close();

From bc2f336d9dff145961161b8705939b46c1c4c484 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Mon, 9 Jan 2017 09:30:03 +1100
Subject: [PATCH 004/326] Work around a deficiency in the C++11/14/17 standard

As explained to us by STL at Microsoft (the author of their vector), there
is a hole in the standard wrt the vector copy constructor: the copy
constructor is always declared, even if it won't compile.
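
For illustration only (not part of the change), a minimal standalone sketch
of the hole using hypothetical Payload/Info types: std::vector's copy
constructor is always declared, regardless of whether its element type is
copyable, so a type holding a vector of a move-only element still reports as
copy-constructible right up until the copy is actually instantiated.
Explicitly deleting the copy constructor and defaulting the move
constructor, as this patch does for ExclusiveInfo, makes the type
unambiguously move-only:

    #include <type_traits>
    #include <vector>

    struct Payload { // stands in for a move-only member like a unique_ptr
        Payload() = default;
        Payload(Payload &&) = default;
        Payload(const Payload &) = delete;
    };

    struct Info {
        std::vector<Payload> parts;
        Info() = default;
        // Without this deletion, is_copy_constructible<Info> is true, and
        // overload resolution may pick a copy that then fails to compile.
        Info(const Info &) = delete;
        Info(Info &&) = default;
    };

    static_assert(!std::is_copy_constructible<Info>::value,
                  "deleting the copy ctor makes Info move-only");

    int main() {
        std::vector<Info> v;
        v.push_back(Info()); // resolves to the move constructor
        return 0;
    }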
---
 src/rose/rose_build_bytecode.cpp | 26 +++++++++++++++++++++++++-
 src/rose/rose_build_exclusive.h  | 19 +------------------
 2 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 9f4abcad..daf827e9 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -248,6 +248,30 @@ struct build_context : boost::noncopyable {
     rose_group squashable_groups = 0;
 };
 
+/** \brief subengine info including built engine and
+* corresponding triggering rose vertices */
+struct ExclusiveSubengine {
+    aligned_unique_ptr<NFA> nfa;
+    vector<RoseVertex> vertices;
+};
+
+/** \brief exclusive info to build tamarama */
+struct ExclusiveInfo {
+    // subengine info
+    vector<ExclusiveSubengine> subengines;
+    // all the report in tamarama
+    set<ReportID> reports;
+    // assigned queue id
+    u32 queue;
+
+    // workaround a deficiency in the standard (as explained by STL @ MS) we
+    // need to tell the compiler that ExclusiveInfo is moveable-only by
+    // deleting the copy cons so that vector doesn't get confused
+    ExclusiveInfo() = default;
+    ExclusiveInfo(const ExclusiveInfo &) = delete;
+    ExclusiveInfo(ExclusiveInfo &&) = default;
+};
+
 }
 
 static
diff --git a/src/rose/rose_build_exclusive.h b/src/rose/rose_build_exclusive.h
index 9cabb1d2..3269dce6 100644
--- a/src/rose/rose_build_exclusive.h
+++ b/src/rose/rose_build_exclusive.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -49,23 +49,6 @@
 
 namespace ue2 {
 
-/** brief subengine info including built engine and
- * corresponding triggering rose vertices */
-struct ExclusiveSubengine {
-    aligned_unique_ptr<NFA> nfa;
-    std::vector<RoseVertex> vertices;
-};
-
-/** \brief exclusive info to build tamarama */
-struct ExclusiveInfo {
-    // subengine info
-    std::vector<ExclusiveSubengine> subengines;
-    // all the report in tamarama
-    std::set<ReportID> reports;
-    // assigned queue id
-    u32 queue;
-};
-
 /** \brief role info structure for exclusive analysis */
 template<typename role_id> struct RoleInfo {

From 8f1b3c89fac47f3f247876ff9d14036cf8bad2dd Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Fri, 20 Jan 2017 13:23:52 +1100
Subject: [PATCH 005/326] rose: remove no-longer-used convertBadLeaves pass

---
 src/grey.cpp                    |   6 +-
 src/grey.h                      |   4 +-
 src/rose/rose_build_compile.cpp |   4 +-
 src/rose/rose_build_convert.cpp | 297 +-------------------------------
 src/rose/rose_build_convert.h   |   3 +-
 5 files changed, 5 insertions(+), 309 deletions(-)

diff --git a/src/grey.cpp b/src/grey.cpp
index 340a34bf..f0374b6d 100644
--- a/src/grey.cpp
+++ b/src/grey.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -105,8 +105,6 @@ Grey::Grey(void) :
     roseGraphReduction(true),
     roseRoleAliasing(true),
     roseMasks(true),
-    roseMaxBadLeafLength(5),
-    roseConvertInfBadLeaves(true),
    roseConvertFloodProneSuffixes(true),
    roseMergeRosesDuringAliasing(true),
    roseMultiTopRoses(true),
@@ -272,8 +270,6 @@ void applyGreyOverrides(Grey *g, const
string &s) {
     G_UPDATE(roseGraphReduction);
     G_UPDATE(roseRoleAliasing);
     G_UPDATE(roseMasks);
-    G_UPDATE(roseMaxBadLeafLength);
-    G_UPDATE(roseConvertInfBadLeaves);
     G_UPDATE(roseConvertFloodProneSuffixes);
     G_UPDATE(roseMergeRosesDuringAliasing);
     G_UPDATE(roseMultiTopRoses);
diff --git a/src/grey.h b/src/grey.h
index 4882af7d..7a6a168b 100644
--- a/src/grey.h
+++ b/src/grey.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -118,8 +118,6 @@ struct Grey {
     bool roseGraphReduction;
     bool roseRoleAliasing;
     bool roseMasks;
-    u32 roseMaxBadLeafLength;
-    bool roseConvertInfBadLeaves;
     bool roseConvertFloodProneSuffixes;
     bool roseMergeRosesDuringAliasing;
     bool roseMultiTopRoses;
diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp
index e13d7c5c..1237a014 100644
--- a/src/rose/rose_build_compile.cpp
+++ b/src/rose/rose_build_compile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -1644,8 +1644,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) {
     dedupeLeftfixes(*this);
     aliasRoles(*this, false); // Don't merge leftfixes.
     dedupeLeftfixes(*this);
-
-    convertBadLeaves(*this);
     uncalcLeaves(*this);
 
     /* note the leftfixes which do not need to keep state across stream
diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp
index b151c0c9..a15d4dc6 100644
--- a/src/rose/rose_build_convert.cpp
+++ b/src/rose/rose_build_convert.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -76,301 +76,6 @@ NFAVertex addHolderVertex(const CharReach &cr, NGHolder &out) {
     return v;
 }
 
-// Returns the first and last vertices.
-static
-pair<NFAVertex, NFAVertex> addLiteralVertices(const RoseGraph &g,
-                                              const RoseLiteralMap &literals,
-                                              const RoseVertex &t_v,
-                                              NGHolder &out) {
-    // We have limited cases that we support: one literal of arbitrary length,
-    // or a bunch of literals of length one that just become a vertex with
-    // their reach unioned together.
-
-    // TODO: generalise this and handle more cases.
-
-    const auto &litids = g[t_v].literals;
-    if (litids.size() > 1) {
-        // Multiple literals of len 1.
-        CharReach v_cr;
-        for (const auto &lit_id : litids) {
-            const rose_literal_id &litv = literals.right.at(lit_id);
-            assert(litv.s.length() == 1);
-            v_cr |= *litv.s.begin();
-        }
-
-        NFAVertex v = addHolderVertex(v_cr, out);
-        return make_pair(v, v);
-    }
-
-    // Otherwise, we have a single literal, could be of arbitrary length.
-    assert(litids.size() == 1);
-    u32 lit_id = *(litids.begin());
-    const rose_literal_id &litv = literals.right.at(lit_id);
-    assert(!litv.s.empty());
-
-    ue2_literal::const_iterator it = litv.s.begin(), ite = litv.s.end();
-    NFAVertex first = addHolderVertex(*it, out), last = first;
-    for (++it; it != ite; ++it) {
-        NFAVertex v = addHolderVertex(*it, out);
-        add_edge(last, v, out);
-        last = v;
-    }
-
-    return make_pair(first, last);
-}
-
-static
-unique_ptr<NGHolder> convertLeafToHolder(const RoseGraph &g,
-                                         const RoseEdge &t_e,
-                                         const RoseLiteralMap &literals) {
-    RoseVertex t_v = target(t_e, g); // leaf vertex for demolition.
-    u32 minBound = g[t_e].minBound;
-    u32 maxBound = g[t_e].maxBound;
-
-    const CharReach dot = CharReach::dot();
-
-    assert(!g[t_v].left);
-
-    auto out = ue2::make_unique<NGHolder>(NFA_SUFFIX);
-
-    // Repeats wired to the start of the graph.
-    DEBUG_PRINTF("bounds [%u, %u]\n", minBound, maxBound);
-    u32 i = 1;
-    NFAVertex last = out->start;
-    for (; i <= minBound; i++) {
-        NFAVertex v = addHolderVertex(dot, *out);
-        add_edge(last, v, *out);
-        last = v;
-    }
-    NFAVertex last_mand = last;
-    if (maxBound != ROSE_BOUND_INF) {
-        for (; i <= maxBound; i++) {
-            NFAVertex v = addHolderVertex(dot, *out);
-            add_edge(last_mand, v, *out);
-            if (last != last_mand) {
-                add_edge(last, v, *out);
-            }
-            last = v;
-        }
-    } else {
-        if (minBound) {
-            add_edge(last_mand, last_mand, *out);
-        } else {
-            NFAVertex v = addHolderVertex(dot, *out);
-            add_edge(last_mand, v, *out);
-            add_edge(v, v, *out);
-            last = v;
-        }
-    }
-
-    setTops(*out);
-
-    // Literal vertices wired to accept.
-    NFAVertex litfirst, litlast;
-    tie(litfirst, litlast) = addLiteralVertices(g, literals, t_v, *out);
-    add_edge(last, litfirst, *out);
-    if (last != last_mand) {
-        add_edge(last_mand, litfirst, *out);
-    }
-    add_edge(litlast, out->accept, *out);
-    insert(&(*out)[litlast].reports, g[t_v].reports);
-    return out;
-}
-
-static
-bool areLiteralsConvertible(const RoseLiteralMap &literals,
-                            const flat_set<u32> &ids) {
-    // Every literal in v must have the same length.
-
-    // TODO: at the moment, we only handle two cases in construction: (a) one
-    // literal of arbitrary length, and (b) many literals, but all with length
-    // 1.
-
-    if (ids.empty()) {
-        return false;
-    }
-
-    auto it = ids.begin(), ite = ids.end();
-    const size_t len = literals.right.at(*it).elength();
-
-    // Note: len may be 0 for cases with special literals, like EOD prefixes.
-
-    if (len != 1 && ids.size() != 1) {
-        DEBUG_PRINTF("more than one literal of len > 1\n");
-        return false;
-    }
-
-    // Check the others all have the same length.
-    while (++it != ite) {
-        if (literals.right.at(*it).elength() != len) {
-            DEBUG_PRINTF("literals have different lengths\n");
-            return false;
-        }
-    }
-
-    return true;
-}
-
-// Returns true if the given vertex doesn't qualify as a bad leaf to be eaten
-// by an NFA.
-static
-bool isUnconvertibleLeaf(const RoseBuildImpl &tbi, const RoseVertex v) {
-    const RoseGraph &g = tbi.g;
-
-    if (in_degree(v, g) != 1) {
-        DEBUG_PRINTF("more than one in-edge\n");
-        return true;
-    }
-
-    const RoseEdge &e = *(in_edges(v, g).first);
-    RoseVertex u = source(e, g);
-
-    if (!g[u].reports.empty()) {
-        DEBUG_PRINTF("pred has accept\n");
-        return true;
-    }
-
-    if (g[u].suffix) {
-        // TODO: this could be handled by adding new vertices to the existing
-        // suffix.
- DEBUG_PRINTF("pred already has suffix\n"); - return true; - } - - if (tbi.isAnyStart(u)) { - DEBUG_PRINTF("fail start\n"); - return true; - } - - if (tbi.isAnchored(u)) { - /* TODO need to check for possible anchored queue overflow? maybe? */ - DEBUG_PRINTF("fail anchored\n"); - return true; - } - - if (g[v].reports.empty() || g[v].eod_accept) { - DEBUG_PRINTF("bad accept\n"); - return true; - } - - if (g[v].suffix) { - DEBUG_PRINTF("suffix\n"); - return true; - } - - if (g[v].left) { - /* TODO: we really should handle this case as we would be checking - * an nfa each time. However it requires completely different graph - * fiddling logic */ - DEBUG_PRINTF("rose prefix action\n"); - return true; - } - - if (!areLiteralsConvertible(tbi.literals, g[v].literals)) { - DEBUG_PRINTF("fail length\n"); - return true; - } - - u32 max_lit_len = tbi.maxLiteralLen(v); - - u32 maxbound = max_lit_len == 1 ? 124 : 32; // arbitrary magic numbers - if (g[e].maxBound > maxbound && g[e].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("fail maxbound (%u)\n", maxbound); - return true; - } - - if (g[e].maxBound == ROSE_BOUND_INF) { - /* slightly risky as nfa won't die */ - DEBUG_PRINTF("fail: .*\n"); - return true; - } - - return false; -} - -// Find all of the leaves with literals whose length is <= len. -static -void findBadLeaves(RoseBuildImpl &tbi, set &bad) { - RoseGraph &g = tbi.g; - u32 len = tbi.cc.grey.roseMaxBadLeafLength; - - for (const auto &m : tbi.literals.right) { - if (m.second.s.length() > len) { - continue; - } - u32 lid = m.first; - DEBUG_PRINTF("%u is a short lit (length %zu)\n", lid, - m.second.s.length()); - - if (tbi.isDelayed(lid)) { - DEBUG_PRINTF("delayed, skipping!\n"); - continue; - } - - const rose_literal_info &info = tbi.literal_info[lid]; - - for (auto v : info.vertices) { - if (!isLeafNode(v, g)) { - continue; - } - if (isUnconvertibleLeaf(tbi, v)) { - continue; // we don't want to touch it - } - - // This leaf may have a predecessor with more than one successor, - // in which case we want to clone the pred just to support this - // leaf. - const RoseEdge &e = *in_edges(v, g).first; - RoseVertex u = source(e, g); - if (out_degree(u, g) != 1) { - DEBUG_PRINTF("re-homing %zu to cloned pred\n", g[v].index); - RoseVertex u2 = tbi.cloneVertex(u); - for (const auto &e_in : in_edges_range(u, g)) { - add_edge(source(e_in, g), u2, g[e_in], g); - } - add_edge(u2, v, g[e], g); - remove_edge(e, g); - } - - DEBUG_PRINTF("%zu is a bad leaf vertex\n", g[v].index); - bad.insert(v); - } - } -} - -void convertBadLeaves(RoseBuildImpl &tbi) { - RoseGraph &g = tbi.g; - set bad; - findBadLeaves(tbi, bad); - DEBUG_PRINTF("found %zu bad leaves\n", bad.size()); - - if (bad.empty()) { - return; - } - - vector dead; - for (auto v : bad) { - assert(in_degree(v, g)); - - const RoseEdge &e = *(in_edges(v, g).first); - - shared_ptr h = convertLeafToHolder(g, e, tbi.literals); - if (num_vertices(*h) >= NFA_MAX_STATES) { - assert(0); // too big! 
-            continue;
-        }
-
-        RoseVertex u = source(e, g);
-        assert(!g[u].suffix);
-        g[u].suffix.graph = h;
-        DEBUG_PRINTF("%zu's nfa holder %p\n", g[u].index, h.get());
-
-        dead.push_back(v);
-    }
-
-    tbi.removeVertices(dead);
-}
-
 static
 size_t suffixFloodLen(const ue2_literal &s) {
     if (s.empty()) {
diff --git a/src/rose/rose_build_convert.h b/src/rose/rose_build_convert.h
index fd7c6d3e..7307c213 100644
--- a/src/rose/rose_build_convert.h
+++ b/src/rose/rose_build_convert.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -34,7 +34,6 @@ namespace ue2 {
 class RoseBuildImpl;
 
 void convertFloodProneSuffixes(RoseBuildImpl &tbi);
-void convertBadLeaves(RoseBuildImpl &tbi);
 void convertPrefixToBounds(RoseBuildImpl &tbi);
 void convertAnchPrefixToBounds(RoseBuildImpl &tbi);

From 5061b76901a15ca3151580d819ed193fd955471e Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 29 Nov 2016 09:25:43 +1100
Subject: [PATCH 006/326] rose: mark RoseInstrCheckLongLit ctor explicit

---
 src/rose/rose_build_program.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
index 0c725b46..39e2e23c 100644
--- a/src/rose/rose_build_program.h
+++ b/src/rose/rose_build_program.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -1729,7 +1729,7 @@ class RoseInstrCheckLongLit
 public:
     std::string literal;
 
-    RoseInstrCheckLongLit(std::string literal_in)
+    explicit RoseInstrCheckLongLit(std::string literal_in)
         : literal(std::move(literal_in)) {}
 
     bool operator==(const RoseInstrCheckLongLit &ri) const {
@@ -1756,7 +1756,7 @@ class RoseInstrCheckLongLitNocase
 public:
     std::string literal;
 
-    RoseInstrCheckLongLitNocase(std::string literal_in)
+    explicit RoseInstrCheckLongLitNocase(std::string literal_in)
         : literal(std::move(literal_in)) {
         upperString(literal);
     }

From 5c9c54042442ae5df476643e562a98ab86303c0f Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 29 Nov 2016 09:27:59 +1100
Subject: [PATCH 007/326] rose: fix up comments referring to CHECK_LITERAL

This instruction is now called CHECK_LONG_LIT.
---
 src/rose/rose_build_bytecode.cpp | 8 ++++----
 src/rose/rose_build_compile.cpp  | 4 ++--
 src/rose/stream_long_lit.h       | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index daf827e9..edf3e5e9 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -216,7 +216,7 @@ struct build_context : boost::noncopyable {
      * written to the engine_blob. */
     vector<u32> litPrograms;
 
-    /** \brief List of long literals (ones with CHECK_LITERAL instructions)
+    /** \brief List of long literals (ones with CHECK_LONG_LIT instructions)
      * that need hash table support.
 */
     vector<ue2_case_string> longLiterals;
 
@@ -2595,14 +2595,14 @@ void recordLongLiterals(build_context &bc, const RoseProgram &program) {
     for (const auto &ri : program) {
         if (const auto *ri_check =
                 dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) {
-            DEBUG_PRINTF("found CHECK_LITERAL for string '%s'\n",
+            DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n",
                          escapeString(ri_check->literal).c_str());
             bc.longLiterals.emplace_back(ri_check->literal, false);
             continue;
         }
         if (const auto *ri_check =
                 dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) {
-            DEBUG_PRINTF("found CHECK_LITERAL_NOCASE for string '%s'\n",
+            DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n",
                          escapeString(ri_check->literal).c_str());
             bc.longLiterals.emplace_back(ri_check->literal, true);
         }
@@ -4974,7 +4974,7 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
             goto assign_new_id;
         }
 
-        // Long literals (that require CHECK_LITERAL instructions) cannot be
+        // Long literals (that require CHECK_LONG_LIT instructions) cannot be
         // merged.
         if (lit.s.length() > longLitLengthThreshold) {
             DEBUG_PRINTF("id %u is a long literal\n", int_id);
diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp
index 1237a014..9b8ea7f7 100644
--- a/src/rose/rose_build_compile.cpp
+++ b/src/rose/rose_build_compile.cpp
@@ -120,9 +120,9 @@ void RoseBuildImpl::handleMixedSensitivity(void) {
     }
 
     // We don't want to explode long literals, as they require confirmation
-    // with a CHECK_LITERAL instruction and need unique final_ids.
+    // with a CHECK_LONG_LIT instruction and need unique final_ids.
     // TODO: we could allow explosion for literals where the prefixes
-    // covered by CHECK_LITERAL are identical.
+    // covered by CHECK_LONG_LIT are identical.
     if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN &&
         limited_explosion(lit.s)) {
         DEBUG_PRINTF("need to explode existing string '%s'\n",
diff --git a/src/rose/stream_long_lit.h b/src/rose/stream_long_lit.h
index d78e2863..0736ec88 100644
--- a/src/rose/stream_long_lit.h
+++ b/src/rose/stream_long_lit.h
@@ -111,7 +111,7 @@ void loadLongLiteralState(const struct RoseEngine *t, char *state,
     }
 
     // If we don't have any long literals in play, these values must point to
-    // the real history buffer so that CHECK_LITERAL instructions examine the
+    // the real history buffer so that CHECK_LONG_LIT instructions examine the
     // history buffer.
     scratch->tctxt.ll_buf = scratch->core_info.hbuf;
     scratch->tctxt.ll_len = scratch->core_info.hlen;

From 07a6b6510ca51255245d8198a44e36218f1c28a2 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 28 Nov 2016 16:46:03 +1100
Subject: [PATCH 008/326] rose/hwlm: limit literals to eight bytes

Rework HWLM to work over literals of at most eight bytes ("medium length"),
doing confirm in the Rose interpreter.
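
As a concrete illustration (a simplified, hypothetical sketch, not code from
this patch): the literal matcher is now only given the eight-byte suffix of a
longer literal, so a match reported at offset end must be confirmed against
the full literal before it is acted on. In block mode the confirm reduces to
a bounds check plus a comparison; the real CHECK_MED_LIT instruction added
here also handles the history buffer and caseless comparison:

    #include <cstring>

    // Returns true if the full literal `lit` of length `lit_len` ends at
    // offset `end` in `buf`; the matcher itself only saw its 8-byte suffix.
    static bool confirmMediumLit(const unsigned char *buf, size_t end,
                                 const unsigned char *lit, size_t lit_len) {
        if (end < lit_len) {
            return false; // literal would start before the buffer
        }
        return std::memcmp(buf + end - lit_len, lit, lit_len) == 0;
    }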
---
 src/fdr/fdr_compile.cpp          |  48 +++--------
 src/fdr/fdr_compile.h            |   9 +-
 src/hwlm/hwlm_build.cpp          |  48 +----------
 src/hwlm/hwlm_build.h            |  26 +-----
 src/hwlm/hwlm_literal.h          |   4 +-
 src/rose/program_runtime.h       | 100 ++++++++++++++++++++--
 src/rose/rose_build_bytecode.cpp |  79 +++++++++++------
 src/rose/rose_build_compile.cpp  |   2 +-
 src/rose/rose_build_dump.cpp     |  26 +++---
 src/rose/rose_build_impl.h       |  13 ++-
 src/rose/rose_build_matchers.cpp | 141 +++++++++++++++----------------
 src/rose/rose_build_matchers.h   |  16 ++--
 src/rose/rose_build_misc.cpp     |   4 +-
 src/rose/rose_build_program.cpp  |  24 +++++-
 src/rose/rose_build_program.h    |  98 ++++++++++++++++++---
 src/rose/rose_dump.cpp           |  24 +++++-
 src/rose/rose_program.h          |  34 +++++++-
 unit/internal/fdr.cpp            |  92 ++------------------
 unit/internal/fdr_flood.cpp      |  12 +--
 19 files changed, 452 insertions(+), 348 deletions(-)

diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp
index f7451492..c9d6cbcb 100644
--- a/src/fdr/fdr_compile.cpp
+++ b/src/fdr/fdr_compile.cpp
@@ -545,35 +545,12 @@ FDRCompiler::build(pair<aligned_unique_ptr<u8>, size_t> &link) {
 } // namespace
 
 static
-size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
-    size_t rv = 0;
-    for (const auto &lit : lits) {
-        rv = max(rv, lit.msk.size());
-    }
-    return rv;
-}
-
-static
-void setHistoryRequired(hwlmStreamingControl &stream_ctl,
-                        const vector<hwlmLiteral> &lits) {
-    size_t max_mask_len = maxMaskLen(lits);
-
-    // we want enough history to manage the longest literal and the longest
-    // mask.
-    stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1;
-}
-
-static
-aligned_unique_ptr<FDR>
-fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
-                      const target_t &target, const Grey &grey, u32 hint,
-                      hwlmStreamingControl *stream_control) {
+aligned_unique_ptr<FDR> fdrBuildTableInternal(const vector<hwlmLiteral> &lits,
+                                              bool make_small,
+                                              const target_t &target,
+                                              const Grey &grey, u32 hint) {
     pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
 
-    if (stream_control) {
-        setHistoryRequired(*stream_control, lits);
-    }
-
     DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ?
"avx2" : "no-avx2"); if (grey.fdrAllowTeddy) { @@ -606,21 +583,18 @@ fdrBuildTableInternal(const vector &lits, bool make_small, aligned_unique_ptr fdrBuildTable(const vector &lits, bool make_small, const target_t &target, - const Grey &grey, - hwlmStreamingControl *stream_control) { - return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID, - stream_control); + const Grey &grey) { + return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID); } #if !defined(RELEASE_BUILD) -aligned_unique_ptr -fdrBuildTableHinted(const vector &lits, bool make_small, u32 hint, - const target_t &target, const Grey &grey, - hwlmStreamingControl *stream_control) { +aligned_unique_ptr fdrBuildTableHinted(const vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey) { pair link(nullptr, 0); - return fdrBuildTableInternal(lits, make_small, target, grey, hint, - stream_control); + return fdrBuildTableInternal(lits, make_small, target, grey, hint); } #endif diff --git a/src/fdr/fdr_compile.h b/src/fdr/fdr_compile.h index c12e0071..a135a6e1 100644 --- a/src/fdr/fdr_compile.h +++ b/src/fdr/fdr_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,21 +43,18 @@ struct FDR; namespace ue2 { struct hwlmLiteral; -struct hwlmStreamingControl; struct Grey; struct target_t; ue2::aligned_unique_ptr fdrBuildTable(const std::vector &lits, bool make_small, - const target_t &target, const Grey &grey, - hwlmStreamingControl *stream_control = nullptr); + const target_t &target, const Grey &grey); #if !defined(RELEASE_BUILD) ue2::aligned_unique_ptr fdrBuildTableHinted(const std::vector &lits, bool make_small, - u32 hint, const target_t &target, const Grey &grey, - hwlmStreamingControl *stream_control = nullptr); + u32 hint, const target_t &target, const Grey &grey); #endif diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index fa6335c9..29e71293 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "hwlm.h" #include "hwlm_build.h" #include "hwlm_internal.h" +#include "hwlm_literal.h" #include "noodle_engine.h" #include "noodle_build.h" #include "scratch.h" @@ -512,7 +513,6 @@ bool everyoneHasGroups(const vector &lits) { static bool isNoodleable(const vector &lits, - const hwlmStreamingControl *stream_control, const CompileContext &cc) { if (!cc.grey.allowNoodle) { return false; @@ -523,19 +523,6 @@ bool isNoodleable(const vector &lits, return false; } - if (stream_control) { // nullptr if in block mode - if (lits.front().s.length() > stream_control->history_max + 1) { - DEBUG_PRINTF("length of %zu too long for history max %zu\n", - lits.front().s.length(), - stream_control->history_max); - return false; - } - if (2 * lits.front().s.length() - 2 > FDR_TEMP_BUF_SIZE) { - assert(0); - return false; - } - } - if (!lits.front().msk.empty()) { DEBUG_PRINTF("noodle can't handle supplementary masks\n"); return false; @@ -545,22 +532,11 @@ bool isNoodleable(const vector &lits, } aligned_unique_ptr hwlmBuild(const vector &lits, - 
hwlmStreamingControl *stream_control, bool make_small, const CompileContext &cc, hwlm_group_t expected_groups) { assert(!lits.empty()); dumpLits(lits); - if (stream_control) { - assert(stream_control->history_min <= stream_control->history_max); - - // We should not have been passed any literals that are too long to - // match with a maximally-sized history buffer. - assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) { - return lit.s.length() <= stream_control->history_max + 1; - })); - } - // Check that we haven't exceeded the maximum number of literals. if (lits.size() > cc.grey.limitLiteralCount) { throw ResourceLimitError(); @@ -595,7 +571,7 @@ aligned_unique_ptr hwlmBuild(const vector &lits, assert(everyoneHasGroups(lits)); - if (isNoodleable(lits, stream_control, cc)) { + if (isNoodleable(lits, cc)) { DEBUG_PRINTF("build noodle table\n"); engType = HWLM_ENGINE_NOOD; const hwlmLiteral &lit = lits.front(); @@ -603,19 +579,11 @@ aligned_unique_ptr hwlmBuild(const vector &lits, if (noodle) { engSize = noodSize(noodle.get()); } - if (stream_control) { - // For now, a single literal still goes to noodle and asks - // for a great big history - stream_control->literal_history_required = lit.s.length() - 1; - assert(stream_control->literal_history_required - <= stream_control->history_max); - } eng = move(noodle); } else { DEBUG_PRINTF("building a new deal\n"); engType = HWLM_ENGINE_FDR; - auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey, - stream_control); + auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey); if (fdr) { engSize = fdrSize(fdr.get()); } @@ -640,14 +608,6 @@ aligned_unique_ptr hwlmBuild(const vector &lits, buildForwardAccel(h.get(), lits, expected_groups); } - if (stream_control) { - DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n", - stream_control->literal_history_required, - stream_control->history_max); - assert(stream_control->literal_history_required - <= stream_control->history_max); - } - return h; } diff --git a/src/hwlm/hwlm_build.h b/src/hwlm/hwlm_build.h index fbf359e6..5dd7dbc9 100644 --- a/src/hwlm/hwlm_build.h +++ b/src/hwlm/hwlm_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,6 @@ #define HWLM_BUILD_H #include "hwlm.h" -#include "hwlm_literal.h" #include "ue2common.h" #include "util/alloc.h" @@ -47,30 +46,12 @@ namespace ue2 { struct CompileContext; struct Grey; -struct target_t; - -/** \brief Structure gathering together the input/output parameters related to - * streaming mode operation. */ -struct hwlmStreamingControl { - /** \brief IN parameter: Upper limit on the amount of history that can be - * requested. */ - size_t history_max; - - /** \brief IN parameter: History already known to be used before literal - * analysis. */ - size_t history_min; - - /** \brief OUT parameter: History required by the literal matcher to - * correctly match all literals. */ - size_t literal_history_required; -}; +struct hwlmLiteral; /** \brief Build an \ref HWLM literal matcher runtime structure for a group of * literals. * * \param lits The group of literals. - * \param stream_control Streaming control parameters. If the matcher will - * operate in non-streaming (block) mode, this pointer should be NULL. * \param make_small Optimise matcher for small size. * \param cc Compile context. 
* \param expected_groups FIXME: document me! @@ -80,8 +61,7 @@ struct hwlmStreamingControl { * thrown. */ aligned_unique_ptr -hwlmBuild(const std::vector &lits, - hwlmStreamingControl *stream_control, bool make_small, +hwlmBuild(const std::vector &lits, bool make_small, const CompileContext &cc, hwlm_group_t expected_groups = HWLM_ALL_GROUPS); diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index b7af99d3..a08b2ff6 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,7 +42,7 @@ namespace ue2 { /** \brief Max length of the literal passed to HWLM. */ -#define HWLM_LITERAL_MAX_LEN 255 +#define HWLM_LITERAL_MAX_LEN 8 /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */ #define HWLM_MASKLEN 8 diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index e883c239..1a5f25e9 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1409,6 +1409,68 @@ int roseCheckLongLiteral(const struct RoseEngine *t, return 1; } +static rose_inline +int roseCheckMediumLiteral(const struct RoseEngine *t, + const struct hs_scratch *scratch, u64a end, + u32 lit_offset, u32 lit_length, char nocase) { + const struct core_info *ci = &scratch->core_info; + const u8 *lit = getByOffset(t, lit_offset); + + DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length); + DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset); + + if (end < lit_length) { + DEBUG_PRINTF("too short!\n"); + return 0; + } + + // If any portion of the literal matched in the current buffer, check it. + if (end > ci->buf_offset) { + u32 scan_len = MIN(end - ci->buf_offset, lit_length); + u64a scan_start = end - ci->buf_offset - scan_len; + DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len, + scan_start, end); + if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len, + scan_len, nocase)) { + DEBUG_PRINTF("cmp of suffix failed\n"); + return 0; + } + } + + // If the entirety of the literal was in the current block, we are done. + if (end - lit_length >= ci->buf_offset) { + DEBUG_PRINTF("literal confirmed in current block\n"); + return 1; + } + + // We still have a prefix which we must test against the history buffer. + assert(t->mode != HS_MODE_BLOCK); + + u64a lit_start_offset = end - lit_length; + u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset); + u32 hist_rewind = ci->buf_offset - lit_start_offset; + DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", ci->hlen, hist_rewind); + + // History length check required for confirm in the EOD and delayed + // rebuild paths. 
+ if (hist_rewind > ci->hlen) { + DEBUG_PRINTF("not enough history\n"); + return 0; + } + + DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n", + prefix_len, ci->hlen, hist_rewind); + assert(hist_rewind <= ci->hlen); + if (cmpForward(ci->hbuf + ci->hlen - hist_rewind, lit, prefix_len, + nocase)) { + DEBUG_PRINTF("cmp of prefix failed\n"); + return 0; + } + + DEBUG_PRINTF("cmp succeeded\n"); + return 1; +} + static void updateSeqPoint(struct RoseContext *tctxt, u64a offset, const char from_mpv) { @@ -2060,8 +2122,10 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, const char nocase = 0; if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, ri->lit_length, nocase)) { - DEBUG_PRINTF("halt: failed long lit check\n"); - return HWLM_CONTINUE_MATCHING; + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; } } PROGRAM_NEXT_INSTRUCTION @@ -2070,8 +2134,34 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, const char nocase = 1; if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset, ri->lit_length, nocase)) { - DEBUG_PRINTF("halt: failed nocase long lit check\n"); - return HWLM_CONTINUE_MATCHING; + DEBUG_PRINTF("failed nocase long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT) { + const char nocase = 0; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + const char nocase = 1; + if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset, + ri->lit_length, nocase)) { + DEBUG_PRINTF("failed long lit check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; } } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index edf3e5e9..9f978134 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4353,13 +4353,18 @@ static void makeCheckLiteralInstruction(const RoseBuildImpl &build, const build_context &bc, u32 final_id, RoseProgram &program) { + assert(bc.longLitLengthThreshold > 0); + + DEBUG_PRINTF("final_id %u, long lit threshold %zu\n", final_id, + bc.longLitLengthThreshold); + const auto &lits = build.final_id_to_literal.at(final_id); if (lits.size() != 1) { - // Long literals should not share a final_id. + // final_id sharing is only allowed for literals that are short enough + // to not require any additional confirm work. 
assert(all_of(begin(lits), end(lits), [&](u32 lit_id) {
             const rose_literal_id &lit = build.literals.right.at(lit_id);
-            return lit.table != ROSE_FLOATING ||
-                   lit.s.length() <= bc.longLitLengthThreshold;
+            return lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX;
         }));
         return;
     }
@@ -4370,11 +4375,9 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build,
     }
 
     const rose_literal_id &lit = build.literals.right.at(lit_id);
-    if (lit.table != ROSE_FLOATING) {
-        return;
-    }
-    assert(bc.longLitLengthThreshold > 0);
-    if (lit.s.length() <= bc.longLitLengthThreshold) {
+
+    if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) {
+        DEBUG_PRINTF("lit short enough to not need confirm\n");
         return;
     }
 
@@ -4383,11 +4386,34 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build,
         throw ResourceLimitError();
     }
 
+    if (lit.s.length() <= bc.longLitLengthThreshold) {
+        DEBUG_PRINTF("is a medium-length literal\n");
+        const auto *end_inst = program.end_instruction();
+        unique_ptr<RoseInstruction> ri;
+        if (lit.s.any_nocase()) {
+            ri = make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(),
+                                                         end_inst);
+        } else {
+            ri = make_unique<RoseInstrCheckMedLit>(lit.s.get_string(),
+                                                   end_inst);
+        }
+        program.add_before_end(move(ri));
+        return;
+    }
+
+    // Long literal support should only really be used for the floating table
+    // in streaming mode.
+    assert(lit.table == ROSE_FLOATING && build.cc.streaming);
+
+    DEBUG_PRINTF("is a long literal\n");
+
+    const auto *end_inst = program.end_instruction();
     unique_ptr<RoseInstruction> ri;
     if (lit.s.any_nocase()) {
-        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string());
+        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(),
+                                                      end_inst);
    } else {
-        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string());
+        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst);
    }
    program.add_before_end(move(ri));
 }
@@ -4522,6 +4548,7 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
     }
 
     RoseProgram program;
+    makeCheckLiteralInstruction(build, bc, final_id, program);
     makeCheckLitMaskInstruction(build, bc, final_id, program);
     makePushDelayedInstructions(build, final_id, program);
     assert(!program.empty());
@@ -4951,7 +4978,7 @@ u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
 
 static
 void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
-                          size_t longLitLengthThreshold, u32 *next_final_id) {
+                          u32 *next_final_id) {
     const auto &g = build.g;
     auto &literal_info = build.literal_info;
     auto &final_id_to_literal = build.final_id_to_literal;
@@ -4961,8 +4988,6 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
      * ids and squash the same roles and have the same group squashing
      * behaviour. Benefits literals cannot be merged. */
 
-    assert(longLitLengthThreshold > 0);
-
     for (u32 int_id : lits) {
         rose_literal_info &curr_info = literal_info[int_id];
         const rose_literal_id &lit = build.literals.right.at(int_id);
@@ -4974,10 +4999,10 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
             goto assign_new_id;
         }
 
-        // Long literals (that require CHECK_LONG_LIT instructions) cannot be
-        // merged.
-        if (lit.s.length() > longLitLengthThreshold) {
-            DEBUG_PRINTF("id %u is a long literal\n", int_id);
+        // Literals that need confirmation with CHECK_LONG_LIT or CHECK_MED_LIT
+        // cannot be merged.
+        if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
+            DEBUG_PRINTF("id %u needs lit confirm\n", int_id);
             goto assign_new_id;
         }
 
@@ -5001,7 +5026,7 @@ void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
             const auto &cand_info = literal_info[cand_id];
             const auto &cand_lit = build.literals.right.at(cand_id);
 
-            if (cand_lit.s.length() > longLitLengthThreshold) {
+            if (cand_lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
                 continue;
             }
 
@@ -5071,8 +5096,7 @@ bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
 
 /** \brief Allocate final literal IDs for all literals. */
 static
-void allocateFinalLiteralId(RoseBuildImpl &build,
-                            size_t longLitLengthThreshold) {
+void allocateFinalLiteralId(RoseBuildImpl &build) {
     set<u32> anch;
     set<u32> norm;
     set<u32> delay;
@@ -5106,15 +5130,15 @@ void allocateFinalLiteralId(RoseBuildImpl &build) {
     }
 
     /* normal lits */
-    allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id);
+    allocateFinalIdToSet(build, norm, &next_final_id);
 
     /* next anchored stuff */
     build.anchored_base_id = next_final_id;
-    allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id);
+    allocateFinalIdToSet(build, anch, &next_final_id);
 
     /* delayed ids come last */
     build.delay_base_id = next_final_id;
-    allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id);
+    allocateFinalIdToSet(build, delay, &next_final_id);
 }
 
 static
@@ -5188,10 +5212,11 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build,
                             const size_t historyRequired) {
     const auto &cc = build.cc;
 
-    // In block mode, we should only use the long literal support for literals
-    // that cannot be handled by HWLM.
+    // In block mode, we don't have history, so we don't need long literal
+    // support and can just use "medium-length" literal confirm. TODO: we could
+    // specialize further and have a block mode literal confirm instruction.
     if (!cc.streaming) {
-        return HWLM_LITERAL_MAX_LEN;
+        return SIZE_MAX;
     }
 
     size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN;
@@ -5227,7 +5252,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                                                  historyRequired);
     DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold);
 
-    allocateFinalLiteralId(*this, longLitLengthThreshold);
+    allocateFinalLiteralId(*this);
 
     auto anchored_dfas = buildAnchoredDfas(*this);
 
diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp
index 9b8ea7f7..0505a71e 100644
--- a/src/rose/rose_build_compile.cpp
+++ b/src/rose/rose_build_compile.cpp
@@ -123,7 +123,7 @@ void RoseBuildImpl::handleMixedSensitivity(void) {
     // We don't want to explode long literals, as they require confirmation
     // with a CHECK_LONG_LIT instruction and need unique final_ids.
     // TODO: we could allow explosion for literals where the prefixes
     // covered by CHECK_LONG_LIT are identical.
- if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && + if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX && limited_explosion(lit.s)) { DEBUG_PRINTF("need to explode existing string '%s'\n", dumpString(lit.s).c_str()); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 67740312..e7cef100 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -35,7 +35,7 @@ #include "rose/rose_dump.h" #include "rose_internal.h" #include "ue2common.h" -#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_dump.h" @@ -505,24 +505,22 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { size_t longLitLengthThreshold = calcLongLitThreshold(build, historyRequired); - auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED, - longLitLengthThreshold); - dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits); + auto mp = makeMatcherProto(build, ROSE_ANCHORED, longLitLengthThreshold); + dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits); - lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold); - dumpTestLiterals(base + "rose_float_test_literals.txt", lits); + mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold); + dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, - build.ematcher_region_size); - dumpTestLiterals(base + "rose_eod_test_literals.txt", lits); + mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size); + dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits); if (!build.cc.streaming) { - lits = fillHamsterLiteralList(build, ROSE_FLOATING, + mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); + auto mp2 = makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - lits.insert(end(lits), begin(lits2), end(lits2)); - dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits); + mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits)); + dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits); } } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 6b326d34..02c5a389 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,6 +58,17 @@ namespace ue2 { #define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 +/** + * \brief The largest allowable "short" literal fragment which can be given to + * a literal matcher directly. + * + * Literals longer than this will be truncated to their suffix and confirmed in + * the Rose interpreter, either as "medium length" literals which can be + * confirmed from history, or "long literals" which make use of the streaming + * table support. 
+ */ +#define ROSE_SHORT_LITERAL_LEN_MAX 8 + struct BoundaryReports; struct CastleProto; struct CompileContext; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 01633c06..f7c237a7 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -491,8 +491,14 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, return false; } - if (build.literals.right.at(id).s.length() > max_len) { - DEBUG_PRINTF("requires literal check\n"); + size_t len = build.literals.right.at(id).s.length(); + if (len > max_len) { + DEBUG_PRINTF("long literal, requires confirm\n"); + return false; + } + + if (len > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("medium-length literal, requires confirm\n"); return false; } @@ -626,10 +632,10 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, return lit_min_offset; } -vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table, - size_t max_len, u32 max_offset) { - vector lits; +MatcherProto makeMatcherProto(const RoseBuildImpl &build, + rose_literal_table table, size_t max_len, + u32 max_offset) { + MatcherProto mp; for (const auto &e : build.literals.right) { const u32 id = e.first; @@ -652,7 +658,8 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, /* Note: requires_benefits are handled in the literal entries */ const ue2_literal &lit = e.second.s; - DEBUG_PRINTF("lit='%s'\n", escapeString(lit).c_str()); + DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(lit).c_str(), + lit.length()); if (max_offset != ROSE_BOUND_INF) { u64a min_report = literalMinReportOffset(build, e.second, info); @@ -665,14 +672,22 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, const vector &msk = e.second.msk; const vector &cmp = e.second.cmp; - bool noruns = isNoRunsLiteral(build, id, info, max_len); + size_t lit_hist_len = 0; + if (build.cc.streaming) { + lit_hist_len = max(msk.size(), min(lit.length(), max_len)); + lit_hist_len = lit_hist_len ? lit_hist_len - 1 : 0; + } + DEBUG_PRINTF("lit requires %zu bytes of history\n", lit_hist_len); + assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); + if (info.requires_explode) { DEBUG_PRINTF("exploding lit\n"); - // We do not require_explode for long literals. - assert(lit.length() <= max_len); + // We do not require_explode for literals that need confirm + // (long/medium length literals). 
+ assert(lit.length() <= ROSE_SHORT_LITERAL_LEN_MAX); case_iter cit = caseIterateBegin(lit); case_iter cite = caseIterateEnd(); @@ -690,8 +705,9 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, continue; } - lits.emplace_back(move(s), nocase, noruns, final_id, groups, - msk, cmp); + mp.history_required = max(mp.history_required, lit_hist_len); + mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, + msk, cmp); } } else { string s = lit.get_string(); @@ -702,11 +718,13 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, final_id, escapeString(s).c_str(), (int)nocase, noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - if (s.length() > max_len) { - DEBUG_PRINTF("truncating to tail of length %zu\n", max_len); - s.erase(0, s.length() - max_len); + if (s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("truncating to tail of length %zu\n", + size_t{ROSE_SHORT_LITERAL_LEN_MAX}); + s.erase(0, s.length() - ROSE_SHORT_LITERAL_LEN_MAX); // We shouldn't have set a threshold below 8 chars. - assert(msk.size() <= max_len); + assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); + assert(!noruns); } if (!maskIsConsistent(s, nocase, msk, cmp)) { @@ -714,12 +732,13 @@ vector fillHamsterLiteralList(const RoseBuildImpl &build, continue; } - lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, - cmp); + mp.history_required = max(mp.history_required, lit_hist_len); + mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, + cmp); } } - return lits; + return mp; } aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, @@ -730,49 +749,31 @@ aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, *fsize = 0; *fgroups = 0; - auto fl = fillHamsterLiteralList(build, ROSE_FLOATING, - longLitLengthThreshold); - if (fl.empty()) { + auto mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold); + if (mp.lits.empty()) { DEBUG_PRINTF("empty floating matcher\n"); return nullptr; } - for (const hwlmLiteral &hlit : fl) { - *fgroups |= hlit.groups; + for (const hwlmLiteral &lit : mp.lits) { + *fgroups |= lit.groups; } - hwlmStreamingControl ctl; - hwlmStreamingControl *ctlp; - if (build.cc.streaming) { - ctl.history_max = build.cc.grey.maxHistoryAvailable; - ctl.history_min = MAX(*historyRequired, - build.cc.grey.minHistoryAvailable); - DEBUG_PRINTF("streaming control, history max=%zu, min=%zu\n", - ctl.history_max, ctl.history_min); - ctlp = &ctl; - } else { - ctlp = nullptr; // Null for non-streaming. 
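
The lit_hist_len computation above decides how much history the streaming
matcher must keep per literal: every byte except the last, bounded by the
mask length and max_len. Restated as a standalone helper (a sketch;
litHistoryBytes is our name, not the patch's):

    #include <algorithm>
    #include <cstddef>

    static size_t litHistoryBytes(size_t msk_size, size_t lit_len,
                                  size_t max_len, bool streaming) {
        if (!streaming) {
            return 0; // block mode never needs history
        }
        size_t n = std::max(msk_size, std::min(lit_len, max_len));
        return n ? n - 1 : 0;
    }

For example, a 20-byte literal with no mask and a max_len of 33 needs 19
bytes of history; the per-matcher maximum is accumulated into
MatcherProto::history_required.
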
- } - - aligned_unique_ptr ftable = - hwlmBuild(fl, ctlp, false, build.cc, build.getInitialGroups()); - if (!ftable) { + auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); + if (!hwlm) { throw CompileError("Unable to generate bytecode."); } if (build.cc.streaming) { - DEBUG_PRINTF("literal_history_required=%zu\n", - ctl.literal_history_required); - assert(ctl.literal_history_required <= - build.cc.grey.maxHistoryAvailable); - *historyRequired = max(*historyRequired, - ctl.literal_history_required); + DEBUG_PRINTF("history_required=%zu\n", mp.history_required); + assert(mp.history_required <= build.cc.grey.maxHistoryAvailable); + *historyRequired = max(*historyRequired, mp.history_required); } - *fsize = hwlmSize(ftable.get()); + *fsize = hwlmSize(hwlm.get()); assert(*fsize); DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize); - return ftable; + return hwlm; } aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, @@ -791,38 +792,38 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto lits = fillHamsterLiteralList( - build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - if (lits.empty()) { + auto mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); + if (mp.lits.empty()) { DEBUG_PRINTF("no floating table\n"); return nullptr; - } else if (lits.size() == 1) { + } else if (mp.lits.size() == 1) { DEBUG_PRINTF("single floating literal, noodle will be fast enough\n"); return nullptr; } - auto anchored_lits = - fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - if (anchored_lits.empty()) { + auto mp_anchored = + makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); + if (mp_anchored.lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; } - lits.insert(lits.end(), anchored_lits.begin(), anchored_lits.end()); + mp.lits.insert(mp.lits.end(), mp_anchored.lits.begin(), + mp_anchored.lits.end()); // None of our literals should be longer than the small block limit. 
- assert(all_of(begin(lits), end(lits), [](const hwlmLiteral &lit) { + assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) { return lit.s.length() <= ROSE_SMALL_BLOCK_LEN; })); - if (lits.empty()) { + if (mp.lits.empty()) { DEBUG_PRINTF("no literals shorter than small block len\n"); return nullptr; } - aligned_unique_ptr hwlm = - hwlmBuild(lits, nullptr, true, build.cc, build.getInitialGroups()); + auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups()); if (!hwlm) { throw CompileError("Unable to generate bytecode."); } @@ -837,10 +838,10 @@ aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, size_t *esize) { *esize = 0; - auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, - build.ematcher_region_size); + auto mp = + makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size); - if (el.empty()) { + if (mp.lits.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); assert(!build.ematcher_region_size); return nullptr; @@ -848,17 +849,15 @@ aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, assert(build.ematcher_region_size); - hwlmStreamingControl *ctlp = nullptr; // not a streaming case - aligned_unique_ptr etable = - hwlmBuild(el, ctlp, true, build.cc, build.getInitialGroups()); - if (!etable) { + auto hwlm = hwlmBuild(mp.lits, true, build.cc, build.getInitialGroups()); + if (!hwlm) { throw CompileError("Unable to generate bytecode."); } - *esize = hwlmSize(etable.get()); + *esize = hwlmSize(hwlm.get()); assert(*esize); DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize); - return etable; + return hwlm; } } // namespace ue2 diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index a25dbca3..15ccf278 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,15 +44,21 @@ namespace ue2 { struct hwlmLiteral; +struct MatcherProto { + std::vector lits; + size_t history_required = 0; +}; + /** - * \brief Build up a vector of literals for the given table. + * \brief Build up a vector of literals (and associated other data) for the + * given table. * * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can * only lead to a pattern match after max_offset may be excluded. 
*/ -std::vector fillHamsterLiteralList(const RoseBuildImpl &build, - rose_literal_table table, size_t max_len, - u32 max_offset = ROSE_BOUND_INF); +MatcherProto makeMatcherProto(const RoseBuildImpl &build, + rose_literal_table table, size_t max_len, + u32 max_offset = ROSE_BOUND_INF); aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 28b885bd..97579111 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,7 +28,7 @@ #include "rose_build_impl.h" -#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" #include "nfa/mcclellancompile_util.h" diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index ee237639..5f7ab0bf 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -502,6 +502,7 @@ void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, assert(!literal.empty()); inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); } void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, @@ -511,6 +512,27 @@ void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, assert(!literal.empty()); inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); } static diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 39e2e23c..440bf4e1 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -1723,17 +1723,19 @@ public: }; class RoseInstrCheckLongLit - : public RoseInstrBaseNoTargets { public: std::string literal; + const RoseInstruction *target; - explicit RoseInstrCheckLongLit(std::string literal_in) - : literal(std::move(literal_in)) {} + RoseInstrCheckLongLit(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) {} bool operator==(const RoseInstrCheckLongLit &ri) 
const { - return literal == ri.literal; + return literal == ri.literal && target == ri.target; } size_t hash() const override { @@ -1743,26 +1745,29 @@ public: void write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const override; - bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &, - const OffsetMap &) const { - return literal == ri.literal; + bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); } }; class RoseInstrCheckLongLitNocase - : public RoseInstrBaseNoTargets { public: std::string literal; + const RoseInstruction *target; - explicit RoseInstrCheckLongLitNocase(std::string literal_in) - : literal(std::move(literal_in)) { + RoseInstrCheckLongLitNocase(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) { upperString(literal); } bool operator==(const RoseInstrCheckLongLitNocase &ri) const { - return literal == ri.literal; + return literal == ri.literal && target == ri.target; } size_t hash() const override { @@ -1772,9 +1777,74 @@ public: void write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const override; - bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &, - const OffsetMap &) const { - return literal == ri.literal; + bool equiv_to(const RoseInstrCheckLongLitNocase &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMedLit + : public RoseInstrBaseNoTargets { +public: + std::string literal; + const RoseInstruction *target; + + explicit RoseInstrCheckMedLit(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckMedLit &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMedLit &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMedLitNocase + : public RoseInstrBaseNoTargets { +public: + std::string literal; + const RoseInstruction *target; + + explicit RoseInstrCheckMedLitNocase(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) { + upperString(literal); + } + + bool operator==(const RoseInstrCheckMedLitNocase &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMedLitNocase &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); } }; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 1867be50..5d79da2e 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel 
Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -617,6 +617,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *lit = (const char *)t + ri->lit_offset; os << " literal: \"" << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -626,6 +627,27 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *lit = (const char *)t + ri->lit_offset; os << " literal: \"" << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index ed913316..c5ddc942 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -129,7 +129,19 @@ enum RoseInstructionCode { */ ROSE_INSTR_CHECK_LONG_LIT_NOCASE, - LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel. + /** + * \brief Confirm a case-sensitive "medium length" literal at the current + * offset. In streaming mode, this will check history if needed. + */ + ROSE_INSTR_CHECK_MED_LIT, + + /** + * \brief Confirm a case-insensitive "medium length" literal at the current + * offset. In streaming mode, this will check history if needed. + */ + ROSE_INSTR_CHECK_MED_LIT_NOCASE, + + LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MED_LIT_NOCASE //!< Sentinel. }; struct ROSE_STRUCT_END { @@ -477,18 +489,32 @@ struct ROSE_STRUCT_MATCHER_EOD { u8 code; //!< From enum RoseInstructionCode. }; -/** Note: check failure will halt program. */ struct ROSE_STRUCT_CHECK_LONG_LIT { u8 code; //!< From enum RoseInstructionCode. u32 lit_offset; //!< Offset of literal string. u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. }; -/** Note: check failure will halt program. */ struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE { u8 code; //!< From enum RoseInstructionCode. u32 lit_offset; //!< Offset of literal string. u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MED_LIT { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. 
+}; + +struct ROSE_STRUCT_CHECK_MED_LIT_NOCASE { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. }; #endif // ROSE_ROSE_PROGRAM_H diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index 6116bfdb..8ec72598 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -96,15 +96,6 @@ struct match { }; extern "C" { -static -hwlmcb_rv_t countCallback(UNUSED size_t start, UNUSED size_t end, u32, - void *ctxt) { - if (ctxt) { - ++*(u32 *)ctxt; - } - - return HWLM_CONTINUE_MATCHING; -} static hwlmcb_rv_t decentCallback(size_t start, size_t end, u32 id, void *ctxt) { @@ -231,42 +222,6 @@ TEST_P(FDRp, MultiLocation) { } } -TEST_P(FDRp, Flood) { - const u32 hint = GetParam(); - SCOPED_TRACE(hint); - - vector lits; - lits.push_back(hwlmLiteral("aaaa", 0, 1)); - lits.push_back(hwlmLiteral("aaaaaaaa", 0, 2)); - lits.push_back(hwlmLiteral("baaaaaaaa", 0, 3)); - lits.push_back(hwlmLiteral("aaaaaaaab", 0, 4)); - - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); - CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - - const u32 testSize = 1024; - vector data(testSize, 'a'); - - vector matches; - fdrExec(fdr.get(), data.data(), testSize, 0, decentCallback, &matches, - HWLM_ALL_GROUPS); - ASSERT_EQ(testSize - 3 + testSize - 7, matches.size()); - EXPECT_EQ(match(0, 3, 1), matches[0]); - EXPECT_EQ(match(1, 4, 1), matches[1]); - EXPECT_EQ(match(2, 5, 1), matches[2]); - EXPECT_EQ(match(3, 6, 1), matches[3]); - - u32 currentMatch = 4; - for (u32 i = 7; i < testSize; i++, currentMatch += 2) { - EXPECT_TRUE( - (match(i - 3, i, 1) == matches[currentMatch] && - match(i - 7, i, 2) == matches[currentMatch+1]) || - (match(i - 7, i, 2) == matches[currentMatch+1] && - match(i - 3, i, 1) == matches[currentMatch]) - ); - } -} - TEST_P(FDRp, NoRepeat1) { const u32 hint = GetParam(); SCOPED_TRACE(hint); @@ -414,36 +369,6 @@ TEST_P(FDRp, SmallStreaming2) { ASSERT_EQ(expected.size(), matches.size()); } -TEST_P(FDRp, LongLiteral) { - const u32 hint = GetParam(); - SCOPED_TRACE(hint); - size_t sz; - const u8 *data; - vector lits; - - string alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - string alpha4 = alpha+alpha+alpha+alpha; - lits.push_back(hwlmLiteral(alpha4.c_str(), 0,10)); - - auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); - CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - - u32 count = 0; - - data = (const u8 *)alpha4.c_str(); - sz = alpha4.size(); - - fdrExec(fdr.get(), data, sz, 0, countCallback, &count, HWLM_ALL_GROUPS); - EXPECT_EQ(1U, count); - count = 0; - fdrExec(fdr.get(), data, sz - 1, 0, countCallback, &count, HWLM_ALL_GROUPS); - EXPECT_EQ(0U, count); - count = 0; - fdrExec(fdr.get(), data + 1, sz - 1, 0, countCallback, &count, - HWLM_ALL_GROUPS); - EXPECT_EQ(0U, count); -} - TEST_P(FDRp, moveByteStream) { const u32 hint = GetParam(); SCOPED_TRACE(hint); @@ -491,7 +416,7 @@ TEST_P(FDRp, Stream1) { vector lits; lits.push_back(hwlmLiteral("f", 0, 0)); - lits.push_back(hwlmLiteral("longsigislong", 0, 1)); + lits.push_back(hwlmLiteral("literal", 0, 1)); auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), 
Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); @@ -514,7 +439,7 @@ INSTANTIATE_TEST_CASE_P(FDR, FDRp, ValuesIn(getValidFdrEngines())); typedef struct { string pattern; - unsigned char alien; + unsigned char alien; // character not present in pattern } pattern_alien_t; // gtest helper @@ -529,7 +454,6 @@ class FDRpp : public TestWithParam> {}; // not happen if literal is partially (from 1 character up to full literal // length) is out of searched buffer - "too early" and "too late" conditions TEST_P(FDRpp, AlignAndTooEarly) { - const size_t buf_alignment = 32; // Buffer should be big enough to hold two instances of matching literals // (up to 64 bytes each) and room for offset (up to 32 bytes) @@ -538,7 +462,7 @@ TEST_P(FDRpp, AlignAndTooEarly) { const u32 hint = get<0>(GetParam()); SCOPED_TRACE(hint); - // pattern which is used to generate literals of variable size - from 1 to 64 + // pattern which is used to generate literals of variable size - from 1 to 8 const string &pattern = get<1>(GetParam()).pattern; const size_t patLen = pattern.size(); const unsigned char alien = get<1>(GetParam()).alien; @@ -551,7 +475,7 @@ TEST_P(FDRpp, AlignAndTooEarly) { vector lits; for (size_t litLen = 1; litLen <= patLen; litLen++) { - // building literal from pattern substring of variable length 1-64 + // building literal from pattern substring of variable length 1-patLen lits.push_back(hwlmLiteral(string(pattern, 0, litLen), 0, 0)); auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey()); @@ -596,9 +520,9 @@ TEST_P(FDRpp, AlignAndTooEarly) { } static const pattern_alien_t test_pattern[] = { - {"abaabaaabaaabbaaaaabaaaaabbaaaaaaabaabbaaaabaaaaaaaabbbbaaaaaaab", 'x'}, - {"zzzyyzyzyyyyzyyyyyzzzzyyyyyyyyzyyyyyyyzzzzzyzzzzzzzzzyzzyzzzzzzz", (unsigned char)'\x99'}, - {"abcdef lafjk askldfjklf alfqwei9rui 'gldgkjnooiuswfs138746453583", '\0'} + {"abaabaaa", 'x'}, + {"zzzyyzyz", (unsigned char)'\x99'}, + {"abcdef l", '\0'} }; INSTANTIATE_TEST_CASE_P(FDR, FDRpp, Combine(ValuesIn(getValidFdrEngines()), diff --git a/unit/internal/fdr_flood.cpp b/unit/internal/fdr_flood.cpp index 7b00ac4c..952fffc1 100644 --- a/unit/internal/fdr_flood.cpp +++ b/unit/internal/fdr_flood.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -161,8 +161,8 @@ TEST_P(FDRFloodp, NoMask) { vector lits; // build literals of type "aaaa", "aaab", "baaa" - // of lengths 1, 2, 4, 8, 16, 32, both case-less and case-sensitive - for (int i = 0; i < 6 ; i++) { + // of lengths 1, 2, 4, 8, both case-less and case-sensitive + for (int i = 0; i < 4; i++) { string s(1 << i, c); lits.push_back(hwlmLiteral(s, false, i * 8 + 0)); s[0] = cAlt; @@ -183,13 +183,13 @@ TEST_P(FDRFloodp, NoMask) { Grey()); CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint); - map matchesCounts; + map matchesCounts; hwlm_error_t fdrStatus = fdrExec(fdr.get(), &data[0], dataSize, 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); - for (u8 i = 0; i < 6 ; i++) { + for (u8 i = 0; i < 4; i++) { u32 cnt = dataSize - (1 << i) + 1; ASSERT_EQ(cnt, matchesCounts[i * 8 + 0]); ASSERT_EQ(0, matchesCounts[i * 8 + 1]); @@ -214,7 +214,7 @@ TEST_P(FDRFloodp, NoMask) { 0, countCallback, (void *)&matchesCounts, HWLM_ALL_GROUPS); ASSERT_EQ(0, fdrStatus); - for (u8 i = 0; i < 6 ; i++) { + for (u8 i = 0; i < 4; i++) { u32 cnt = dataSize - (1 << 
i) + 1; ASSERT_EQ(0, matchesCounts[i * 8 + 0]); ASSERT_EQ(i == 0 ? cnt : 0, matchesCounts[i * 8 + 1]); From eb14792a63805ffc3aaff2a574f95dd71ca89844 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 12 Dec 2016 17:08:06 +1100 Subject: [PATCH 009/326] rose: group final ids by fragment --- src/hwlm/hwlm_literal.h | 14 ++ src/rose/match.c | 42 +++--- src/rose/program_runtime.h | 4 +- src/rose/rose_build_anchored.cpp | 18 ++- src/rose/rose_build_anchored.h | 6 +- src/rose/rose_build_bytecode.cpp | 220 ++++++++++++++++++++++++++----- src/rose/rose_build_dump.cpp | 18 ++- src/rose/rose_build_impl.h | 2 + src/rose/rose_build_matchers.cpp | 93 +++++++++---- src/rose/rose_build_matchers.h | 23 ++-- src/rose/rose_build_program.cpp | 1 + src/rose/rose_build_program.h | 19 +-- src/rose/rose_dump.cpp | 1 + src/rose/rose_internal.h | 13 +- src/rose/rose_program.h | 2 +- 15 files changed, 354 insertions(+), 122 deletions(-) diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index a08b2ff6..0e2a1ea5 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -37,6 +37,7 @@ #include "ue2common.h" #include +#include #include namespace ue2 { @@ -111,6 +112,19 @@ struct hwlmLiteral { : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {} }; +inline +bool operator<(const hwlmLiteral &a, const hwlmLiteral &b) { + return std::tie(a.id, a.s, a.nocase, a.noruns, a.groups, a.msk, a.cmp) < + std::tie(b.id, b.s, b.nocase, b.noruns, b.groups, b.msk, b.cmp); +} + +inline +bool operator==(const hwlmLiteral &a, const hwlmLiteral &b) { + return a.id == b.id && a.s == b.s && a.nocase == b.nocase && + a.noruns == b.noruns && a.groups == b.groups && a.msk == b.msk && + a.cmp == b.cmp; +} + /** * Consistency test; returns false if the given msk/cmp test can never match * the literal string s. diff --git a/src/rose/match.c b/src/rose/match.c index b641e39d..9a702804 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -253,24 +253,6 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, flags); } -/** - * \brief Run the program for the given literal ID, with the interpreter - * out of line. - * - * Assumes not in_anchored. 
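
The comparators added to hwlmLiteral above give the struct a total order,
which is what allows the literal lists to be deduplicated once several final
ids have been remapped onto a single fragment id. A sketch of that idiom
(dedupeLits is an invented name; makeMatcherProto applies the same sort +
unique sequence later in this patch):

    #include <algorithm>
    #include <vector>

    static void dedupeLits(std::vector<hwlmLiteral> &lits) {
        // operator< and operator== compare (id, s, nocase, noruns, groups,
        // msk, cmp) via std::tie, so equal entries become adjacent.
        std::sort(lits.begin(), lits.end());
        lits.erase(std::unique(lits.begin(), lits.end()), lits.end());
    }
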
- */ -static really_inline -hwlmcb_rv_t roseProcessMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - size_t match_len, u32 id) { - DEBUG_PRINTF("id=%u\n", id); - const u32 *programs = getByOffset(t, t->litProgramOffset); - assert(id < t->literalCount); - const u64a som = 0; - const u8 flags = 0; - return roseRunProgram(t, scratch, programs[id], som, end, match_len, flags); -} - static rose_inline hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct hs_scratch *scratch, @@ -290,14 +272,17 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, roseFlushLastByteHistory(t, scratch, offset); tctxt->lastEndOffset = offset; + const u32 *programs = getByOffset(t, t->delayProgramOffset); + for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID); it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) { - u32 literal_id = t->delay_base_id + it; - UNUSED rose_group old_groups = tctxt->groups; - DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", literal_id, offset); - hwlmcb_rv_t rv = roseProcessMatch(t, scratch, offset, 0, literal_id); + DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", it, offset); + const u64a som = 0; + const u8 flags = 0; + hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, offset, + 0, flags); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* delayed literals can't safely set groups. @@ -322,16 +307,19 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1]; u32 region_width = t->anchored_count; + const u32 *programs = getByOffset(t, t->anchoredProgramOffset); + DEBUG_PRINTF("report matches at curr loc\n"); for (u32 it = fatbit_iterate(curr_row, region_width, MMB_INVALID); it != MMB_INVALID; it = fatbit_iterate(curr_row, region_width, it)) { DEBUG_PRINTF("it = %u/%u\n", it, region_width); - u32 literal_id = t->anchored_base_id + it; rose_group old_groups = tctxt->groups; - DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", literal_id, - curr_loc); - hwlmcb_rv_t rv = roseProcessMatch(t, scratch, curr_loc, 0, literal_id); + DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", it, curr_loc); + const u64a som = 0; + const u8 flags = 0; + hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, curr_loc, + 0, flags); DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); /* anchored literals can't safely set groups. diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 1a5f25e9..8f4c528d 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1554,7 +1554,9 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, if (end < ri->min_offset) { DEBUG_PRINTF("halt: before min_offset=%u\n", ri->min_offset); - return HWLM_CONTINUE_MATCHING; + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; } } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 3d0affc6..ea565eaa 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -208,7 +208,8 @@ void remapAnchoredReports(RoseBuildImpl &build) { * raw_dfa with program offsets. 
*/ static -void remapIdsToPrograms(raw_dfa &rdfa, const vector &litPrograms) { +void remapIdsToPrograms(raw_dfa &rdfa, const vector &litPrograms, + const map &final_to_frag_map) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. if (ds.reports.empty()) { @@ -216,9 +217,11 @@ void remapIdsToPrograms(raw_dfa &rdfa, const vector &litPrograms) { } flat_set new_reports; - for (auto id : ds.reports) { - assert(id < litPrograms.size()); - new_reports.insert(litPrograms.at(id)); + for (auto final_id : ds.reports) { + assert(contains(final_to_frag_map, final_id)); + auto frag_id = final_to_frag_map.at(final_id); + assert(frag_id < litPrograms.size()); + new_reports.insert(litPrograms.at(frag_id)); } ds.reports = move(new_reports); } @@ -846,7 +849,8 @@ vector buildAnchoredDfas(RoseBuildImpl &build) { aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, - const vector &litPrograms, size_t *asize) { + const vector &litPrograms, + const map &final_to_frag_map, size_t *asize) { const CompileContext &cc = build.cc; if (dfas.empty()) { @@ -856,7 +860,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, } for (auto &rdfa : dfas) { - remapIdsToPrograms(rdfa, litPrograms); + remapIdsToPrograms(rdfa, litPrograms, final_to_frag_map); } vector> nfas; diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index ef06fcbb..fa379ff6 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -59,7 +59,9 @@ std::vector buildAnchoredDfas(RoseBuildImpl &build); */ aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, std::vector &dfas, - const std::vector &litPrograms, size_t *asize); + const std::vector &litPrograms, + const std::map &final_to_frag_map, + size_t *asize); u32 anchoredStateSize(const anchored_matcher_info &atable); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 9f978134..6f996979 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4346,7 +4346,9 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, assert(min_offset < UINT32_MAX); DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); - program.add_before_end(make_unique(min_offset)); + const auto *end_inst = program.end_instruction(); + program.add_before_end( + make_unique(min_offset, end_inst)); } static @@ -4528,9 +4530,33 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, } static -u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, - const vector &lit_edges) { - RoseProgram program = buildLiteralProgram(build, bc, final_id, lit_edges); +RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, + const flat_set &final_ids, + const map> &lit_edges) { + assert(!final_ids.empty()); + + DEBUG_PRINTF("entry, %zu final ids\n", final_ids.size()); + const vector no_edges; + + RoseProgram program; + for (const auto &final_id : final_ids) { + const auto *edges_ptr = &no_edges; + if (contains(lit_edges, final_id)) { + edges_ptr = &(lit_edges.at(final_id)); + } + auto prog = buildLiteralProgram(build, bc, final_id, *edges_ptr); + DEBUG_PRINTF("final_id=%u, prog has %zu entries\n", 
final_id, + prog.size()); + program.add_block(move(prog)); + } + return program; +} + +static +u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, + const flat_set &final_ids, + const map> &lit_edges) { + RoseProgram program = buildLiteralProgram(build, bc, final_ids, lit_edges); if (program.empty()) { return 0; } @@ -4540,18 +4566,26 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, static u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, - u32 final_id) { - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); - const auto &arb_lit_info = **lit_infos.begin(); - if (arb_lit_info.delayed_ids.empty()) { - return 0; // No delayed IDs, no work to do. + const flat_set &final_ids) { + RoseProgram program; + + for (const auto &final_id : final_ids) { + const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + const auto &arb_lit_info = **lit_infos.begin(); + if (arb_lit_info.delayed_ids.empty()) { + continue; // No delayed IDs, no work to do. + } + + RoseProgram prog; + makeCheckLiteralInstruction(build, bc, final_id, prog); + makeCheckLitMaskInstruction(build, bc, final_id, prog); + makePushDelayedInstructions(build, final_id, prog); + program.add_block(move(prog)); } - RoseProgram program; - makeCheckLiteralInstruction(build, bc, final_id, program); - makeCheckLitMaskInstruction(build, bc, final_id, program); - makePushDelayedInstructions(build, final_id, program); - assert(!program.empty()); + if (program.empty()) { + return 0; + } applyFinalSpecialisation(program); return writeProgram(bc, move(program)); } @@ -4590,27 +4624,104 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { return lit_edge_map; } +static +rose_literal_id getFragment(const rose_literal_id &lit) { + if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("whole lit is frag\n"); + return lit; + } + + rose_literal_id frag = lit; + frag.s = frag.s.substr(frag.s.length() - ROSE_SHORT_LITERAL_LEN_MAX); + + DEBUG_PRINTF("fragment: %s\n", dumpString(frag.s).c_str()); + return frag; +} + +map groupByFragment(const RoseBuildImpl &build) { + u32 frag_id = 0; + map final_to_frag; + + map> frag_lits; + for (const auto &m : build.final_id_to_literal) { + u32 final_id = m.first; + const auto &lit_ids = m.second; + assert(!lit_ids.empty()); + + if (lit_ids.size() > 1) { + final_to_frag.emplace(final_id, frag_id++); + continue; + } + + const auto lit_id = *lit_ids.begin(); + const auto &lit = build.literals.right.at(lit_id); + if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { + final_to_frag.emplace(final_id, frag_id++); + continue; + } + + // Combining exploded fragments with others is unsafe. + const auto &info = build.literal_info[lit_id]; + if (info.requires_explode) { + final_to_frag.emplace(final_id, frag_id++); + continue; + } + + DEBUG_PRINTF("fragment candidate: final_id=%u %s\n", final_id, + dumpString(lit.s).c_str()); + auto frag = getFragment(lit); + frag_lits[frag].push_back(final_id); + } + + for (const auto &m : frag_lits) { + DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), + as_string_list(m.second).c_str()); + for (const auto final_id : m.second) { + assert(!contains(final_to_frag, final_id)); + final_to_frag.emplace(final_id, frag_id); + } + frag_id++; + } + + return final_to_frag; +} + /** * \brief Build the interpreter programs for each literal. * - * Returns the base of the literal program list and the base of the delay - * rebuild program list. 
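
A worked example of the grouping above, using hypothetical patterns: the
literals "abcdefghij" and "zzcdefghij" share the 8-byte suffix "cdefghij",
so getFragment() maps both to the same fragment and groupByFragment()
assigns their final ids one frag_id; they then share one HWLM literal and
one literal program. For a plain std::string the suffix step is just
(sketch only; fragmentOf is our name):

    #include <string>

    static std::string fragmentOf(const std::string &s) {
        if (s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) {
            return s; // short literals are their own fragment
        }
        return s.substr(s.length() - ROSE_SHORT_LITERAL_LEN_MAX);
    }
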
+ * Returns the following as a tuple:
+ *
+ * - base of the literal program list
+ * - base of the delay rebuild program list
+ * - total number of literal fragments
 */
static
-pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
-    const u32 num_literals = build.final_id_to_literal.size();
+tuple<u32, u32, u32>
+buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
+                     const map<u32, u32> &final_to_frag_map) {
+    // Build a reverse mapping from fragment -> final_id.
+    map<u32, flat_set<u32>> frag_to_final_map;
+    for (const auto &m : final_to_frag_map) {
+        frag_to_final_map[m.second].insert(m.first);
+    }
+
+    const u32 num_fragments = verify_u32(frag_to_final_map.size());
+    DEBUG_PRINTF("%u fragments\n", num_fragments);
+
     auto lit_edge_map = findEdgesByLiteral(build);
 
-    bc.litPrograms.resize(num_literals);
-    vector<u32> delayRebuildPrograms(num_literals);
+    bc.litPrograms.resize(num_fragments);
+    vector<u32> delayRebuildPrograms(num_fragments);
 
-    for (u32 finalId = 0; finalId != num_literals; ++finalId) {
-        const auto &lit_edges = lit_edge_map[finalId];
+    for (u32 frag_id = 0; frag_id != num_fragments; ++frag_id) {
+        const auto &final_ids = frag_to_final_map[frag_id];
+        DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag_id,
+                     as_string_list(final_ids).c_str());
 
-        bc.litPrograms[finalId] =
-            writeLiteralProgram(build, bc, finalId, lit_edges);
-        delayRebuildPrograms[finalId] =
-            buildDelayRebuildProgram(build, bc, finalId);
+        bc.litPrograms[frag_id] =
+            writeLiteralProgram(build, bc, final_ids, lit_edge_map);
+        delayRebuildPrograms[frag_id] =
+            buildDelayRebuildProgram(build, bc, final_ids);
     }
 
     u32 litProgramsOffset =
@@ -4618,7 +4729,40 @@ pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
     u32 delayRebuildProgramsOffset = bc.engine_blob.add(
         begin(delayRebuildPrograms), end(delayRebuildPrograms));
 
-    return {litProgramsOffset, delayRebuildProgramsOffset};
+    return tuple<u32, u32, u32>{litProgramsOffset, delayRebuildProgramsOffset,
+                                num_fragments};
+}
+
+static
+u32 buildDelayPrograms(RoseBuildImpl &build, build_context &bc) {
+    auto lit_edge_map = findEdgesByLiteral(build);
+
+    vector<u32> programs;
+
+    for (u32 final_id = build.delay_base_id;
+         final_id < build.final_id_to_literal.size(); final_id++) {
+        u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map);
+        programs.push_back(offset);
+    }
+
+    DEBUG_PRINTF("%zu delay programs\n", programs.size());
+    return bc.engine_blob.add(begin(programs), end(programs));
+}
+
+static
+u32 buildAnchoredPrograms(RoseBuildImpl &build, build_context &bc) {
+    auto lit_edge_map = findEdgesByLiteral(build);
+
+    vector<u32> programs;
+
+    for (u32 final_id = build.anchored_base_id;
+         final_id < build.delay_base_id; final_id++) {
+        u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map);
+        programs.push_back(offset);
+    }
+
+    DEBUG_PRINTF("%zu anchored programs\n", programs.size());
+    return bc.engine_blob.add(begin(programs), end(programs));
 }
 
 /**
@@ -5253,6 +5397,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold);
 
     allocateFinalLiteralId(*this);
+    auto final_to_frag_map = groupByFragment(*this);
 
     auto anchored_dfas = buildAnchoredDfas(*this);
 
@@ -5316,8 +5461,12 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     u32 litProgramOffset;
     u32 litDelayRebuildProgramOffset;
-    tie(litProgramOffset, litDelayRebuildProgramOffset) =
-        buildLiteralPrograms(*this, bc);
+    u32 litProgramCount;
+    tie(litProgramOffset, litDelayRebuildProgramOffset, litProgramCount) =
+        
buildLiteralPrograms(*this, bc, final_to_frag_map); + + u32 delayProgramOffset = buildDelayPrograms(*this, bc); + u32 anchoredProgramOffset = buildAnchoredPrograms(*this, bc); u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); @@ -5354,7 +5503,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { size_t asize = 0; u32 amatcherOffset = 0; auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms, - &asize); + final_to_frag_map, &asize); if (atable) { currOffset = ROUNDUP_CL(currOffset); amatcherOffset = currOffset; @@ -5365,7 +5514,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { rose_group fgroups = 0; size_t fsize = 0; auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold, - &fgroups, &fsize, &historyRequired); + final_to_frag_map, &fgroups, &fsize, + &historyRequired); u32 fmatcherOffset = 0; if (ftable) { currOffset = ROUNDUP_CL(currOffset); @@ -5375,7 +5525,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build EOD-anchored HWLM matcher. size_t esize = 0; - auto etable = buildEodAnchoredMatcher(*this, &esize); + auto etable = buildEodAnchoredMatcher(*this, final_to_frag_map, &esize); u32 ematcherOffset = 0; if (etable) { currOffset = ROUNDUP_CL(currOffset); @@ -5385,7 +5535,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build small-block HWLM matcher. size_t sbsize = 0; - auto sbtable = buildSmallBlockMatcher(*this, &sbsize); + auto sbtable = buildSmallBlockMatcher(*this, final_to_frag_map, &sbsize); u32 sbmatcherOffset = 0; if (sbtable) { currOffset = ROUNDUP_CL(currOffset); @@ -5495,11 +5645,13 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->needsCatchup = bc.needs_catchup ? 
1 : 0; - engine->literalCount = verify_u32(final_id_to_literal.size()); + engine->literalCount = litProgramCount; engine->litProgramOffset = litProgramOffset; engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset; engine->reportProgramOffset = reportProgramOffset; engine->reportProgramCount = reportProgramCount; + engine->delayProgramOffset = delayProgramOffset; + engine->anchoredProgramOffset = anchoredProgramOffset; engine->runtimeImpl = pickRuntimeImpl(*this, bc, outfixEndQueue); engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index e7cef100..495d6f36 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -505,19 +505,25 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { size_t longLitLengthThreshold = calcLongLitThreshold(build, historyRequired); - auto mp = makeMatcherProto(build, ROSE_ANCHORED, longLitLengthThreshold); + const auto final_to_frag_map = groupByFragment(build); + + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED, + longLitLengthThreshold); dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold); + mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + longLitLengthThreshold); dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size); + mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, + build.ematcher_region_size); dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits); if (!build.cc.streaming) { - mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, - ROSE_SMALL_BLOCK_LEN); - auto mp2 = makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, + mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); + auto mp2 = makeMatcherProto(build, final_to_frag_map, + ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits)); dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 02c5a389..7421dbfa 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -644,6 +644,8 @@ void normaliseLiteralMask(const ue2_literal &s, std::vector &msk, bool canImplementGraphs(const RoseBuildImpl &tbi); #endif +std::map groupByFragment(const RoseBuildImpl &build); + } // namespace ue2 #endif /* ROSE_BUILD_IMPL_H_17E20A3C6935D6 */ diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index f7c237a7..c51905ca 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -632,7 +632,27 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, return lit_min_offset; } +static +map makeFragGroupMap(const RoseBuildImpl &build, + const map &final_to_frag_map) { + map frag_to_group; + + for (const auto &m : final_to_frag_map) { + u32 final_id = m.first; + u32 frag_id = m.second; + hwlm_group_t groups = 0; + const auto &lits = build.final_id_to_literal.at(final_id); + for (auto lit_id : lits) { + groups |= build.literal_info[lit_id].group_mask; + } + frag_to_group[frag_id] |= groups; + } + + return frag_to_group; +} + MatcherProto makeMatcherProto(const RoseBuildImpl &build, + const map &final_to_frag_map, rose_literal_table table, size_t max_len, 
u32 max_offset) { MatcherProto mp; @@ -710,23 +730,26 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, msk, cmp); } } else { - string s = lit.get_string(); - bool nocase = lit.any_nocase(); + auto lit_final = lit; // copy + + if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("truncating to tail of length %zu\n", + size_t{ROSE_SHORT_LITERAL_LEN_MAX}); + lit_final.erase(0, lit_final.length() - + ROSE_SHORT_LITERAL_LEN_MAX); + // We shouldn't have set a threshold below 8 chars. + assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); + assert(!noruns); + } + + const auto &s = lit_final.get_string(); + bool nocase = lit_final.any_nocase(); DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " "cmp=%s\n", final_id, escapeString(s).c_str(), (int)nocase, noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - if (s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("truncating to tail of length %zu\n", - size_t{ROSE_SHORT_LITERAL_LEN_MAX}); - s.erase(0, s.length() - ROSE_SHORT_LITERAL_LEN_MAX); - // We shouldn't have set a threshold below 8 chars. - assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); - assert(!noruns); - } - if (!maskIsConsistent(s, nocase, msk, cmp)) { DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); continue; @@ -738,18 +761,32 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, } } + auto frag_group_map = makeFragGroupMap(build, final_to_frag_map); + + for (auto &lit : mp.lits) { + u32 final_id = lit.id; + assert(contains(final_to_frag_map, final_id)); + lit.id = final_to_frag_map.at(final_id); + assert(contains(frag_group_map, lit.id)); + lit.groups = frag_group_map.at(lit.id); + } + + sort(begin(mp.lits), end(mp.lits)); + mp.lits.erase(unique(begin(mp.lits), end(mp.lits)), end(mp.lits)); + return mp; } -aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, - size_t longLitLengthThreshold, - rose_group *fgroups, - size_t *fsize, - size_t *historyRequired) { +aligned_unique_ptr +buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, + const map &final_to_frag_map, + rose_group *fgroups, size_t *fsize, + size_t *historyRequired) { *fsize = 0; *fgroups = 0; - auto mp = makeMatcherProto(build, ROSE_FLOATING, longLitLengthThreshold); + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + longLitLengthThreshold); if (mp.lits.empty()) { DEBUG_PRINTF("empty floating matcher\n"); return nullptr; @@ -776,8 +813,9 @@ aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, return hwlm; } -aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, - size_t *sbsize) { +aligned_unique_ptr +buildSmallBlockMatcher(const RoseBuildImpl &build, + const map &final_to_frag_map, size_t *sbsize) { *sbsize = 0; if (build.cc.streaming) { @@ -792,8 +830,8 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto mp = makeMatcherProto(build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, - ROSE_SMALL_BLOCK_LEN); + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (mp.lits.empty()) { DEBUG_PRINTF("no floating table\n"); return nullptr; @@ -803,8 +841,8 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, } auto mp_anchored = - makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN, - ROSE_SMALL_BLOCK_LEN); + makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED_SMALL_BLOCK, + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if 
(mp_anchored.lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; @@ -834,12 +872,13 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return hwlm; } -aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, - size_t *esize) { +aligned_unique_ptr +buildEodAnchoredMatcher(const RoseBuildImpl &build, + const map &final_to_frag_map, size_t *esize) { *esize = 0; - auto mp = - makeMatcherProto(build, ROSE_EOD_ANCHORED, build.ematcher_region_size); + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, + build.ematcher_region_size); if (mp.lits.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 15ccf278..742e8a14 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -36,6 +36,7 @@ #include "rose_build_impl.h" +#include #include struct HWLM; @@ -57,20 +58,26 @@ struct MatcherProto { * only lead to a pattern match after max_offset may be excluded. */ MatcherProto makeMatcherProto(const RoseBuildImpl &build, + const std::map &final_to_frag_map, rose_literal_table table, size_t max_len, u32 max_offset = ROSE_BOUND_INF); aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, - size_t longLitLengthThreshold, - rose_group *fgroups, - size_t *fsize, - size_t *historyRequired); + size_t longLitLengthThreshold, + const std::map &final_to_frag_map, + rose_group *fgroups, + size_t *fsize, + size_t *historyRequired); -aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, - size_t *sbsize); +aligned_unique_ptr +buildSmallBlockMatcher(const RoseBuildImpl &build, + const std::map &final_to_frag_map, + size_t *sbsize); -aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, - size_t *esize); +aligned_unique_ptr +buildEodAnchoredMatcher(const RoseBuildImpl &build, + const std::map &final_to_frag_map, + size_t *esize); void findMoreLiteralMasks(RoseBuildImpl &build); diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 5f7ab0bf..112b93f9 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -79,6 +79,7 @@ void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob, RoseInstrBase::write(dest, blob, offset_map); auto *inst = static_cast(dest); inst->min_offset = min_offset; + inst->fail_jump = calc_jump(offset_map, this, target); } void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob, diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 440bf4e1..fd966a8d 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -241,16 +241,18 @@ public: }; class RoseInstrCheckLitEarly - : public RoseInstrBaseNoTargets { public: u32 min_offset; + const RoseInstruction *target; - explicit RoseInstrCheckLitEarly(u32 min) : min_offset(min) {} + RoseInstrCheckLitEarly(u32 min_offset_in, const RoseInstruction *target_in) + : min_offset(min_offset_in), target(target_in) {} bool operator==(const RoseInstrCheckLitEarly &ri) const { - return min_offset == ri.min_offset; + return min_offset == ri.min_offset && target == ri.target; } size_t hash() const override { @@ -260,9 +262,10 @@ public: void write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const override; - bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &, - const OffsetMap &) const { - return min_offset == ri.min_offset; + bool equiv_to(const RoseInstrCheckLitEarly &ri, 
const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return min_offset == ri.min_offset && + offsets.at(target) == other_offsets.at(ri.target); } }; @@ -1786,7 +1789,7 @@ public: }; class RoseInstrCheckMedLit - : public RoseInstrBaseNoTargets { public: @@ -1816,7 +1819,7 @@ public: }; class RoseInstrCheckMedLitNocase - : public RoseInstrBaseNoTargets { public: diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 5d79da2e..f38d94c8 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -250,6 +250,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(CHECK_LIT_EARLY) { os << " min_offset " << ri->min_offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 411ce03f..bf6e9a86 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -361,6 +361,17 @@ struct RoseEngine { */ u32 reportProgramCount; + /** + * \brief Offset of u32 array of program offsets for delayed replay of + * literals. + */ + u32 delayProgramOffset; + + /** + * \brief Offset of u32 array of program offsets for anchored literals. + */ + u32 anchoredProgramOffset; + /** * \brief Number of entries in the arrays pointed to by litProgramOffset, * litDelayRebuildProgramOffset. diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index c5ddc942..652b9109 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -154,10 +154,10 @@ struct ROSE_STRUCT_ANCHORED_DELAY { u32 done_jump; //!< Jump forward this many bytes if successful. }; -/** Note: check failure will halt program. */ struct ROSE_STRUCT_CHECK_LIT_EARLY { u8 code; //!< From enum RoseInstructionCode. u32 min_offset; //!< Minimum offset for this literal. + u32 fail_jump; //!< Jump forward this many bytes on failure. }; /** Note: check failure will halt program. 
*/

From abcac24b47178e0ae0dd6e831209372eb0060af4 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 14 Dec 2016 11:58:01 +1100
Subject: [PATCH 010/326] rose: add more program dump code

---
 src/rose/rose_dump.cpp | 52 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
index f38d94c8..d3bf980e 100644
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ -739,6 +739,54 @@ void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) {
     os.close();
 }
 
+static
+void dumpRoseDelayPrograms(const RoseEngine *t, const string &filename) {
+    ofstream os(filename);
+
+    const u32 *programs =
+        (const u32 *)loadFromByteCodeOffset(t, t->delayProgramOffset);
+
+    for (u32 i = 0; i < t->delay_count; i++) {
+        os << "Delay entry " << i << endl;
+        os << "---------------" << endl;
+
+        if (programs[i]) {
+            os << "Program @ " << programs[i] << ":" << endl;
+            const char *prog =
+                (const char *)loadFromByteCodeOffset(t, programs[i]);
+            dumpProgram(os, t, prog);
+        } else {
+            os << "<no program>" << endl;
+        }
+    }
+
+    os.close();
+}
+
+static
+void dumpRoseAnchoredPrograms(const RoseEngine *t, const string &filename) {
+    ofstream os(filename);
+
+    const u32 *programs =
+        (const u32 *)loadFromByteCodeOffset(t, t->anchoredProgramOffset);
+
+    for (u32 i = 0; i < t->anchored_count; i++) {
+        os << "Anchored entry " << i << endl;
+        os << "---------------" << endl;
+
+        if (programs[i]) {
+            os << "Program @ " << programs[i] << ":" << endl;
+            const char *prog =
+                (const char *)loadFromByteCodeOffset(t, programs[i]);
+            dumpProgram(os, t, prog);
+        } else {
+            os << "<no program>" << endl;
+        }
+    }
+
+    os.close();
+}
+
 static
 void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) {
     const u32 qindex = n->queueIndex;
@@ -1279,6 +1327,8 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, litDelayRebuildProgramOffset);
     DUMP_U32(t, reportProgramOffset);
     DUMP_U32(t, reportProgramCount);
+    DUMP_U32(t, delayProgramOffset);
+    DUMP_U32(t, anchoredProgramOffset);
     DUMP_U32(t, literalCount);
     DUMP_U32(t, activeArrayCount);
     DUMP_U32(t, activeLeftCount);
@@ -1358,6 +1408,8 @@ void roseDumpComponents(const RoseEngine *t, bool dump_raw,
     dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt");
    dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt");
    dumpRoseReportPrograms(t, base + "/rose_report_programs.txt");
+    dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");
+    dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt");
 }
 
 void roseDumpInternals(const RoseEngine *t, const string &base) {

From 1b36594bda4929e01e4d8cf09a48376ae78dee70 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 20 Dec 2016 15:05:28 +1100
Subject: [PATCH 011/326] fdr: simplify confirm for 8-byte fragments

---
 src/fdr/fdr_confirm_runtime.h | 36 ++---------------------------------
 1 file changed, 2 insertions(+), 34 deletions(-)

diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h
index 87ade9fe..aaaecb38 100644
--- a/src/fdr/fdr_confirm_runtime.h
+++ b/src/fdr/fdr_confirm_runtime.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -70,11 +70,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
 
     const u8 *loc = buf + i - li->size + 1 - pullBackAmount;
 
-    u8 caseless = li->flags &
Caseless; if (loc < buf) { u32 full_overhang = buf - loc; - - const u8 *history = a->buf_history; size_t len_history = a->len_history; // can't do a vectored confirm either if we don't have @@ -82,37 +79,8 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a if (full_overhang > len_history) { goto out; } - - // as for the regular case, no need to do a full confirm if - // we're a short literal - if (unlikely(li->size > sizeof(CONF_TYPE))) { - const u8 *s1 = (const u8 *)li + sizeof(*li); - const u8 *s2 = s1 + full_overhang; - const u8 *loc1 = history + len_history - full_overhang; - const u8 *loc2 = buf; - size_t size1 = MIN(full_overhang, li->size - sizeof(CONF_TYPE)); - size_t wind_size2_back = sizeof(CONF_TYPE) + full_overhang; - size_t size2 = wind_size2_back > li->size ? - 0 : li->size - wind_size2_back; - - if (cmpForward(loc1, s1, size1, caseless)) { - goto out; - } - if (cmpForward(loc2, s2, size2, caseless)) { - goto out; - } - } - } else { // NON-VECTORING PATH - - // if string < conf_type we don't need regular string cmp - if (unlikely(li->size > sizeof(CONF_TYPE))) { - const u8 *s = (const u8 *)li + sizeof(*li); - if (cmpForward(loc, s, li->size - sizeof(CONF_TYPE), - caseless)) { - goto out; - } - } } + assert(li->size <= sizeof(CONF_TYPE)); if (unlikely(!(li->groups & *control))) { goto out; From 7984b26b14918d900e8892c4f85bcc435ffe8945 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 20 Dec 2016 15:10:47 +1100 Subject: [PATCH 012/326] fdr: rely on fixed size confirm structures --- src/fdr/fdr_confirm_compile.cpp | 21 ++------------------- src/fdr/fdr_confirm_runtime.h | 2 +- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index e77c46d1..4aa2df0d 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -322,32 +322,15 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, LiteralIndex litIdx = *i; // Write LitInfo header. - u8 *oldPtr = ptr; LitInfo &finalLI = *(LitInfo *)ptr; finalLI = tmpLitInfo[litIdx]; ptr += sizeof(LitInfo); // String starts directly after LitInfo. - - // Write literal prefix (everything before the last N characters, - // as the last N are already confirmed). 
- const string &t = lits[litIdx].s; - if (t.size() > sizeof(CONF_TYPE)) { - size_t prefix_len = t.size() - sizeof(CONF_TYPE); - memcpy(ptr, t.c_str(), prefix_len); - ptr += prefix_len; - } - - ptr = ROUNDUP_PTR(ptr, alignof(LitInfo)); + assert(lits[litIdx].s.size() <= sizeof(CONF_TYPE)); if (next(i) == e) { finalLI.next = 0; } else { - // our next field represents an adjustment on top of - // current address + the actual size of the literal - // so we track any rounding up done for alignment and - // add this in - that way we don't have to use bigger - // than a u8 (for now) - assert((size_t)(ptr - oldPtr) > t.size()); - finalLI.next = verify_u8(ptr - oldPtr - t.size()); + finalLI.next = 1; } } assert((size_t)(ptr - fdrc_base) <= size); diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index aaaecb38..55985846 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -101,7 +101,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a *control = a->cb(loc - buf, i, li->id, a->ctxt); out: oldNext = li->next; // oldNext is either 0 or an 'adjust' value - li = (const struct LitInfo *)((const u8 *)li + oldNext + li->size); + li++; } while (oldNext); } From 2fda8c0b2035f894344f4e8df1abd5f18b1fe096 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 30 Nov 2016 16:55:52 +1100 Subject: [PATCH 013/326] hwlm: move accel from hwlm build to rose --- CMakeLists.txt | 2 + src/hwlm/hwlm_build.cpp | 446 +--------------------------- src/rose/rose_build_lit_accel.cpp | 471 ++++++++++++++++++++++++++++++ src/rose/rose_build_lit_accel.h | 71 +++++ src/rose/rose_build_matchers.cpp | 7 + 5 files changed, 555 insertions(+), 442 deletions(-) create mode 100644 src/rose/rose_build_lit_accel.cpp create mode 100644 src/rose/rose_build_lit_accel.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ede52b4..8fe82a70 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -936,6 +936,8 @@ SET (hs_SRCS src/rose/rose_build_impl.h src/rose/rose_build_infix.cpp src/rose/rose_build_infix.h + src/rose/rose_build_lit_accel.cpp + src/rose/rose_build_lit_accel.h src/rose/rose_build_long_lit.cpp src/rose/rose_build_long_lit.h src/rose/rose_build_lookaround.cpp diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 29e71293..a05ca1a2 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -29,9 +29,11 @@ /** \file * \brief Hamster Wheel Literal Matcher: build code. 
*/ + +#include "hwlm_build.h" + #include "grey.h" #include "hwlm.h" -#include "hwlm_build.h" #include "hwlm_internal.h" #include "hwlm_literal.h" #include "noodle_engine.h" @@ -39,22 +41,11 @@ #include "scratch.h" #include "ue2common.h" #include "fdr/fdr_compile.h" -#include "nfa/shufticompile.h" -#include "nfa/trufflecompile.h" -#include "util/alloc.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/compare.h" #include "util/compile_context.h" #include "util/compile_error.h" -#include "util/dump_charclass.h" -#include "util/target_info.h" #include "util/ue2string.h" -#include "util/verify_types.h" #include -#include -#include #include #include @@ -62,431 +53,6 @@ using namespace std; namespace ue2 { -static const unsigned int MAX_ACCEL_OFFSET = 16; -static const unsigned int MAX_SHUFTI_WIDTH = 240; - -static -size_t mask_overhang(const hwlmLiteral &lit) { - size_t msk_true_size = lit.msk.size(); - assert(msk_true_size <= HWLM_MASKLEN); - assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET); - for (u8 c : lit.msk) { - if (!c) { - msk_true_size--; - } else { - break; - } - } - - if (lit.s.length() >= msk_true_size) { - return 0; - } - - /* only short literals should be able to have a mask which overhangs */ - assert(lit.s.length() < MAX_ACCEL_OFFSET); - return msk_true_size - lit.s.length(); -} - -static -bool findDVerm(const vector &lits, AccelAux *aux) { - const hwlmLiteral &first = *lits.front(); - - struct candidate { - candidate(void) - : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {} - candidate(const hwlmLiteral &base, u32 offset) - : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0), - b5insens(false), valid(true) {} - char c1; - char c2; - u32 max_offset; - bool b5insens; - bool valid; - - bool operator>(const candidate &other) const { - if (!valid) { - return false; - } - - if (!other.valid) { - return true; - } - - if (other.cdiffers() && !cdiffers()) { - return false; - } - - if (!other.cdiffers() && cdiffers()) { - return true; - } - - if (!other.b5insens && b5insens) { - return false; - } - - if (other.b5insens && !b5insens) { - return true; - } - - if (max_offset > other.max_offset) { - return false; - } - - return true; - } - - bool cdiffers(void) const { - if (!b5insens) { - return c1 != c2; - } - return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR); - } - }; - - candidate best; - - for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) { - candidate curr(first, i); - - /* check to see if this pair appears in each string */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) { - curr.b5insens = true; /* no choice but to be case insensitive */ - } - - bool found = false; - bool found_nc = false; - for (u32 j = 0; - !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { - found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; - found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) - && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); - - if (curr.b5insens) { - found = found_nc; - } - } - - if (!curr.b5insens && !found && found_nc) { - curr.b5insens = true; - found = true; - } - - if (!found) { - goto next_candidate; - } - } - - /* check to find the max offset where this appears */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; - j++) { - bool found = false; - if (curr.b5insens) { - found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & 
CASE_CLEAR) - && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); - } else { - found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; - } - - if (found) { - assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); - ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); - break; - } - } - } - - if (curr > best) { - best = curr; - } - - next_candidate:; - } - - if (!best.valid) { - return false; - } - - aux->dverm.offset = verify_u8(best.max_offset); - - if (!best.b5insens) { - aux->dverm.accel_type = ACCEL_DVERM; - aux->dverm.c1 = best.c1; - aux->dverm.c2 = best.c2; - DEBUG_PRINTF("built dverm for %02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - } else { - aux->dverm.accel_type = ACCEL_DVERM_NOCASE; - aux->dverm.c1 = best.c1 & CASE_CLEAR; - aux->dverm.c2 = best.c2 & CASE_CLEAR; - DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - } - return true; -} - -static -bool findSVerm(const vector &lits, AccelAux *aux) { - const hwlmLiteral &first = *lits.front(); - - struct candidate { - candidate(void) - : c(0), max_offset(0), b5insens(false), valid(false) {} - candidate(const hwlmLiteral &base, u32 offset) - : c(base.s[offset]), max_offset(0), - b5insens(false), valid(true) {} - char c; - u32 max_offset; - bool b5insens; - bool valid; - - bool operator>(const candidate &other) const { - if (!valid) { - return false; - } - - if (!other.valid) { - return true; - } - - if (!other.b5insens && b5insens) { - return false; - } - - if (other.b5insens && !b5insens) { - return true; - } - - if (max_offset > other.max_offset) { - return false; - } - - return true; - } - }; - - candidate best; - - for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) { - candidate curr(first, i); - - /* check to see if this pair appears in each string */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - if (lit.nocase && ourisalpha(curr.c)) { - curr.b5insens = true; /* no choice but to be case insensitive */ - } - - bool found = false; - bool found_nc = false; - for (u32 j = 0; - !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { - found |= curr.c == lit.s[j]; - found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); - - if (curr.b5insens) { - found = found_nc; - } - } - - if (!curr.b5insens && !found && found_nc) { - curr.b5insens = true; - found = true; - } - - if (!found) { - goto next_candidate; - } - } - - /* check to find the max offset where this appears */ - for (const auto &lit_ptr : lits) { - const hwlmLiteral &lit = *lit_ptr; - for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { - bool found = false; - if (curr.b5insens) { - found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); - } else { - found = curr.c == lit.s[j]; - } - - if (found) { - assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); - ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); - } - } - } - - if (curr > best) { - best = curr; - } - - next_candidate:; - } - - if (!best.valid) { - return false; - } - - if (!best.b5insens) { - aux->verm.accel_type = ACCEL_VERM; - aux->verm.c = best.c; - DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c); - } else { - aux->verm.accel_type = ACCEL_VERM_NOCASE; - aux->verm.c = best.c & CASE_CLEAR; - DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c); - } - aux->verm.offset = verify_u8(best.max_offset); - - return true; -} - -static -void filterLits(const vector &lits, hwlm_group_t expected_groups, - vector *filtered_lits, u32 *min_len) { - *min_len = MAX_ACCEL_OFFSET; - - for (const auto &lit 
: lits) { - if (!(lit.groups & expected_groups)) { - continue; - } - - const size_t lit_len = lit.s.length(); - if (lit_len < *min_len) { - *min_len = verify_u32(lit_len); - } - - filtered_lits->push_back(&lit); - -#ifdef DEBUG - DEBUG_PRINTF("lit:"); - for (u32 i = 0; i < lit.s.length(); i++) { - printf("%02hhx", lit.s[i]); - } - printf("\n"); -#endif - } -} - -static -bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit, - u32 max_offset) { - for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) { - unsigned char c = lit.s[i]; - if (lit.nocase) { - if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) { - return true; - } - } else { - if (cr.test(c)) { - return true; - } - } - } - - return false; -} - -static -void findForwardAccelScheme(const vector &lits, - hwlm_group_t expected_groups, AccelAux *aux) { - DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups); - u32 min_len = MAX_ACCEL_OFFSET; - vector filtered_lits; - - filterLits(lits, expected_groups, &filtered_lits, &min_len); - if (filtered_lits.empty()) { - return; - } - - if (findDVerm(filtered_lits, aux) - || findSVerm(filtered_lits, aux)) { - return; - } - - /* look for shufti/truffle */ - - vector reach(MAX_ACCEL_OFFSET, CharReach()); - for (const auto &lit : lits) { - if (!(lit.groups & expected_groups)) { - continue; - } - - u32 overhang = mask_overhang(lit); - for (u32 i = 0; i < overhang; i++) { - /* this offset overhangs the start of the real literal; look at the - * msk/cmp */ - for (u32 j = 0; j < N_CHARS; j++) { - if ((j & lit.msk[i]) == lit.cmp[i]) { - reach[i].set(j); - } - } - } - for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) { - CharReach &reach_i = reach[i]; - u32 i_effective = i - overhang; - - if (litGuardedByCharReach(reach_i, lit, i_effective)) { - continue; - } - unsigned char c = i_effective < lit.s.length() ? 
lit.s[i_effective] - : lit.s.back(); - if (lit.nocase) { - reach_i.set(mytoupper(c)); - reach_i.set(mytolower(c)); - } else { - reach_i.set(c); - } - } - } - - u32 min_count = ~0U; - u32 min_offset = ~0U; - for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) { - size_t count = reach[i].count(); - DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, - describeClass(reach[i]).c_str(), count); - if (count < min_count) { - min_count = (u32)count; - min_offset = i; - } - } - - if (min_count > MAX_SHUFTI_WIDTH) { - DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count); - return; - } - - const CharReach &cr = reach[min_offset]; - if (-1 != - shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { - DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", - describeClass(cr).c_str(), cr.count(), min_offset); - aux->shufti.accel_type = ACCEL_SHUFTI; - aux->shufti.offset = verify_u8(min_offset); - return; - } - - truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); - DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", - describeClass(cr).c_str(), cr.count(), min_offset); - aux->truffle.accel_type = ACCEL_TRUFFLE; - aux->truffle.offset = verify_u8(min_offset); -} - -static -void buildForwardAccel(HWLM *h, const vector &lits, - hwlm_group_t expected_groups) { - findForwardAccelScheme(lits, expected_groups, &h->accel1); - findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0); - - h->accel1_groups = expected_groups; -} - static void dumpLits(UNUSED const vector &lits) { #ifdef DEBUG @@ -533,7 +99,7 @@ bool isNoodleable(const vector &lits, aligned_unique_ptr hwlmBuild(const vector &lits, bool make_small, const CompileContext &cc, - hwlm_group_t expected_groups) { + UNUSED hwlm_group_t expected_groups) { assert(!lits.empty()); dumpLits(lits); @@ -604,10 +170,6 @@ aligned_unique_ptr hwlmBuild(const vector &lits, h->type = engType; memcpy(HWLM_DATA(h.get()), eng.get(), engSize); - if (engType == HWLM_ENGINE_FDR && cc.grey.hamsterAccelForward) { - buildForwardAccel(h.get(), lits, expected_groups); - } - return h; } diff --git a/src/rose/rose_build_lit_accel.cpp b/src/rose/rose_build_lit_accel.cpp new file mode 100644 index 00000000..1388069d --- /dev/null +++ b/src/rose/rose_build_lit_accel.cpp @@ -0,0 +1,471 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_lit_accel.h" + +#include "grey.h" +#include "ue2common.h" +#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_internal.h" +#include "hwlm/hwlm_literal.h" +#include "nfa/accel.h" +#include "nfa/shufticompile.h" +#include "nfa/trufflecompile.h" +#include "util/compare.h" +#include "util/dump_charclass.h" +#include "util/verify_types.h" + +using namespace std; + +namespace ue2 { + +static const unsigned int MAX_ACCEL_OFFSET = 16; +static const unsigned int MAX_SHUFTI_WIDTH = 240; + +static +size_t mask_overhang(const AccelString &lit) { + size_t msk_true_size = lit.msk.size(); + assert(msk_true_size <= HWLM_MASKLEN); + assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET); + for (u8 c : lit.msk) { + if (!c) { + msk_true_size--; + } else { + break; + } + } + + if (lit.s.length() >= msk_true_size) { + return 0; + } + + /* only short literals should be able to have a mask which overhangs */ + assert(lit.s.length() < MAX_ACCEL_OFFSET); + return msk_true_size - lit.s.length(); +} + +static +bool findDVerm(const vector &lits, AccelAux *aux) { + const AccelString &first = *lits.front(); + + struct candidate { + candidate(void) + : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {} + candidate(const AccelString &base, u32 offset) + : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0), + b5insens(false), valid(true) {} + char c1; + char c2; + u32 max_offset; + bool b5insens; + bool valid; + + bool operator>(const candidate &other) const { + if (!valid) { + return false; + } + + if (!other.valid) { + return true; + } + + if (other.cdiffers() && !cdiffers()) { + return false; + } + + if (!other.cdiffers() && cdiffers()) { + return true; + } + + if (!other.b5insens && b5insens) { + return false; + } + + if (other.b5insens && !b5insens) { + return true; + } + + if (max_offset > other.max_offset) { + return false; + } + + return true; + } + + bool cdiffers(void) const { + if (!b5insens) { + return c1 != c2; + } + return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR); + } + }; + + candidate best; + + for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) { + candidate curr(first, i); + + /* check to see if this pair appears in each string */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) { + curr.b5insens = true; /* no choice but to be case insensitive */ + } + + bool found = false; + bool found_nc = false; + for (u32 j = 0; + !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { + found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; + found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) + && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); + + if (curr.b5insens) { + found = found_nc; + } + } + + if (!curr.b5insens && !found && found_nc) { + curr.b5insens = true; + found = true; + } + + if (!found) { + goto next_candidate; + } + } + + /* check to find the max offset where this appears */ + for (const auto &lit_ptr : 
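+                                /* second pass over the literals: the best
+                                 * offset recorded below is padded by
+                                 * mask_overhang(lit), so a short literal
+                                 * whose msk/cmp extends before its string
+                                 * stays reachable */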
lits) { + const AccelString &lit = *lit_ptr; + for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; + j++) { + bool found = false; + if (curr.b5insens) { + found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) + && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); + } else { + found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; + } + + if (found) { + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); + break; + } + } + } + + if (curr > best) { + best = curr; + } + + next_candidate:; + } + + if (!best.valid) { + return false; + } + + aux->dverm.offset = verify_u8(best.max_offset); + + if (!best.b5insens) { + aux->dverm.accel_type = ACCEL_DVERM; + aux->dverm.c1 = best.c1; + aux->dverm.c2 = best.c2; + DEBUG_PRINTF("built dverm for %02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + } else { + aux->dverm.accel_type = ACCEL_DVERM_NOCASE; + aux->dverm.c1 = best.c1 & CASE_CLEAR; + aux->dverm.c2 = best.c2 & CASE_CLEAR; + DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + } + return true; +} + +static +bool findSVerm(const vector &lits, AccelAux *aux) { + const AccelString &first = *lits.front(); + + struct candidate { + candidate(void) + : c(0), max_offset(0), b5insens(false), valid(false) {} + candidate(const AccelString &base, u32 offset) + : c(base.s[offset]), max_offset(0), + b5insens(false), valid(true) {} + char c; + u32 max_offset; + bool b5insens; + bool valid; + + bool operator>(const candidate &other) const { + if (!valid) { + return false; + } + + if (!other.valid) { + return true; + } + + if (!other.b5insens && b5insens) { + return false; + } + + if (other.b5insens && !b5insens) { + return true; + } + + if (max_offset > other.max_offset) { + return false; + } + + return true; + } + }; + + candidate best; + + for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) { + candidate curr(first, i); + + /* check to see if this pair appears in each string */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + if (lit.nocase && ourisalpha(curr.c)) { + curr.b5insens = true; /* no choice but to be case insensitive */ + } + + bool found = false; + bool found_nc = false; + for (u32 j = 0; + !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { + found |= curr.c == lit.s[j]; + found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); + + if (curr.b5insens) { + found = found_nc; + } + } + + if (!curr.b5insens && !found && found_nc) { + curr.b5insens = true; + found = true; + } + + if (!found) { + goto next_candidate; + } + } + + /* check to find the max offset where this appears */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { + bool found = false; + if (curr.b5insens) { + found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); + } else { + found = curr.c == lit.s[j]; + } + + if (found) { + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); + } + } + } + + if (curr > best) { + best = curr; + } + + next_candidate:; + } + + if (!best.valid) { + return false; + } + + if (!best.b5insens) { + aux->verm.accel_type = ACCEL_VERM; + aux->verm.c = best.c; + DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c); + } else { + aux->verm.accel_type = ACCEL_VERM_NOCASE; + aux->verm.c = best.c & CASE_CLEAR; + DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c); + } + aux->verm.offset = 
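+        /* verify_u8 asserts that the chosen offset fits the u8 field */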
verify_u8(best.max_offset); + + return true; +} + +static +void filterLits(const vector &lits, hwlm_group_t expected_groups, + vector *filtered_lits, u32 *min_len) { + *min_len = MAX_ACCEL_OFFSET; + + for (const auto &lit : lits) { + if (!(lit.groups & expected_groups)) { + continue; + } + + const size_t lit_len = lit.s.length(); + if (lit_len < *min_len) { + *min_len = verify_u32(lit_len); + } + + filtered_lits->push_back(&lit); + +#ifdef DEBUG + DEBUG_PRINTF("lit:"); + for (u32 i = 0; i < lit.s.length(); i++) { + printf("%02hhx", lit.s[i]); + } + printf("\n"); +#endif + } +} + +static +bool litGuardedByCharReach(const CharReach &cr, const AccelString &lit, + u32 max_offset) { + for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) { + unsigned char c = lit.s[i]; + if (lit.nocase) { + if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) { + return true; + } + } else { + if (cr.test(c)) { + return true; + } + } + } + + return false; +} + +static +void findForwardAccelScheme(const vector &lits, + hwlm_group_t expected_groups, AccelAux *aux) { + DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups); + u32 min_len = MAX_ACCEL_OFFSET; + vector filtered_lits; + + filterLits(lits, expected_groups, &filtered_lits, &min_len); + if (filtered_lits.empty()) { + return; + } + + if (findDVerm(filtered_lits, aux) + || findSVerm(filtered_lits, aux)) { + return; + } + + /* look for shufti/truffle */ + + vector reach(MAX_ACCEL_OFFSET, CharReach()); + for (const auto &lit : lits) { + if (!(lit.groups & expected_groups)) { + continue; + } + + u32 overhang = mask_overhang(lit); + for (u32 i = 0; i < overhang; i++) { + /* this offset overhangs the start of the real literal; look at the + * msk/cmp */ + for (u32 j = 0; j < N_CHARS; j++) { + if ((j & lit.msk[i]) == lit.cmp[i]) { + reach[i].set(j); + } + } + } + for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) { + CharReach &reach_i = reach[i]; + u32 i_effective = i - overhang; + + if (litGuardedByCharReach(reach_i, lit, i_effective)) { + continue; + } + unsigned char c = i_effective < lit.s.length() ? 
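+                /* offsets past the end of this literal fall back
+                 * to its final byte */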
lit.s[i_effective] + : lit.s.back(); + if (lit.nocase) { + reach_i.set(mytoupper(c)); + reach_i.set(mytolower(c)); + } else { + reach_i.set(c); + } + } + } + + u32 min_count = ~0U; + u32 min_offset = ~0U; + for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) { + size_t count = reach[i].count(); + DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, + describeClass(reach[i]).c_str(), count); + if (count < min_count) { + min_count = (u32)count; + min_offset = i; + } + } + + if (min_count > MAX_SHUFTI_WIDTH) { + DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count); + return; + } + + const CharReach &cr = reach[min_offset]; + if (-1 != + shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { + DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", + describeClass(cr).c_str(), cr.count(), min_offset); + aux->shufti.accel_type = ACCEL_SHUFTI; + aux->shufti.offset = verify_u8(min_offset); + return; + } + + truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); + DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", + describeClass(cr).c_str(), cr.count(), min_offset); + aux->truffle.accel_type = ACCEL_TRUFFLE; + aux->truffle.offset = verify_u8(min_offset); +} + +void buildForwardAccel(HWLM *h, const vector &lits, + hwlm_group_t expected_groups) { + findForwardAccelScheme(lits, expected_groups, &h->accel1); + findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0); + + h->accel1_groups = expected_groups; +} + +} // namespace ue2 diff --git a/src/rose/rose_build_lit_accel.h b/src/rose/rose_build_lit_accel.h new file mode 100644 index 00000000..f0c01434 --- /dev/null +++ b/src/rose/rose_build_lit_accel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ROSE_BUILD_LIT_ACCEL_H +#define ROSE_BUILD_LIT_ACCEL_H + +#include "hwlm/hwlm.h" + +#include +#include +#include +#include + +struct HWLM; + +namespace ue2 { + +struct AccelString { + AccelString(std::string s_in, bool nocase_in, std::vector msk_in, + std::vector cmp_in, hwlm_group_t groups_in) + : s(std::move(s_in)), nocase(nocase_in), msk(std::move(msk_in)), + cmp(std::move(cmp_in)), groups(groups_in) {} + + std::string s; + bool nocase; + std::vector msk; + std::vector cmp; + hwlm_group_t groups; + + bool operator==(const AccelString &a) const { + return s == a.s && nocase == a.nocase && msk == a.msk && cmp == a.cmp && + groups == a.groups; + } + + bool operator<(const AccelString &a) const { + return std::tie(s, nocase, msk, cmp, groups) < + std::tie(a.s, a.nocase, a.msk, a.cmp, a.groups); + } +}; + +void buildForwardAccel(HWLM *h, const std::vector &lits, + hwlm_group_t expected_groups); + +} // namespace ue2 + +#endif // ROSE_BUILD_LIT_ACCEL_H diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index c51905ca..8d6f68df 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -34,6 +34,7 @@ #include "rose_build_matchers.h" #include "rose_build_impl.h" +#include "rose_build_lit_accel.h" #include "rose_build_width.h" #include "hwlm/hwlm_build.h" #include "hwlm/hwlm_literal.h" @@ -801,6 +802,8 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, throw CompileError("Unable to generate bytecode."); } + buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + if (build.cc.streaming) { DEBUG_PRINTF("history_required=%zu\n", mp.history_required); assert(mp.history_required <= build.cc.grey.maxHistoryAvailable); @@ -866,6 +869,8 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } + buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + *sbsize = hwlmSize(hwlm.get()); assert(*sbsize); DEBUG_PRINTF("built small block literal table size %zu bytes\n", *sbsize); @@ -893,6 +898,8 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } + buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + *esize = hwlmSize(hwlm.get()); assert(*esize); DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize); From 68a35ff3b8a35c5c1552823c9ef11b4b9474d747 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 22 Dec 2016 16:33:14 +1100 Subject: [PATCH 014/326] rose: give longer literals to accel analysis --- src/rose/rose_build_matchers.cpp | 52 +++++++++++++++++++++++++++----- src/rose/rose_build_matchers.h | 14 +++++++-- src/util/container.h | 10 +++++- 3 files changed, 66 insertions(+), 10 deletions(-) diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 8d6f68df..2e7305f7 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -59,6 +59,8 @@ using boost::adaptors::map_values; namespace ue2 { +static const size_t MAX_ACCEL_STRING_LEN = 16; + #ifdef DEBUG static UNUSED string dumpMask(const vector &v) { @@ -652,6 +654,16 @@ map makeFragGroupMap(const RoseBuildImpl &build, return frag_to_group; } +template +void trim_to_suffix(Container &c, size_t len) { + if (c.size() <= len) { + return; + } + + size_t suffix_len = c.size() - len; + c.erase(c.begin(), c.begin() + suffix_len); +} + MatcherProto makeMatcherProto(const RoseBuildImpl &build, const map &final_to_frag_map, 
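                              /* final literal id -> fragment id; the
                               * fragment's groups are applied to the
                               * lits below */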
rose_literal_table table, size_t max_len, @@ -726,6 +738,7 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, continue; } + mp.accel_lits.emplace_back(s, nocase, msk, cmp, groups); mp.history_required = max(mp.history_required, lit_hist_len); mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, cmp); @@ -756,6 +769,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, continue; } + mp.accel_lits.emplace_back(lit.get_string(), lit.any_nocase(), msk, + cmp, groups); mp.history_required = max(mp.history_required, lit_hist_len); mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, cmp); @@ -772,12 +787,30 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, lit.groups = frag_group_map.at(lit.id); } - sort(begin(mp.lits), end(mp.lits)); - mp.lits.erase(unique(begin(mp.lits), end(mp.lits)), end(mp.lits)); + sort_and_unique(mp.lits); + + // Literals used for acceleration must be limited to max_len, as that's all + // we can see in history. + for_each(begin(mp.accel_lits), end(mp.accel_lits), + [&max_len](AccelString &a) { + trim_to_suffix(a.s, max_len); + trim_to_suffix(a.msk, max_len); + trim_to_suffix(a.cmp, max_len); + }); + + sort_and_unique(mp.accel_lits); return mp; } +void MatcherProto::insert(const MatcherProto &a) { + ::ue2::insert(&lits, lits.end(), a.lits); + ::ue2::insert(&accel_lits, accel_lits.end(), a.accel_lits); + sort_and_unique(lits); + sort_and_unique(accel_lits); + history_required = max(history_required, a.history_required); +} + aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, const map &final_to_frag_map, @@ -802,7 +835,9 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, throw CompileError("Unable to generate bytecode."); } - buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + if (build.cc.grey.hamsterAccelForward) { + buildForwardAccel(hwlm.get(), mp.accel_lits, build.getInitialGroups()); + } if (build.cc.streaming) { DEBUG_PRINTF("history_required=%zu\n", mp.history_required); @@ -851,8 +886,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - mp.lits.insert(mp.lits.end(), mp_anchored.lits.begin(), - mp_anchored.lits.end()); + mp.insert(mp_anchored); // None of our literals should be longer than the small block limit. 
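    // (both protos were built with max_len capped accordingly, so the
    // assert below should hold by construction).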
assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) { @@ -869,7 +903,9 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } - buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + if (build.cc.grey.hamsterAccelForward) { + buildForwardAccel(hwlm.get(), mp.accel_lits, build.getInitialGroups()); + } *sbsize = hwlmSize(hwlm.get()); assert(*sbsize); @@ -898,7 +934,9 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } - buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups()); + if (build.cc.grey.hamsterAccelForward) { + buildForwardAccel(hwlm.get(), mp.accel_lits, build.getInitialGroups()); + } *esize = hwlmSize(hwlm.get()); assert(*esize); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 742e8a14..a1817307 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -35,6 +35,8 @@ #define ROSE_BUILD_MATCHERS_H #include "rose_build_impl.h" +#include "rose_build_lit_accel.h" +#include "hwlm/hwlm_literal.h" #include #include @@ -43,11 +45,19 @@ struct HWLM; namespace ue2 { -struct hwlmLiteral; - +/** \brief Prototype for literal matcher construction. */ struct MatcherProto { + /** \brief Literal fragments used to construct the literal matcher. */ std::vector lits; + + /** \brief Longer literals used for acceleration analysis. */ + std::vector accel_lits; + + /** \brief The history required by the literal matcher. */ size_t history_required = 0; + + /** \brief Insert the contents of another MatcherProto. */ + void insert(const MatcherProto &a); }; /** diff --git a/src/util/container.h b/src/util/container.h index e2cfb485..24f01fd2 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -89,6 +89,14 @@ auto make_vector_from(const std::pair &range) return std::vector(range.first, range.second); } +/** \brief Sort a sequence container and remove duplicates. */ +template +void sort_and_unique(C &container) { + std::sort(std::begin(container), std::end(container)); + container.erase(std::unique(std::begin(container), std::end(container)), + std::end(container)); +} + /** \brief Returns a set containing the keys in the given associative * container. 
*/ template From 1584da90254bd7601a86a258779f08ba4a4ac905 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 10 Jan 2017 10:07:12 +1100 Subject: [PATCH 015/326] rose: don't accel noodle engines --- src/rose/rose_build_matchers.cpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 2e7305f7..dfbde375 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -37,6 +37,7 @@ #include "rose_build_lit_accel.h" #include "rose_build_width.h" #include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_internal.h" #include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/nfa_api_queue.h" @@ -811,6 +812,20 @@ void MatcherProto::insert(const MatcherProto &a) { history_required = max(history_required, a.history_required); } +static +void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp, + HWLM &hwlm) { + if (!build.cc.grey.hamsterAccelForward) { + return; + } + + if (hwlm.type == HWLM_ENGINE_NOOD) { + return; + } + + buildForwardAccel(&hwlm, mp.accel_lits, build.getInitialGroups()); +} + aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, const map &final_to_frag_map, @@ -835,9 +850,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, throw CompileError("Unable to generate bytecode."); } - if (build.cc.grey.hamsterAccelForward) { - buildForwardAccel(hwlm.get(), mp.accel_lits, build.getInitialGroups()); - } + buildAccel(build, mp, *hwlm); if (build.cc.streaming) { DEBUG_PRINTF("history_required=%zu\n", mp.history_required); @@ -903,9 +916,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } - if (build.cc.grey.hamsterAccelForward) { - buildForwardAccel(hwlm.get(), mp.accel_lits, build.getInitialGroups()); - } + buildAccel(build, mp, *hwlm); *sbsize = hwlmSize(hwlm.get()); assert(*sbsize); @@ -934,9 +945,7 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build, throw CompileError("Unable to generate bytecode."); } - if (build.cc.grey.hamsterAccelForward) { - buildForwardAccel(hwlm.get(), mp.accel_lits, build.getInitialGroups()); - } + buildAccel(build, mp, *hwlm); *esize = hwlmSize(hwlm.get()); assert(*esize); From 12edb07f6ee9dc9c3df32d5dff1e47057166dcab Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 23 Jan 2017 15:10:55 +1100 Subject: [PATCH 016/326] fdr_compile: remove dead var --- src/fdr/fdr_compile.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index c9d6cbcb..85342f9a 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -593,7 +593,6 @@ aligned_unique_ptr fdrBuildTableHinted(const vector &lits, bool make_small, u32 hint, const target_t &target, const Grey &grey) { - pair link(nullptr, 0); return fdrBuildTableInternal(lits, make_small, target, grey, hint); } From a55bbe657ca106987ba5a391609548f6992ba982 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 23 Jan 2017 16:20:09 +1100 Subject: [PATCH 017/326] fdr: remove clamp on confirm size --- src/fdr/fdr_confirm_compile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 4aa2df0d..30f682d1 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -166,7 +166,7 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, if (make_small) { nBits = 
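        /* nBits sets the width of the confirm hash, i.e. the table holds
         * 2^nBits chains of LitInfo entries */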
min(10U, lg2(lits.size()) + 1);
     } else {
-        nBits = min(13U, lg2(lits.size()) + 4);
+        nBits = lg2(lits.size()) + 4;
     }
 
     CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;

From 988ea6b4e14e12d99e2e981f10fc7978f285db7a Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 18 Jan 2017 10:06:10 +1100
Subject: [PATCH 018/326] rose: role aliasing improvements

These changes improve the performance of the role aliasing passes on
very large cases and fix a couple of small errors in the left and right
merge passes as well.
---
 src/rose/rose_build_role_aliasing.cpp | 453 ++++++++++++--------------
 src/util/ue2_containers.h             |  15 +-
 2 files changed, 225 insertions(+), 243 deletions(-)

diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp
index c6139097..60142156 100644
--- a/src/rose/rose_build_role_aliasing.cpp
+++ b/src/rose/rose_build_role_aliasing.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -47,6 +47,7 @@
 #include "util/container.h"
 #include "util/graph.h"
 #include "util/graph_range.h"
+#include "util/hash.h"
 #include "util/order_check.h"
 #include "util/ue2_containers.h"
@@ -111,11 +112,14 @@ struct AliasInEdge : EdgeAndVertex {
 
 class CandidateSet {
 public:
-    typedef set<RoseVertex>::iterator iterator;
-    typedef RoseVertex key_type;
+    using key_type = RoseVertex;
+    using iterator = set<RoseVertex>::iterator;
+    using const_iterator = set<RoseVertex>::const_iterator;
 
     iterator begin() { return main_cont.begin(); }
     iterator end() { return main_cont.end(); }
+    const_iterator begin() const { return main_cont.begin(); }
+    const_iterator end() const { return main_cont.end(); }
 
     bool contains(RoseVertex a) const {
         return hash_cont.find(a) != hash_cont.end();
@@ -451,37 +455,6 @@ bool sameRightRoleProperties(const RoseBuildImpl &build, RoseVertex a,
     return true;
 }
 
-/**
- * Hash on some deterministic props checked in sameRoleProperties + properties
- * required for right equivalence.
- */ -static -size_t hashRightRoleProperties(RoseVertex v, const RoseGraph &g) { - using boost::hash_combine; - using boost::hash_range; - - const RoseVertexProps &props = g[v]; - - size_t val = 0; - hash_combine(val, hash_range(begin(props.reports), end(props.reports))); - - if (props.suffix) { - const auto &suffix = props.suffix; - if (suffix.castle) { - hash_combine(val, suffix.castle->reach()); - hash_combine(val, suffix.castle->repeats.size()); - } - if (suffix.graph) { - hash_combine(val, num_vertices(*suffix.graph)); - } - if (suffix.haig) { - hash_combine(val, hash_dfa(*suffix.haig)); - } - } - - return val; -} - static void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge, const RoseEdge *to_edge, RoseGraph &g) { @@ -684,16 +657,6 @@ void findCandidates(const RoseBuildImpl &build, CandidateSet *candidates) { num_vertices(build.g)); } -static -RoseVertex pickSucc(const RoseVertex v, const RoseGraph &g) { - RoseGraph::adjacency_iterator ai, ae; - tie(ai, ae) = adjacent_vertices(v, g); - if (ai == ae) { - return RoseGraph::null_vertex(); - } - return *ai; -} - static RoseVertex pickPred(const RoseVertex v, const RoseGraph &g, const RoseBuildImpl &build) { @@ -854,7 +817,7 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g, return; } assert(isCorrectlyTopped(h)); - DEBUG_PRINTF("prunning unused tops\n"); + DEBUG_PRINTF("pruning unused tops\n"); ue2::flat_set used_tops; for (auto v : verts) { assert(g[v].left.graph.get() == &h); @@ -1427,62 +1390,95 @@ bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, return false; } +/** + * \brief Buckets that only contain one vertex are never going to lead to a + * merge. + */ static -void splitByReportSuffixBehaviour(const RoseGraph &g, - vector> &buckets, - ue2::unordered_map &inv) { - /* vertices with different report/suffixes can never be considered for right - * merge. */ - vector> out; - for (const vector &b : buckets) { - assert(!b.empty()); - map, RoseSuffixInfo>, size_t> dest_map; - for (RoseVertex v : b) { - auto key = decltype(dest_map)::key_type(g[v].reports, g[v].suffix); - size_t out_bucket; - if (contains(dest_map, key)) { - out_bucket = dest_map[key]; - } else { - out_bucket = out.size(); - out.push_back(vector()); - dest_map[key] = out_bucket; - } - out[out_bucket].push_back(v); - inv[v] = out_bucket; - } +void removeSingletonBuckets(vector> &buckets) { + auto it = remove_if( + begin(buckets), end(buckets), + [](const vector &bucket) { return bucket.size() < 2; }); + if (it != end(buckets)) { + DEBUG_PRINTF("deleting %zu singleton buckets\n", + distance(it, end(buckets))); + buckets.erase(it, end(buckets)); + } +} +static +void buildInvBucketMap(const vector> &buckets, + ue2::unordered_map &inv) { + inv.clear(); + for (size_t i = 0; i < buckets.size(); i++) { + for (auto v : buckets[i]) { + assert(!contains(inv, v)); + inv.emplace(v, i); + } + } +} + +/** + * \brief Generic splitter that will use the given split function to partition + * the vector of buckets, then remove buckets with <= 1 entry. + */ +template +void splitAndFilterBuckets(vector> &buckets, + const SplitFunction &make_split_key) { + if (buckets.empty()) { + return; } - buckets.swap(out); + vector> out; + + // Mapping from split key value to new bucket index. 
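+    // For example, splitting a bucket {a, b, c} with a key function that
+    // returns each vertex's literal table puts vertices from different
+    // tables into different buckets; singleton buckets are then discarded,
+    // as they can never produce a merge.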
+ using key_type = decltype(make_split_key(RoseGraph::null_vertex())); + unordered_map dest_map; + dest_map.reserve(buckets.front().size()); + + for (const auto &bucket : buckets) { + assert(!bucket.empty()); + dest_map.clear(); + for (RoseVertex v : bucket) { + auto p = dest_map.emplace(make_split_key(v), out.size()); + if (p.second) { // New key, add a bucket. + out.emplace_back(); + } + auto out_bucket = p.first->second; + out[out_bucket].push_back(v); + } + } + + if (out.size() == buckets.size()) { + return; // No new buckets created. + } + + buckets = move(out); + removeSingletonBuckets(buckets); +} + +static +void splitByReportSuffixBehaviour(const RoseGraph &g, + vector> &buckets) { + // Split by report set and suffix info. + auto make_split_key = [&g](RoseVertex v) { + return hash_all(g[v].reports, g[v].suffix); + }; + splitAndFilterBuckets(buckets, make_split_key); } static void splitByLiteralTable(const RoseBuildImpl &build, - vector> &buckets, - ue2::unordered_map &inv) { + vector> &buckets) { const RoseGraph &g = build.g; - vector> out; - - for (const auto &bucket : buckets) { - assert(!bucket.empty()); - map dest_map; - for (RoseVertex v : bucket) { - auto table = build.literals.right.at(*g[v].literals.begin()).table; - size_t out_bucket; - if (contains(dest_map, table)) { - out_bucket = dest_map[table]; - } else { - out_bucket = out.size(); - out.push_back(vector()); - dest_map[table] = out_bucket; - } - out[out_bucket].push_back(v); - inv[v] = out_bucket; - } - } - - buckets.swap(out); + // Split by literal table. + auto make_split_key = [&](RoseVertex v) { + const auto &lits = g[v].literals; + assert(!lits.empty()); + return build.literals.right.at(*lits.begin()).table; + }; + splitAndFilterBuckets(buckets, make_split_key); } static @@ -1543,6 +1539,9 @@ void splitByNeighbour(const RoseGraph &g, vector> &buckets, } insert(&buckets, buckets.end(), extras); } + + removeSingletonBuckets(buckets); + buildInvBucketMap(buckets, inv); } static @@ -1551,16 +1550,35 @@ splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) { const RoseGraph &g = build.g; vector> buckets(1); - ue2::unordered_map inv; - for (RoseVertex v : candidates) { - buckets[0].push_back(v); - inv[v] = 0; + buckets[0].reserve(candidates.size()); + insert(&buckets[0], buckets[0].end(), candidates); + + DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size()); + + splitByReportSuffixBehaviour(g, buckets); + DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size()); + if (buckets.empty()) { + return buckets; } - splitByReportSuffixBehaviour(g, buckets, inv); - splitByLiteralTable(build, buckets, inv); + splitByLiteralTable(build, buckets); + DEBUG_PRINTF("split by lit table, %zu buckets\n", buckets.size()); + if (buckets.empty()) { + return buckets; + } + + // Neighbour splits require inverse map. 
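+    // (the in-place neighbour splits below need inv to locate the bucket
+    // that currently holds a given vertex).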
+ ue2::unordered_map inv; + buildInvBucketMap(buckets, inv); + splitByNeighbour(g, buckets, inv, true); + DEBUG_PRINTF("split by successor, %zu buckets\n", buckets.size()); + if (buckets.empty()) { + return buckets; + } + splitByNeighbour(g, buckets, inv, false); + DEBUG_PRINTF("split by predecessor, %zu buckets\n", buckets.size()); return buckets; } @@ -1677,55 +1695,62 @@ vector::iterator findLeftMergeSibling( return end; } +static +void getLeftMergeSiblings(const RoseBuildImpl &build, RoseVertex a, + vector &siblings) { + // We have to find a sibling to merge `a' with, and we select between + // two approaches to minimize the number of vertices we have to + // examine; which we use depends on the shape of the graph. + + const RoseGraph &g = build.g; + assert(!g[a].literals.empty()); + u32 lit_id = *g[a].literals.begin(); + const auto &verts = build.literal_info.at(lit_id).vertices; + RoseVertex pred = pickPred(a, g, build); + + siblings.clear(); + + if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) || + out_degree(pred, g) > verts.size()) { + // Select sibling from amongst the vertices that share a literal. + insert(&siblings, siblings.end(), verts); + } else { + // Select sibling from amongst the vertices that share a + // predecessor. + insert(&siblings, siblings.end(), adjacent_vertices(pred, g)); + } +} + static never_inline void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build, vector *dead, RoseAliasingInfo &rai) { DEBUG_PRINTF("begin (%zu)\n", candidates.size()); - RoseGraph &g = build.g; vector siblings; - CandidateSet::iterator it = candidates.begin(); + auto it = candidates.begin(); while (it != candidates.end()) { RoseVertex a = *it; CandidateSet::iterator ait = it; ++it; - // We have to find a sibling to merge `a' with, and we select between - // two approaches to minimize the number of vertices we have to - // examine; which we use depends on the shape of the graph. + getLeftMergeSiblings(build, a, siblings); - assert(!g[a].literals.empty()); - u32 lit_id = *g[a].literals.begin(); - const auto &verts = build.literal_info.at(lit_id).vertices; - RoseVertex pred = pickPred(a, g, build); - - siblings.clear(); - if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) - || out_degree(pred, g) > verts.size()) { - // Select sibling from amongst the vertices that share a literal. - siblings.insert(siblings.end(), verts.begin(), verts.end()); - } else { - // Select sibling from amongst the vertices that share a - // predecessor. 
- insert(&siblings, siblings.end(), adjacent_vertices(pred, g)); + auto jt = siblings.begin(); + while (jt != siblings.end()) { + jt = findLeftMergeSibling(jt, siblings.end(), a, build, rai, + candidates); + if (jt == siblings.end()) { + break; + } + RoseVertex b = *jt; + if (attemptRoseMerge(build, true, a, b, false, rai)) { + mergeVerticesLeft(a, b, build, rai); + dead->push_back(a); + candidates.erase(ait); + break; // consider next a + } + ++jt; } - - auto jt = findLeftMergeSibling(siblings.begin(), siblings.end(), a, - build, rai, candidates); - if (jt == siblings.end()) { - continue; - } - - RoseVertex b = *jt; - - if (!attemptRoseMerge(build, true, a, b, 0, rai)) { - DEBUG_PRINTF("rose fail\n"); - continue; - } - - mergeVerticesLeft(a, b, build, rai); - dead->push_back(a); - candidates.erase(ait); } DEBUG_PRINTF("%zu candidates remaining\n", candidates.size()); @@ -1810,91 +1835,49 @@ vector::const_iterator findRightMergeSibling( return end; } -template static -void split(map &keys, size_t *next_key, Iter it, - const Iter end) { - map new_keys; +void splitByRightProps(const RoseGraph &g, + vector> &buckets) { + // Successor vector used in make_split_key. We declare it here so we can + // reuse storage. + vector succ; - for (; it != end; ++it) { - RoseVertex v = *it; - size_t ok = keys[v]; - size_t nk; - if (contains(new_keys, ok)) { - nk = new_keys[ok]; - } else { - nk = (*next_key)++; - new_keys[ok] = nk; - } - keys[v] = nk; - } + // Split by {successors, literals, reports}. + auto make_split_key = [&](RoseVertex v) { + succ.clear(); + insert(&succ, succ.end(), adjacent_vertices(v, g)); + sort(succ.begin(), succ.end()); + return hash_all(g[v].literals, g[v].reports, succ); + }; + splitAndFilterBuckets(buckets, make_split_key); } static never_inline -void buildCandidateRightSiblings(CandidateSet &candidates, RoseBuildImpl &build, - map> &sibling_cache, - map &keys_ext) { - RoseGraph &g = build.g; +vector> +splitRightMergeBuckets(const CandidateSet &candidates, + const RoseBuildImpl &build) { + const RoseGraph &g = build.g; - size_t next_key = 1; - map keys; + vector> buckets(1); + buckets[0].reserve(candidates.size()); + insert(&buckets[0], buckets[0].end(), candidates); - for (const auto &c : candidates) { - keys[c] = 0; + DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size()); + + splitByReportSuffixBehaviour(g, buckets); + DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size()); + if (buckets.empty()) { + return buckets; } - set done_succ; - set done_lit; - - for (auto a : candidates) { - assert(!g[a].literals.empty()); - u32 lit_id = *g[a].literals.begin(); - RoseVertex succ = pickSucc(a, g); - const auto &verts = build.literal_info.at(lit_id).vertices; - if (succ != RoseGraph::null_vertex() - && in_degree(succ, g) < verts.size()) { - if (!done_succ.insert(succ).second) { - continue; // succ already in done_succ. - } - RoseGraph::inv_adjacency_iterator ai, ae; - tie (ai, ae) = inv_adjacent_vertices(succ, g); - split(keys, &next_key, ai, ae); - } else { - if (!done_lit.insert(lit_id).second) { - continue; // lit_id already in done_lit. 
- } - split(keys, &next_key, verts.begin(), verts.end()); - } + splitByRightProps(g, buckets); + DEBUG_PRINTF("split by right-merge properties, %zu buckets\n", + buckets.size()); + if (buckets.empty()) { + return buckets; } - map> int_to_ext; - - for (const auto &key : keys) { - RoseVertex v = key.first; - u32 ext; - size_t rph = hashRightRoleProperties(v, g); - if (contains(int_to_ext[key.second], rph)) { - ext = int_to_ext[key.second][rph]; - } else { - ext = keys_ext.size(); - int_to_ext[key.second][rph] = ext; - } - - keys_ext[v] = ext; - sibling_cache[ext].push_back(v); - } - - for (auto &siblings : sibling_cache | map_values) { - sort(siblings.begin(), siblings.end()); - } -} - -static -const vector &getCandidateRightSiblings( - const map> &sibling_cache, - map &keys, RoseVertex a) { - size_t key = keys.at(a); - return sibling_cache.at(key); + return buckets; } static never_inline @@ -1903,45 +1886,31 @@ void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build, RoseAliasingInfo &rai) { DEBUG_PRINTF("begin\n"); - map> sibling_cache; - map keys; + if (candidates.empty()) { + return; + } - buildCandidateRightSiblings(candidates, build, sibling_cache, keys); + auto buckets = splitRightMergeBuckets(candidates, build); - CandidateSet::iterator it = candidates.begin(); - while (it != candidates.end()) { - RoseVertex a = *it; - CandidateSet::iterator ait = it; - ++it; - - // We have to find a sibling to merge `a' with, and we select between - // two approaches to minimize the number of vertices we have to - // examine; which we use depends on the shape of the graph. - - const vector &siblings - = getCandidateRightSiblings(sibling_cache, keys, a); - - auto jt = siblings.begin(); - while (jt != siblings.end()) { - jt = findRightMergeSibling(jt, siblings.end(), a, build, rai, - candidates); - if (jt == siblings.end()) { - break; + for (const auto &bucket : buckets) { + assert(!bucket.empty()); + for (auto it = bucket.begin(); it != bucket.end(); it++) { + RoseVertex a = *it; + for (auto jt = bucket.begin(); jt != bucket.end(); jt++) { + jt = findRightMergeSibling(jt, bucket.end(), a, build, rai, + candidates); + if (jt == bucket.end()) { + break; + } + RoseVertex b = *jt; + if (attemptRoseMerge(build, false, a, b, !mergeRoses, rai)) { + mergeVerticesRight(a, b, build, rai); + dead->push_back(a); + candidates.erase(a); + break; // consider next a + } } - if (attemptRoseMerge(build, false, a, *jt, !mergeRoses, rai)) { - break; - } - ++jt; } - - if (jt == siblings.end()) { - continue; - } - - RoseVertex b = *jt; - mergeVerticesRight(a, b, build, rai); - dead->push_back(a); - candidates.erase(ait); } DEBUG_PRINTF("%zu candidates remaining\n", candidates.size()); diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 5bbf4cfe..91115b18 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -318,6 +319,12 @@ public: friend void swap(flat_set &a, flat_set &b) { a.swap(b); } + + // Free hash function. + friend size_t hash_value(const flat_set &a) { + using boost::hash_value; + return hash_value(a.data); + } }; /** @@ -604,6 +611,12 @@ public: friend void swap(flat_map &a, flat_map &b) { a.swap(b); } + + // Free hash function. 
+ friend size_t hash_value(const flat_map &a) { + using boost::hash_value; + return hash_value(a.data); + } }; } // namespace From c31c1c366e668cedca1c32c126ad4d8aa3c237a8 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 24 Jan 2017 09:41:07 +1100 Subject: [PATCH 019/326] filterLits: better debug output --- src/rose/rose_build_lit_accel.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/rose/rose_build_lit_accel.cpp b/src/rose/rose_build_lit_accel.cpp index 1388069d..b389f493 100644 --- a/src/rose/rose_build_lit_accel.cpp +++ b/src/rose/rose_build_lit_accel.cpp @@ -38,6 +38,7 @@ #include "nfa/trufflecompile.h" #include "util/compare.h" #include "util/dump_charclass.h" +#include "util/ue2string.h" #include "util/verify_types.h" using namespace std; @@ -342,15 +343,10 @@ void filterLits(const vector &lits, hwlm_group_t expected_groups, *min_len = verify_u32(lit_len); } + DEBUG_PRINTF("lit: '%s', nocase=%d, groups=0x%llx\n", + escapeString(lit.s).c_str(), lit.nocase ? 1 : 0, + lit.groups); filtered_lits->push_back(&lit); - -#ifdef DEBUG - DEBUG_PRINTF("lit:"); - for (u32 i = 0; i < lit.s.length(); i++) { - printf("%02hhx", lit.s[i]); - } - printf("\n"); -#endif } } From f3079565844c34b5b50070a764526b6bb9805411 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 24 Jan 2017 10:01:59 +1100 Subject: [PATCH 020/326] rose: do not combine fragments which squash groups --- src/rose/rose_build_bytecode.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 6f996979..c87946f2 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4667,6 +4667,12 @@ map groupByFragment(const RoseBuildImpl &build) { continue; } + // Combining fragments that squash their groups is unsafe. + if (info.squash_group) { + final_to_frag.emplace(final_id, frag_id++); + continue; + } + DEBUG_PRINTF("fragment candidate: final_id=%u %s\n", final_id, dumpString(lit.s).c_str()); auto frag = getFragment(lit); From 843ead266f8a644c2fab9634cfc99f3388a61173 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 5 Dec 2016 12:39:25 +1100 Subject: [PATCH 021/326] flat_set: modernize iter_wrapper ctors --- src/util/ue2_containers.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 91115b18..0292741c 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -58,8 +58,8 @@ class iter_wrapper : public boost::iterator_facade, Value, boost::random_access_traversal_tag> { public: - iter_wrapper() {} - explicit iter_wrapper(const WrappedIter &it_in) : it(it_in) {} + iter_wrapper() = default; + explicit iter_wrapper(WrappedIter it_in) : it(std::move(it_in)) {} // Templated copy-constructor to allow for interoperable iterator and // const_iterator. 
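
The hash_value friends added to flat_set and flat_map above follow the
Boost.Hash extension protocol: boost::hash<T> finds a free hash_value(T) via
argument-dependent lookup. A standalone analogue of the pattern (the names
here are ours, for illustration):

    #include <boost/functional/hash.hpp>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    namespace demo {
    struct IntSet {
        std::vector<int> data; // kept sorted, as in flat_set

        // Free hash function, found via ADL by boost::hash.
        friend std::size_t hash_value(const IntSet &a) {
            return boost::hash_range(a.data.begin(), a.data.end());
        }
    };
    } // namespace demo

    int main() {
        demo::IntSet a{{1, 2, 3}};
        demo::IntSet b{{1, 2, 3}};
        assert(boost::hash<demo::IntSet>()(a) == boost::hash<demo::IntSet>()(b));
        return 0;
    }

Because these containers keep their elements sorted, equal sets hash equally
regardless of insertion order, which is what the unit tests added later in
this series check.
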
@@ -68,10 +68,10 @@ private: public: template - iter_wrapper(const iter_wrapper &other, + iter_wrapper(iter_wrapper other, typename std::enable_if::value>::type * = nullptr) - : it(other.it) {} + : it(std::move(other.it)) {} WrappedIter get() const { return it; } From 5f60812c9b17482ad228a8f0007685d5095d2b49 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 2 Dec 2016 16:40:06 +1100 Subject: [PATCH 022/326] flat_set: cheesy tuple-based ebo --- src/util/ue2_containers.h | 226 +++++++++++++++++++++----------------- 1 file changed, 125 insertions(+), 101 deletions(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 0292741c..c76dd88a 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -91,6 +91,21 @@ private: Value &dereference() const { return *it; } }; +template +class flat_base { +protected: + // Underlying storage is a sorted std::vector. + using storage_type = std::vector; + + // Putting our storage and comparator in a tuple allows us to make use of + // the empty base class optimization (if this STL implements it for + // std::tuple). + std::tuple storage; + + flat_base(const Compare &compare, const Allocator &alloc) + : storage(storage_type(alloc), compare) {} +}; + } // namespace flat_detail /** @@ -103,33 +118,37 @@ private: */ template , class Allocator = std::allocator> -class flat_set { - // Underlying storage is a sorted std::vector. - using StorageT = std::vector; +class flat_set : flat_detail::flat_base { + using base_type = flat_detail::flat_base; + using storage_type = typename base_type::storage_type; - Compare comp; - StorageT data; + storage_type &data() { return std::get<0>(this->storage); } + const storage_type &data() const { return std::get<0>(this->storage); } + + Compare &comp() { return std::get<1>(this->storage); } + const Compare &comp() const { return std::get<1>(this->storage); } public: // Member types. using key_type = T; using value_type = T; - using size_type = typename StorageT::size_type; - using difference_type = typename StorageT::difference_type; + using size_type = typename storage_type::size_type; + using difference_type = typename storage_type::difference_type; using key_compare = Compare; using value_compare = Compare; using allocator_type = Allocator; using reference = value_type &; using const_reference = const value_type &; - using pointer = typename std::allocator_traits::pointer; - using const_pointer = typename std::allocator_traits::const_pointer; + using allocator_traits_type = typename std::allocator_traits; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; // Iterator types. - using iterator = flat_detail::iter_wrapper; using const_iterator = - flat_detail::iter_wrapper; using reverse_iterator = std::reverse_iterator; @@ -139,19 +158,19 @@ public: flat_set(const Compare &compare = Compare(), const Allocator &alloc = Allocator()) - : comp(compare), data(alloc) {} + : base_type(compare, alloc) {} template flat_set(InputIt first, InputIt last, const Compare &compare = Compare(), const Allocator &alloc = Allocator()) - : comp(compare), data(alloc) { + : flat_set(compare, alloc) { insert(first, last); } flat_set(std::initializer_list init, const Compare &compare = Compare(), const Allocator &alloc = Allocator()) - : comp(compare), data(alloc) { + : flat_set(compare, alloc) { insert(init.begin(), init.end()); } @@ -163,17 +182,17 @@ public: // Other members. 
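
Whether the tuple actually saves space depends on the standard library
compressing empty tuple elements, as the comment above notes. A quick,
implementation-dependent way to check a given toolchain (illustrative only):

    #include <cstdio>
    #include <functional>
    #include <tuple>
    #include <vector>

    struct Plain {
        std::less<int> comp; // empty type, but still occupies storage as a
                             // member, plus alignment padding
        std::vector<int> data;
    };

    struct TupleStored {
        std::tuple<std::vector<int>, std::less<int>> storage;
    };

    int main() {
        // With EBO inside std::tuple, TupleStored is typically the size of
        // the vector alone, while Plain pays padding for the comparator.
        std::printf("plain=%zu tuple=%zu vector=%zu\n", sizeof(Plain),
                    sizeof(TupleStored), sizeof(std::vector<int>));
        return 0;
    }
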
allocator_type get_allocator() const { - return data.get_allocator(); + return data().get_allocator(); } // Iterators. - iterator begin() { return iterator(data.begin()); } - const_iterator cbegin() const { return const_iterator(data.cbegin()); } + iterator begin() { return iterator(data().begin()); } + const_iterator cbegin() const { return const_iterator(data().cbegin()); } const_iterator begin() const { return cbegin(); } - iterator end() { return iterator(data.end()); } - const_iterator cend() const { return const_iterator(data.cend()); } + iterator end() { return iterator(data().end()); } + const_iterator cend() const { return const_iterator(data().cend()); } const_iterator end() const { return cend(); } reverse_iterator rbegin() { return reverse_iterator(end()); } @@ -190,20 +209,20 @@ public: // Capacity. - bool empty() const { return data.empty(); } - size_t size() const { return data.size(); } - size_t max_size() const { return data.max_size(); } + bool empty() const { return data().empty(); } + size_t size() const { return data().size(); } + size_t max_size() const { return data().max_size(); } // Modifiers. void clear() { - data.clear(); + data().clear(); } std::pair insert(const value_type &value) { - auto it = std::lower_bound(data.begin(), data.end(), value, comp); - if (it == data.end() || comp(value, *it)) { - return std::make_pair(iterator(data.insert(it, value)), true); + auto it = std::lower_bound(data().begin(), data().end(), value, comp()); + if (it == data().end() || comp()(value, *it)) { + return std::make_pair(iterator(data().insert(it, value)), true); } return std::make_pair(iterator(it), false); } @@ -213,9 +232,9 @@ public: } std::pair insert(value_type &&value) { - auto it = std::lower_bound(data.begin(), data.end(), value, comp); - if (it == data.end() || comp(value, *it)) { - return std::make_pair(iterator(data.insert(it, std::move(value))), + auto it = std::lower_bound(data().begin(), data().end(), value, comp()); + if (it == data().end() || comp()(value, *it)) { + return std::make_pair(iterator(data().insert(it, std::move(value))), true); } return std::make_pair(iterator(it), false); @@ -242,11 +261,11 @@ public: } void erase(iterator pos) { - data.erase(pos.get()); + data().erase(pos.get()); } void erase(iterator first, iterator last) { - data.erase(first.get(), last.get()); + data().erase(first.get(), last.get()); } void erase(const key_type &key) { @@ -258,8 +277,8 @@ public: void swap(flat_set &a) { using std::swap; - swap(comp, a.comp); - swap(data, a.data); + swap(comp(), a.comp()); + swap(data(), a.data()); } // Lookup. @@ -269,17 +288,17 @@ public: } iterator find(const value_type &value) { - auto it = std::lower_bound(data.begin(), data.end(), value, comp); - if (it != data.end() && comp(value, *it)) { - it = data.end(); + auto it = std::lower_bound(data().begin(), data().end(), value, comp()); + if (it != data().end() && comp()(value, *it)) { + it = data().end(); } return iterator(it); } const_iterator find(const value_type &value) const { - auto it = std::lower_bound(data.begin(), data.end(), value, comp); - if (it != data.end() && comp(value, *it)) { - it = data.end(); + auto it = std::lower_bound(data().begin(), data().end(), value, comp()); + if (it != data().end() && comp()(value, *it)) { + it = data().end(); } return const_iterator(it); } @@ -287,32 +306,32 @@ public: // Observers. key_compare key_comp() const { - return comp; + return comp(); } value_compare value_comp() const { - return comp; + return comp(); } // Operators. 
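
The insert and find implementations above are the whole trick behind
flat_set: binary-search for the position, then either report the existing
element or shift the tail. The core operation in isolation (a sketch, not the
source):

    #include <algorithm>
    #include <vector>

    // Returns true if value was inserted, false if an equivalent element
    // was already present. O(log n) search plus an O(n) shift on insert.
    template <class T, class Compare>
    bool sorted_vector_insert(std::vector<T> &data, const T &value,
                              Compare comp) {
        auto it = std::lower_bound(data.begin(), data.end(), value, comp);
        if (it == data.end() || comp(value, *it)) {
            data.insert(it, value);
            return true;
        }
        return false; // *it is equivalent to value: neither compares less
    }

The "it == end || comp(value, *it)" test is the standard way to turn
lower_bound into an exact-match check using only less-than semantics.
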
bool operator==(const flat_set &a) const { - return data == a.data; + return data() == a.data(); } bool operator!=(const flat_set &a) const { - return data != a.data; + return data() != a.data(); } bool operator<(const flat_set &a) const { - return data < a.data; + return data() < a.data(); } bool operator<=(const flat_set &a) const { - return data <= a.data; + return data() <= a.data(); } bool operator>(const flat_set &a) const { - return data > a.data; + return data() > a.data(); } bool operator>=(const flat_set &a) const { - return data >= a.data; + return data() >= a.data(); } // Free swap function for ADL. @@ -343,7 +362,7 @@ public: */ template , class Allocator = std::allocator>> -class flat_map { +class flat_map : flat_detail::flat_base, Compare, Allocator> { public: // Member types. using key_type = Key; @@ -351,28 +370,33 @@ public: using value_type = std::pair; private: - // Underlying storage is a sorted std::vector. - using storage_type = std::pair; - using StorageT = std::vector; + using base_type = + flat_detail::flat_base, Compare, Allocator>; + using keyval_storage_type = std::pair; + using storage_type = typename base_type::storage_type; - Compare comp; - StorageT data; + storage_type &data() { return std::get<0>(this->storage); } + const storage_type &data() const { return std::get<0>(this->storage); } + + Compare &comp() { return std::get<1>(this->storage); } + const Compare &comp() const { return std::get<1>(this->storage); } public: // More Member types. - using size_type = typename StorageT::size_type; - using difference_type = typename StorageT::difference_type; + using size_type = typename storage_type::size_type; + using difference_type = typename storage_type::difference_type; using key_compare = Compare; using allocator_type = Allocator; using reference = value_type &; using const_reference = const value_type &; - using pointer = typename std::allocator_traits::pointer; - using const_pointer = typename std::allocator_traits::const_pointer; + using allocator_traits_type = typename std::allocator_traits; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; public: using const_iterator = - flat_detail::iter_wrapper; + flat_detail::iter_wrapper; using const_reverse_iterator = std::reverse_iterator; @@ -384,19 +408,19 @@ public: flat_map(const Compare &compare = Compare(), const Allocator &alloc = Allocator()) - : comp(compare), data(alloc) {} + : base_type(compare, alloc) {} template flat_map(InputIt first, InputIt last, const Compare &compare = Compare(), const Allocator &alloc = Allocator()) - : comp(compare), data(alloc) { + : flat_map(compare, alloc) { insert(first, last); } flat_map(std::initializer_list init, const Compare &compare = Compare(), const Allocator &alloc = Allocator()) - : comp(compare), data(alloc) { + : flat_map(compare, alloc) { insert(init.begin(), init.end()); } @@ -408,15 +432,15 @@ public: // Other members. allocator_type get_allocator() const { - return data.get_allocator(); + return data().get_allocator(); } // Iterators. 
- const_iterator cbegin() const { return const_iterator(data.cbegin()); } + const_iterator cbegin() const { return const_iterator(data().cbegin()); } const_iterator begin() const { return cbegin(); } - const_iterator cend() const { return const_iterator(data.cend()); } + const_iterator cend() const { return const_iterator(data().cend()); } const_iterator end() const { return cend(); } const_reverse_iterator crbegin() const { @@ -431,59 +455,59 @@ public: // Capacity. - bool empty() const { return data.empty(); } - size_t size() const { return data.size(); } - size_t max_size() const { return data.max_size(); } + bool empty() const { return data().empty(); } + size_t size() const { return data().size(); } + size_t max_size() const { return data().max_size(); } private: - using storage_iterator = typename StorageT::iterator; - using storage_const_iterator = typename StorageT::const_iterator; + using storage_iterator = typename storage_type::iterator; + using storage_const_iterator = typename storage_type::const_iterator; storage_iterator data_lower_bound(const key_type &key) { return std::lower_bound( - data.begin(), data.end(), key, - [&](const storage_type &elem, const key_type &k) { - return comp(elem.first, k); + data().begin(), data().end(), key, + [&](const keyval_storage_type &elem, const key_type &k) { + return comp()(elem.first, k); }); } storage_const_iterator data_lower_bound(const key_type &key) const { return std::lower_bound( - data.begin(), data.end(), key, - [&](const storage_type &elem, const key_type &k) { - return comp(elem.first, k); + data().begin(), data().end(), key, + [&](const keyval_storage_type &elem, const key_type &k) { + return comp()(elem.first, k); }); } std::pair data_insert(const value_type &value) { auto it = data_lower_bound(value.first); - if (it == data.end() || comp(value.first, it->first)) { - return std::make_pair(data.insert(it, value), true); + if (it == data().end() || comp()(value.first, it->first)) { + return std::make_pair(data().insert(it, value), true); } return std::make_pair(it, false); } std::pair data_insert(value_type &&value) { auto it = data_lower_bound(value.first); - if (it == data.end() || comp(value.first, it->first)) { - return std::make_pair(data.insert(it, std::move(value)), true); + if (it == data().end() || comp()(value.first, it->first)) { + return std::make_pair(data().insert(it, std::move(value)), true); } return std::make_pair(it, false); } storage_iterator data_find(const key_type &key) { auto it = data_lower_bound(key); - if (it != data.end() && comp(key, it->first)) { - it = data.end(); + if (it != data().end() && comp()(key, it->first)) { + it = data().end(); } return it; } storage_const_iterator data_find(const key_type &key) const { auto it = data_lower_bound(key); - if (it != data.end() && comp(key, it->first)) { - it = data.end(); + if (it != data().end() && comp()(key, it->first)) { + it = data().end(); } return it; } @@ -492,7 +516,7 @@ public: // Modifiers. void clear() { - data.clear(); + data().clear(); } std::pair insert(const value_type &value) { @@ -523,15 +547,15 @@ public: void erase(iterator pos) { // Convert to a non-const storage iterator via pointer arithmetic. - storage_iterator it = data.begin() + distance(begin(), pos); - data.erase(it); + storage_iterator it = data().begin() + distance(begin(), pos); + data().erase(it); } void erase(iterator first, iterator last) { // Convert to a non-const storage iterator via pointer arithmetic. 
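
The data_lower_bound helpers above do a heterogeneous binary search: the
stored elements are pairs, but the probe is a bare key, so the lambda's
asymmetric (element, key) signature is what makes the comparison legal. The
same pattern, standalone (a sketch under C++11 rules):

    #include <algorithm>
    #include <utility>
    #include <vector>

    // lower_bound over pair<Key, T> elements, keyed on .first only.
    template <class Key, class T, class Compare>
    typename std::vector<std::pair<Key, T>>::iterator
    keyed_lower_bound(std::vector<std::pair<Key, T>> &data, const Key &key,
                      Compare comp) {
        return std::lower_bound(
            data.begin(), data.end(), key,
            [&](const std::pair<Key, T> &elem, const Key &k) {
                return comp(elem.first, k);
            });
    }

This avoids materialising a whole pair<Key, T> just to probe for a key.
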
- storage_iterator data_first = data.begin() + distance(begin(), first); - storage_iterator data_last = data.begin() + distance(begin(), last); - data.erase(data_first, data_last); + storage_iterator data_first = data().begin() + distance(begin(), first); + storage_iterator data_last = data().begin() + distance(begin(), last); + data().erase(data_first, data_last); } void erase(const key_type &key) { @@ -543,8 +567,8 @@ public: void swap(flat_map &a) { using std::swap; - swap(comp, a.comp); - swap(data, a.data); + swap(comp(), a.comp()); + swap(data(), a.data()); } // Lookup. @@ -561,7 +585,7 @@ public: mapped_type &at(const key_type &key) { auto it = data_find(key); - if (it == data.end()) { + if (it == data().end()) { throw std::out_of_range("element not found"); } return it->second; @@ -569,7 +593,7 @@ public: const mapped_type &at(const key_type &key) const { auto it = data_find(key); - if (it == data.end()) { + if (it == data().end()) { throw std::out_of_range("element not found"); } return it->second; @@ -583,28 +607,28 @@ public: // Observers. key_compare key_comp() const { - return comp; + return comp(); } // Operators. bool operator==(const flat_map &a) const { - return data == a.data; + return data() == a.data(); } bool operator!=(const flat_map &a) const { - return data != a.data; + return data() != a.data(); } bool operator<(const flat_map &a) const { - return data < a.data; + return data() < a.data(); } bool operator<=(const flat_map &a) const { - return data <= a.data; + return data() <= a.data(); } bool operator>(const flat_map &a) const { - return data > a.data; + return data() > a.data(); } bool operator>=(const flat_map &a) const { - return data >= a.data; + return data() >= a.data(); } // Free swap function for ADL. From a55c03d1f4afe29734acea616709b9e89f33fba5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 22 Dec 2016 13:37:00 +1100 Subject: [PATCH 023/326] flat_set/map: back with small_vector --- src/util/ue2_containers.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index c76dd88a..5af1ad8d 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -36,7 +36,8 @@ #include #include -#include +#include +#include #include #include #include @@ -94,8 +95,9 @@ private: template class flat_base { protected: - // Underlying storage is a sorted std::vector. - using storage_type = std::vector; + // Underlying storage is a small vector with local space for one element. + using storage_type = boost::container::small_vector; + using storage_alloc_type = typename storage_type::allocator_type; // Putting our storage and comparator in a tuple allows us to make use of // the empty base class optimization (if this STL implements it for @@ -103,7 +105,7 @@ protected: std::tuple storage; flat_base(const Compare &compare, const Allocator &alloc) - : storage(storage_type(alloc), compare) {} + : storage(storage_type(storage_alloc_type(alloc)), compare) {} }; } // namespace flat_detail @@ -341,8 +343,7 @@ public: // Free hash function. friend size_t hash_value(const flat_set &a) { - using boost::hash_value; - return hash_value(a.data); + return boost::hash_range(a.begin(), a.end()); } }; @@ -638,8 +639,7 @@ public: // Free hash function. 
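
Two things change in this patch: the backing store becomes
boost::container::small_vector with inline space for a single element, and
hash_value switches to boost::hash_range over the live range, since the
std::vector overload of boost::hash_value no longer applies. The
inline-storage behaviour in isolation (requires Boost 1.58 or later):

    #include <boost/container/small_vector.hpp>
    #include <cstddef>
    #include <cstdio>

    int main() {
        // Inline space for one element: the common tiny-set case never
        // touches the heap.
        boost::container::small_vector<int, 1> v;
        v.push_back(1); // stored inline
        std::printf("capacity after one push: %zu\n",
                    static_cast<std::size_t>(v.capacity()));
        v.push_back(2); // grows into heap storage
        std::printf("capacity after two:      %zu\n",
                    static_cast<std::size_t>(v.capacity()));
        return 0;
    }
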
friend size_t hash_value(const flat_map &a) { - using boost::hash_value; - return hash_value(a.data); + return boost::hash_range(a.begin(), a.end()); } }; From 27adea22246fc966e455a9de58f7bc6254ec917d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 17 Jan 2017 14:22:21 +1100 Subject: [PATCH 024/326] flat_map: small doc fix --- src/util/ue2_containers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 5af1ad8d..672cb811 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -349,7 +349,7 @@ public: /** * \brief Map container implemented internally as a sorted vector. Use this - * rather than std::map for small sets as it's faster, uses less memory and + * rather than std::map for small maps as it's faster, uses less memory and * incurs less malloc time. * * Note: we used to use boost::flat_map, but have run into problems with all From e37fdb240ae40108390bc4affd95ce5f77ae66a9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 17 Jan 2017 14:31:21 +1100 Subject: [PATCH 025/326] flat_set/map: add hash_value tests --- src/nfagraph/ng_equivalence.cpp | 4 ++-- unit/internal/flat_map.cpp | 38 +++++++++++++++++++++++++++++++++ unit/internal/flat_set.cpp | 28 ++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index 32a392a6..b9e2bd0d 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -133,7 +133,7 @@ public: friend size_t hash_value(const ClassInfo &c) { size_t val = 0; - boost::hash_combine(val, boost::hash_range(begin(c.rs), end(c.rs))); + boost::hash_combine(val, c.rs); boost::hash_combine(val, c.vertex_flags); boost::hash_combine(val, c.cr); boost::hash_combine(val, c.adjacent_cr); diff --git a/unit/internal/flat_map.cpp b/unit/internal/flat_map.cpp index 54372dec..966a5d8f 100644 --- a/unit/internal/flat_map.cpp +++ b/unit/internal/flat_map.cpp @@ -401,3 +401,41 @@ TEST(flat_map, max_size) { flat_map f; ASSERT_LE(1ULL << 24, f.max_size()); } + +TEST(flat_map, hash_value) { + const vector> input = { + {0, 0}, {3, 1}, {76, 2}, {132, 3}, {77, 4}, {99999, 5}, {100, 6}}; + for (size_t len = 0; len < input.size(); len++) { + flat_map f1(input.begin(), input.begin() + len); + flat_map f2(input.rbegin() + input.size() - len, + input.rend()); + EXPECT_EQ(hash_value(f1), hash_value(f2)); + + // Try removing an element. + auto f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + if (!f3.empty()) { + f3.erase(f3.begin()); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + } + + // Try adding an element. + f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + f3.emplace(32767, 7); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + + // Change a value, but not a key. 
+ f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + f3.erase(77); + f3.emplace(77, 10); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + } +} diff --git a/unit/internal/flat_set.cpp b/unit/internal/flat_set.cpp index 7d45cbb2..3bee0edb 100644 --- a/unit/internal/flat_set.cpp +++ b/unit/internal/flat_set.cpp @@ -392,3 +392,31 @@ TEST(flat_set, max_size) { flat_set f; ASSERT_LE(1ULL << 24, f.max_size()); } + +TEST(flat_set, hash_value) { + const vector input = {0, 15, 3, 1, 20, 32768, + 24000000, 17, 100, 101, 104, 99999}; + for (size_t len = 0; len < input.size(); len++) { + flat_set f1(input.begin(), input.begin() + len); + flat_set f2(input.rbegin() + input.size() - len, input.rend()); + EXPECT_EQ(hash_value(f1), hash_value(f2)); + + // Try removing an element. + auto f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + if (!f3.empty()) { + f3.erase(f3.begin()); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + } + + // Try adding an element. + f3 = f1; + EXPECT_EQ(hash_value(f1), hash_value(f3)); + EXPECT_EQ(hash_value(f2), hash_value(f3)); + f3.insert(32767); + EXPECT_NE(hash_value(f1), hash_value(f3)); + EXPECT_NE(hash_value(f2), hash_value(f3)); + } +} From f520599ab75b369feaf2eaace102ab01fcd60591 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 20 Jan 2017 15:45:47 +1100 Subject: [PATCH 026/326] flat_map: add value_comp() --- src/util/ue2_containers.h | 15 +++++++++++++++ unit/internal/flat_map.cpp | 1 + 2 files changed, 16 insertions(+) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 672cb811..75e0a0a4 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -611,6 +611,21 @@ public: return comp(); } + class value_compare { + friend class flat_map; + protected: + Compare c; + value_compare(Compare c_in) : c(c_in) {} + public: + bool operator()(const value_type &lhs, const value_type &rhs) { + return c(lhs.first, rhs.first); + } + }; + + value_compare value_comp() const { + return value_compare(comp()); + } + // Operators. 
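
boost::totally_ordered (from boost/operators.hpp, included above) synthesizes
!=, <=, > and >= from user-provided == and <, which is why the four
handwritten relational operators can be deleted here. Minimal standalone use
of the same base:

    #include <boost/operators.hpp>
    #include <cassert>

    struct Version : boost::totally_ordered<Version> {
        int v;
        explicit Version(int v_in) : v(v_in) {}
        bool operator==(const Version &o) const { return v == o.v; }
        bool operator<(const Version &o) const { return v < o.v; }
    };

    int main() {
        assert(Version(1) != Version(2)); // derived from ==
        assert(Version(1) <= Version(2)); // derived from <
        assert(Version(2) > Version(1));
        assert(Version(2) >= Version(2));
        return 0;
    }
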
bool operator==(const flat_map &a) const { diff --git a/unit/internal/flat_map.cpp b/unit/internal/flat_map.cpp index 966a5d8f..6a81bbfe 100644 --- a/unit/internal/flat_map.cpp +++ b/unit/internal/flat_map.cpp @@ -211,6 +211,7 @@ TEST(flat_map, custom_compare) { ASSERT_EQ(10, f.rbegin()->second); ASSERT_TRUE(flat_map_is_sorted(f)); + ASSERT_TRUE(std::is_sorted(f.begin(), f.end(), f.value_comp())); ASSERT_TRUE(flat_map_is_sorted_cmp(f, std::greater())); } From 3fb5a3702ef38410b28e6dfd54783f5db61311f1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 20 Jan 2017 15:52:55 +1100 Subject: [PATCH 027/326] flat_set/map: use boost::totally_ordered --- src/util/ue2_containers.h | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 75e0a0a4..7034dc6f 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -120,7 +121,8 @@ protected: */ template , class Allocator = std::allocator> -class flat_set : flat_detail::flat_base { +class flat_set : flat_detail::flat_base, + boost::totally_ordered> { using base_type = flat_detail::flat_base; using storage_type = typename base_type::storage_type; @@ -315,26 +317,14 @@ public: return comp(); } - // Operators. + // Operators. All others provided by boost::totally_ordered. bool operator==(const flat_set &a) const { return data() == a.data(); } - bool operator!=(const flat_set &a) const { - return data() != a.data(); - } bool operator<(const flat_set &a) const { return data() < a.data(); } - bool operator<=(const flat_set &a) const { - return data() <= a.data(); - } - bool operator>(const flat_set &a) const { - return data() > a.data(); - } - bool operator>=(const flat_set &a) const { - return data() >= a.data(); - } // Free swap function for ADL. friend void swap(flat_set &a, flat_set &b) { @@ -363,7 +353,8 @@ public: */ template , class Allocator = std::allocator>> -class flat_map : flat_detail::flat_base, Compare, Allocator> { +class flat_map : flat_detail::flat_base, Compare, Allocator>, + boost::totally_ordered> { public: // Member types. using key_type = Key; @@ -626,26 +617,14 @@ public: return value_compare(comp()); } - // Operators. + // Operators. All others provided by boost::totally_ordered. bool operator==(const flat_map &a) const { return data() == a.data(); } - bool operator!=(const flat_map &a) const { - return data() != a.data(); - } bool operator<(const flat_map &a) const { return data() < a.data(); } - bool operator<=(const flat_map &a) const { - return data() <= a.data(); - } - bool operator>(const flat_map &a) const { - return data() > a.data(); - } - bool operator>=(const flat_map &a) const { - return data() >= a.data(); - } // Free swap function for ADL. 
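
The friend swap kept above exists for the classic ADL idiom: generic code
declares "using std::swap;" and then calls swap unqualified, so a type's own
swap is picked up when one exists. For example (illustrative):

    #include <utility>

    template <class T>
    void generic_reset(T &val) {
        T empty;          // assumes T is default-constructible
        using std::swap;  // fallback for types without their own swap
        swap(val, empty); // ADL finds a friend swap, such as flat_map's
    }
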
friend void swap(flat_map &a, flat_map &b) { From adfb9fe3f98ac40712f50ec2c6a503f39c1fc214 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 20 Jan 2017 16:06:06 +1100 Subject: [PATCH 028/326] flat_set: move comp/data helpers to base --- src/util/ue2_containers.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 7034dc6f..47477ed7 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -107,6 +107,12 @@ protected: flat_base(const Compare &compare, const Allocator &alloc) : storage(storage_type(storage_alloc_type(alloc)), compare) {} + + storage_type &data() { return std::get<0>(this->storage); } + const storage_type &data() const { return std::get<0>(this->storage); } + + Compare &comp() { return std::get<1>(this->storage); } + const Compare &comp() const { return std::get<1>(this->storage); } }; } // namespace flat_detail @@ -125,12 +131,8 @@ class flat_set : flat_detail::flat_base, boost::totally_ordered> { using base_type = flat_detail::flat_base; using storage_type = typename base_type::storage_type; - - storage_type &data() { return std::get<0>(this->storage); } - const storage_type &data() const { return std::get<0>(this->storage); } - - Compare &comp() { return std::get<1>(this->storage); } - const Compare &comp() const { return std::get<1>(this->storage); } + using base_type::data; + using base_type::comp; public: // Member types. @@ -366,12 +368,8 @@ private: flat_detail::flat_base, Compare, Allocator>; using keyval_storage_type = std::pair; using storage_type = typename base_type::storage_type; - - storage_type &data() { return std::get<0>(this->storage); } - const storage_type &data() const { return std::get<0>(this->storage); } - - Compare &comp() { return std::get<1>(this->storage); } - const Compare &comp() const { return std::get<1>(this->storage); } + using base_type::data; + using base_type::comp; public: // More Member types. From a38ac6a52f153d1015a0fa589d74a22c1ec25687 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 20 Jan 2017 16:12:59 +1100 Subject: [PATCH 029/326] flat_base: take more common operations --- src/util/ue2_containers.h | 45 +++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 47477ed7..3e0d1555 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -113,6 +113,18 @@ protected: Compare &comp() { return std::get<1>(this->storage); } const Compare &comp() const { return std::get<1>(this->storage); } + +public: + // Common member types. 
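
One C++ subtlety in the hoisting above: flat_set and flat_map must redeclare
the helpers with "using base_type::data;" and "using base_type::comp;".
Inside a class template, unqualified name lookup does not search a dependent
base class, so without those declarations every call site would need this->.
A minimal illustration of the rule:

    template <class T>
    struct Base {
        T data() const { return T(); }
    };

    template <class T>
    struct Derived : Base<T> {
        using Base<T>::data; // without this (or this->data()), the call
                             // below does not compile: the dependent base
                             // is not searched by unqualified lookup
        T get() const { return data(); }
    };
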
+ using key_compare = Compare; + + Allocator get_allocator() const { + return data().get_allocator(); + } + + key_compare key_comp() const { + return comp(); + } }; } // namespace flat_detail @@ -127,8 +139,9 @@ protected: */ template , class Allocator = std::allocator> -class flat_set : flat_detail::flat_base, - boost::totally_ordered> { +class flat_set + : public flat_detail::flat_base, + public boost::totally_ordered> { using base_type = flat_detail::flat_base; using storage_type = typename base_type::storage_type; using base_type::data; @@ -140,7 +153,7 @@ public: using value_type = T; using size_type = typename storage_type::size_type; using difference_type = typename storage_type::difference_type; - using key_compare = Compare; + using key_compare = typename base_type::key_compare; using value_compare = Compare; using allocator_type = Allocator; using reference = value_type &; @@ -185,11 +198,6 @@ public: flat_set &operator=(const flat_set &) = default; flat_set &operator=(flat_set &&) = default; - // Other members. - - allocator_type get_allocator() const { - return data().get_allocator(); - } // Iterators. @@ -311,10 +319,6 @@ public: // Observers. - key_compare key_comp() const { - return comp(); - } - value_compare value_comp() const { return comp(); } @@ -355,8 +359,9 @@ public: */ template , class Allocator = std::allocator>> -class flat_map : flat_detail::flat_base, Compare, Allocator>, - boost::totally_ordered> { +class flat_map + : public flat_detail::flat_base, Compare, Allocator>, + public boost::totally_ordered> { public: // Member types. using key_type = Key; @@ -375,7 +380,7 @@ public: // More Member types. using size_type = typename storage_type::size_type; using difference_type = typename storage_type::difference_type; - using key_compare = Compare; + using key_compare = typename base_type::key_compare; using allocator_type = Allocator; using reference = value_type &; using const_reference = const value_type &; @@ -419,12 +424,6 @@ public: flat_map &operator=(const flat_map &) = default; flat_map &operator=(flat_map &&) = default; - // Other members. - - allocator_type get_allocator() const { - return data().get_allocator(); - } - // Iterators. const_iterator cbegin() const { return const_iterator(data().cbegin()); } @@ -596,10 +595,6 @@ public: // Observers. - key_compare key_comp() const { - return comp(); - } - class value_compare { friend class flat_map; protected: From f8166fac25cbbe08bfe4b1b29df3e2cf9789240a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 20 Jan 2017 16:17:48 +1100 Subject: [PATCH 030/326] flat_base: take more common operations --- src/util/ue2_containers.h | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 3e0d1555..43376a0f 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -125,6 +125,18 @@ public: key_compare key_comp() const { return comp(); } + + // Capacity. + + bool empty() const { return data().empty(); } + size_t size() const { return data().size(); } + size_t max_size() const { return data().max_size(); } + + // Modifiers. + + void clear() { + data().clear(); + } }; } // namespace flat_detail @@ -221,18 +233,8 @@ public: } const_reverse_iterator rend() const { return crend(); } - // Capacity. - - bool empty() const { return data().empty(); } - size_t size() const { return data().size(); } - size_t max_size() const { return data().max_size(); } - // Modifiers. 
- void clear() { - data().clear(); - } - std::pair insert(const value_type &value) { auto it = std::lower_bound(data().begin(), data().end(), value, comp()); if (it == data().end() || comp()(value, *it)) { @@ -442,12 +444,6 @@ public: } const_reverse_iterator rend() const { return crend(); } - // Capacity. - - bool empty() const { return data().empty(); } - size_t size() const { return data().size(); } - size_t max_size() const { return data().max_size(); } - private: using storage_iterator = typename storage_type::iterator; using storage_const_iterator = typename storage_type::const_iterator; @@ -504,10 +500,6 @@ private: public: // Modifiers. - void clear() { - data().clear(); - } - std::pair insert(const value_type &value) { auto rv = data_insert(value); return std::make_pair(iterator(rv.first), rv.second); From 1db7f3029611db2d4a66699a887426d69ee0b411 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 20 Jan 2017 16:20:22 +1100 Subject: [PATCH 031/326] flat_base: move swap() --- src/util/ue2_containers.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 43376a0f..924cae95 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -137,6 +137,12 @@ public: void clear() { data().clear(); } + + void swap(flat_base &a) { + using std::swap; + swap(comp(), a.comp()); + swap(data(), a.data()); + } }; } // namespace flat_detail @@ -210,7 +216,6 @@ public: flat_set &operator=(const flat_set &) = default; flat_set &operator=(flat_set &&) = default; - // Iterators. iterator begin() { return iterator(data().begin()); } @@ -291,12 +296,6 @@ public: } } - void swap(flat_set &a) { - using std::swap; - swap(comp(), a.comp()); - swap(data(), a.data()); - } - // Lookup. size_type count(const value_type &value) const { @@ -546,12 +545,6 @@ public: } } - void swap(flat_map &a) { - using std::swap; - swap(comp(), a.comp()); - swap(data(), a.data()); - } - // Lookup. size_type count(const key_type &key) const { From 7c2627f2c2ea895c0ad1149181840b8a70cbe0e4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 23 Jan 2017 15:00:22 +1100 Subject: [PATCH 032/326] flat_set/map: make erase() take const_iterator --- src/util/ue2_containers.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 924cae95..ea8ff7bc 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -281,11 +281,11 @@ public: return insert(value_type(std::forward(args)...)); } - void erase(iterator pos) { + void erase(const_iterator pos) { data().erase(pos.get()); } - void erase(iterator first, iterator last) { + void erase(const_iterator first, const_iterator last) { data().erase(first.get(), last.get()); } @@ -525,17 +525,12 @@ public: return insert(value_type(std::forward(args)...)); } - void erase(iterator pos) { - // Convert to a non-const storage iterator via pointer arithmetic. - storage_iterator it = data().begin() + distance(begin(), pos); - data().erase(it); + void erase(const_iterator pos) { + data().erase(pos.get()); } - void erase(iterator first, iterator last) { - // Convert to a non-const storage iterator via pointer arithmetic. 
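
Taking const_iterator in erase matches the C++11 associative containers and
lets callers erase straight through the iterators that find() hands back. It
also removes the pointer-arithmetic conversion above, because since C++11 the
underlying vector's own erase accepts a const_iterator. The underlying
behaviour, standalone:

    #include <vector>

    void demo() {
        std::vector<int> v = {1, 2, 3};
        std::vector<int>::const_iterator it = v.cbegin() + 1;
        v.erase(it); // fine since C++11: erase takes const_iterator
    }
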
- storage_iterator data_first = data().begin() + distance(begin(), first); - storage_iterator data_last = data().begin() + distance(begin(), last); - data().erase(data_first, data_last); + void erase(const_iterator first, const_iterator last) { + data().erase(first.get(), last.get()); } void erase(const key_type &key) { From df7bc22ae0cc7989b7fa016bd6df9bc00965161b Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Wed, 14 Dec 2016 21:38:03 -0500 Subject: [PATCH 033/326] fdr: remove confirm split and pull-back --- src/fdr/engine_description.h | 16 +---- src/fdr/fdr.c | 10 ++- src/fdr/fdr_compile.cpp | 2 +- src/fdr/fdr_compile_internal.h | 4 +- src/fdr/fdr_confirm_compile.cpp | 104 +++++---------------------- src/fdr/fdr_confirm_runtime.h | 12 ++-- src/fdr/fdr_engine_description.cpp | 7 +- src/fdr/fdr_engine_description.h | 5 +- src/fdr/teddy_avx2.c | 9 +-- src/fdr/teddy_compile.cpp | 4 +- src/fdr/teddy_engine_description.cpp | 41 ++++++----- src/fdr/teddy_engine_description.h | 4 +- src/fdr/teddy_runtime_common.h | 8 +-- 13 files changed, 68 insertions(+), 158 deletions(-) diff --git a/src/fdr/engine_description.h b/src/fdr/engine_description.h index 09b16179..b545e647 100644 --- a/src/fdr/engine_description.h +++ b/src/fdr/engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,29 +38,19 @@ class EngineDescription { u32 id; target_t code_target; // the target that we built this code for u32 numBuckets; - u32 confirmPullBackDistance; - u32 confirmTopLevelSplit; public: EngineDescription(u32 id_in, const target_t &code_target_in, - u32 numBuckets_in, u32 confirmPullBackDistance_in, - u32 confirmTopLevelSplit_in) - : id(id_in), code_target(code_target_in), numBuckets(numBuckets_in), - confirmPullBackDistance(confirmPullBackDistance_in), - confirmTopLevelSplit(confirmTopLevelSplit_in) {} + u32 numBuckets_in) + : id(id_in), code_target(code_target_in), numBuckets(numBuckets_in) {} virtual ~EngineDescription(); u32 getID() const { return id; } u32 getNumBuckets() const { return numBuckets; } - u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; } - u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; } - void setConfirmTopLevelSplit(u32 split) { confirmTopLevelSplit = split; } bool isValidOnTarget(const target_t &target_in) const; virtual u32 getDefaultFloodSuffixLength() const = 0; - - virtual bool typicallyHoldsOneCharLits() const { return true; } }; /** Returns a target given a CPU feature set value. 
*/ diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 23416c70..5ac8388c 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -315,7 +315,6 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, const u32 *confBase, const struct FDR_Runtime_Args *a, const u8 *ptr, u32 *last_match_id, struct zone *z) { const u8 bucket = 8; - const u8 pullback = 1; if (likely(!*conf)) { return; @@ -332,8 +331,7 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, u32 bit = findAndClearLSB_64(conf); u32 byte = bit / bucket + offset; u32 bitRem = bit % bucket; - u32 confSplit = *(ptr + byte); - u32 idx = confSplit * bucket + bitRem; + u32 idx = bitRem; u32 cf = confBase[idx]; if (!cf) { continue; @@ -353,8 +351,8 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control, id, a->ctxt); continue; } - u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a)); - confWithBit(fdrc, a, ptr_main - a->buf + byte, pullback, control, + u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1); + confWithBit(fdrc, a, ptr_main - a->buf + byte, control, last_match_id, confVal); } while (unlikely(!!*conf)); } diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 85342f9a..cd3b57de 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -147,7 +147,7 @@ FDRCompiler::setupFDR(pair, size_t> &link) { size_t tabSize = eng.getTabSizeBytes(); auto floodControlTmp = setupFDRFloodControl(lits, eng); - auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small); + auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); assert(ISALIGNED_16(tabSize)); assert(ISALIGNED_16(confirmTmp.second)); diff --git a/src/fdr/fdr_compile_internal.h b/src/fdr/fdr_compile_internal.h index 48e2ed6f..0fd59902 100644 --- a/src/fdr/fdr_compile_internal.h +++ b/src/fdr/fdr_compile_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,7 +56,7 @@ class EngineDescription; class FDREngineDescription; struct hwlmStreamingControl; -std::pair, size_t> setupFullMultiConfs( +std::pair, size_t> setupFullConfs( const std::vector &lits, const EngineDescription &eng, std::map> &bucketToLits, bool make_small); diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 30f682d1..e5969261 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,9 +45,7 @@ using namespace std; namespace ue2 { -using ConfSplitType = u8; -using BucketSplitPair = pair; -using BC2CONF = map, size_t>>; // return the number of bytes beyond a length threshold in all strings in lits @@ -151,8 +149,8 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, //#define FDR_CONFIRM_DUMP 1 static pair, size_t> -getFDRConfirm(const vector &lits, bool applyOneCharOpt, - bool make_small, bool 
make_confirm) { +getFDRConfirm(const vector &lits, bool make_small, + bool make_confirm) { vector tmpLitInfo(lits.size()); CONF_TYPE andmsk; fillLitInfo(lits, tmpLitInfo, andmsk); @@ -177,8 +175,7 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, u32 soleLitCmp = 0; u32 soleLitMsk = 0; - if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 && - lits[0].msk.empty()) || make_confirm == false) { + if (!make_confirm) { flags = FDRC_FLAG_NO_CONFIRM; if (lits[0].noruns) { flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted @@ -345,15 +342,11 @@ getFDRConfirm(const vector &lits, bool applyOneCharOpt, return {move(fdrc), actual_size}; } -static -u32 setupMultiConfirms(const vector &lits, - const EngineDescription &eng, BC2CONF &bc2Conf, - map > &bucketToLits, - bool make_small) { - u32 pullBack = eng.getConfirmPullBackDistance(); - u32 splitMask = eng.getConfirmTopLevelSplit() - 1; - bool splitHasCase = splitMask & 0x20; - +pair, size_t> +setupFullConfs(const vector &lits, + const EngineDescription &eng, + map> &bucketToLits, + bool make_small) { bool makeConfirm = true; unique_ptr teddyDescr = getTeddyDescription(eng.getID()); @@ -361,81 +354,24 @@ u32 setupMultiConfirms(const vector &lits, makeConfirm = teddyDescr->needConfirm(lits); } + BC2CONF bc2Conf; u32 totalConfirmSize = 0; for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { if (!bucketToLits[b].empty()) { - vector> vl(eng.getConfirmTopLevelSplit()); + vector vl; for (const LiteralIndex &lit_idx : bucketToLits[b]) { - hwlmLiteral lit = lits[lit_idx]; // copy - // c is last char of this literal - u8 c = *(lit.s.rbegin()); - - bool suppressSplit = false; - if (pullBack) { - // make a shorter string to work over if we're pulling back - // getFDRConfirm doesn't know about that stuff - assert(lit.s.size() >= pullBack); - lit.s.resize(lit.s.size() - pullBack); - - u8 c_sub, c_sub_msk; - if (lit.msk.empty()) { - c_sub = 0; - c_sub_msk = 0; - } else { - c_sub = *(lit.cmp.rbegin()); - c_sub_msk = *(lit.msk.rbegin()); - size_t len = lit.msk.size() - - min(lit.msk.size(), (size_t)pullBack); - lit.msk.resize(len); - lit.cmp.resize(len); - } - - // if c_sub_msk is 0xff and lit.nocase - // resteer 'c' to an exact value and set suppressSplit - if ((c_sub_msk == 0xff) && (lit.nocase)) { - suppressSplit = true; - c = c_sub; - } - } - - if (!suppressSplit && splitHasCase && lit.nocase && - ourisalpha(c)) { - vl[(u8)(mytoupper(c) & splitMask)].push_back(lit); - vl[(u8)(mytolower(c) & splitMask)].push_back(lit); - } else { - vl[c & splitMask].push_back(lit); - } + vl.push_back(lits[lit_idx]); } - for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) { - if (vl[c].empty()) { - continue; - } - DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size()); - auto key = make_pair(b, c); - auto fc = getFDRConfirm(vl[c], eng.typicallyHoldsOneCharLits(), - make_small, makeConfirm); - totalConfirmSize += fc.second; - assert(bc2Conf.find(key) == end(bc2Conf)); - bc2Conf.emplace(key, move(fc)); - } + DEBUG_PRINTF("b %d sz %zu\n", b, vl.size()); + auto fc = getFDRConfirm(vl, make_small, makeConfirm); + totalConfirmSize += fc.second; + bc2Conf.emplace(b, move(fc)); } } - return totalConfirmSize; -} -pair, size_t> -setupFullMultiConfs(const vector &lits, - const EngineDescription &eng, - map> &bucketToLits, - bool make_small) { - BC2CONF bc2Conf; - u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits, - make_small); - - u32 primarySwitch = eng.getConfirmTopLevelSplit(); u32 nBuckets = 
eng.getNumBuckets(); - u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32); + u32 totalConfSwitchSize = nBuckets * sizeof(u32); u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); auto buf = aligned_zmalloc_unique(totalSize); @@ -445,14 +381,12 @@ setupFullMultiConfs(const vector &lits, u8 *ptr = buf.get() + totalConfSwitchSize; for (const auto &m : bc2Conf) { - const BucketIndex &b = m.first.first; - const u8 &c = m.first.second; + const BucketIndex &idx = m.first; const pair, size_t> &p = m.second; // confirm offset is relative to the base of this structure, now u32 confirm_offset = verify_u32(ptr - buf.get()); memcpy(ptr, p.first.get(), p.second); ptr += p.second; - u32 idx = c * nBuckets + b; confBase[idx] = confirm_offset; } return {move(buf), totalSize}; diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 55985846..a0603c92 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -40,8 +40,8 @@ // the whole confirmation procedure static really_inline void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a, - size_t i, u32 pullBackAmount, hwlmcb_rv_t *control, - u32 *last_match, u64a conf_key) { + size_t i, hwlmcb_rv_t *control, u32 *last_match, + u64a conf_key) { assert(i < a->len); assert(ISALIGNED(fdrc)); @@ -68,7 +68,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a goto out; } - const u8 *loc = buf + i - li->size + 1 - pullBackAmount; + const u8 *loc = buf + i - li->size + 1; if (loc < buf) { u32 full_overhang = buf - loc; @@ -87,7 +87,7 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a } if (unlikely(li->flags & ComplexConfirm)) { - const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount; + const u8 *loc2 = buf + i - li->extended_size + 1; if (loc2 < buf) { u32 full_overhang = buf - loc2; size_t len_history = a->len_history; @@ -116,7 +116,7 @@ void confWithBit1(const struct FDRConfirm *fdrc, assert(ISALIGNED(fdrc)); if (unlikely(fdrc->mult)) { - confWithBit(fdrc, a, i, 0, control, last_match, conf_key); + confWithBit(fdrc, a, i, control, last_match, conf_key); return; } else { u32 id = fdrc->nBitsOrSoleID; @@ -144,7 +144,7 @@ void confWithBitMany(const struct FDRConfirm *fdrc, } if (unlikely(fdrc->mult)) { - confWithBit(fdrc, a, i, 0, control, last_match, conf_key); + confWithBit(fdrc, a, i, control, last_match, conf_key); return; } else { const u32 id = fdrc->nBitsOrSoleID; diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp index 5e923b08..e44bfbb5 100644 --- a/src/fdr/fdr_engine_description.cpp +++ b/src/fdr/fdr_engine_description.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,8 +44,7 @@ namespace ue2 { FDREngineDescription::FDREngineDescription(const FDREngineDef &def) : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), - def.numBuckets, def.confirmPullBackDistance, - def.confirmTopLevelSplit), + def.numBuckets), schemeWidth(def.schemeWidth), stride(0), bits(0) {} u32 FDREngineDescription::getDefaultFloodSuffixLength() const { @@ -55,7 +54,7 @@ u32 FDREngineDescription::getDefaultFloodSuffixLength() const { } void getFdrDescriptions(vector *out) { - static const FDREngineDef def = {0, 128, 8, 0, 1, 256}; + static const 
FDREngineDef def = {0, 128, 8, 0}; out->clear(); out->emplace_back(def); } diff --git a/src/fdr/fdr_engine_description.h b/src/fdr/fdr_engine_description.h index d4e70d4b..09c5ce86 100644 --- a/src/fdr/fdr_engine_description.h +++ b/src/fdr/fdr_engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,8 +43,6 @@ struct FDREngineDef { u32 schemeWidth; u32 numBuckets; u64a cpu_features; - u32 confirmPullBackDistance; - u32 confirmTopLevelSplit; }; class FDREngineDescription : public EngineDescription { @@ -64,7 +62,6 @@ public: explicit FDREngineDescription(const FDREngineDef &def); u32 getDefaultFloodSuffixLength() const override; - bool typicallyHoldsOneCharLits() const override { return stride == 1; } }; std::unique_ptr diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index e4a836d4..129b99c7 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -345,10 +345,7 @@ void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase, const struct FDR_Runtime_Args *a, const u8 *ptr, hwlmcb_rv_t *control, u32 *last_match) { u32 byte = bits / 8; - u32 bitRem = bits % 8; - u32 confSplit = *(ptr+byte) & 0x1f; - u32 idx = confSplit * 8 + bitRem; - u32 cf = confBase[idx]; + u32 cf = confBase[bits % 8]; if (!cf) { return; } @@ -358,7 +355,7 @@ void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase, return; } u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, last_match, confVal); + confWithBit(fdrc, a, ptr - a->buf + byte, control, last_match, confVal); } static really_inline diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 15b9665b..ac3a0203 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -309,7 +309,7 @@ TeddyCompiler::build(pair, size_t> &link) { size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; auto floodControlTmp = setupFDRFloodControl(lits, eng); - auto confirmTmp = setupFullMultiConfs(lits, eng, bucketToLits, make_small); + auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); size_t size = ROUNDUP_N(sizeof(Teddy) + maskLen + diff --git a/src/fdr/teddy_engine_description.cpp b/src/fdr/teddy_engine_description.cpp index d95f4937..9e876b0b 100644 --- a/src/fdr/teddy_engine_description.cpp +++ b/src/fdr/teddy_engine_description.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,8 +44,7 @@ namespace ue2 { TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def) : EngineDescription(def.id, targetByArchFeatures(def.cpu_features), - def.numBuckets, def.confirmPullBackDistance, - 
def.confirmTopLevelSplit), + def.numBuckets), numMasks(def.numMasks), packed(def.packed) {} u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const { @@ -66,24 +65,24 @@ bool TeddyEngineDescription::needConfirm(const vector &lits) const void getTeddyDescriptions(vector *out) { static const TeddyEngineDef defns[] = { - { 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false, 0, 1 }, - { 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true, 0, 32 }, - { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false, 0, 1 }, - { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true, 0, 32 }, - { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false, 0, 1 }, - { 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true, 0, 32 }, - { 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false, 0, 1 }, - { 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true, 0, 32 }, - { 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false, 0, 1 }, - { 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true, 0, 32 }, - { 11, 0, 1, 8, false, 0, 1 }, - { 12, 0, 1, 8, true, 0, 32 }, - { 13, 0, 2, 8, false, 0, 1 }, - { 14, 0, 2, 8, true, 0, 32 }, - { 15, 0, 3, 8, false, 0, 1 }, - { 16, 0, 3, 8, true, 0, 32 }, - { 17, 0, 4, 8, false, 0, 1 }, - { 18, 0, 4, 8, true, 0, 32 }, + { 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false }, + { 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true }, + { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false }, + { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true }, + { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false }, + { 6, 0 | HS_CPU_FEATURES_AVX2, 2, 16, true }, + { 7, 0 | HS_CPU_FEATURES_AVX2, 3, 16, false }, + { 8, 0 | HS_CPU_FEATURES_AVX2, 3, 16, true }, + { 9, 0 | HS_CPU_FEATURES_AVX2, 4, 16, false }, + { 10, 0 | HS_CPU_FEATURES_AVX2, 4, 16, true }, + { 11, 0, 1, 8, false }, + { 12, 0, 1, 8, true }, + { 13, 0, 2, 8, false }, + { 14, 0, 2, 8, true }, + { 15, 0, 3, 8, false }, + { 16, 0, 3, 8, true }, + { 17, 0, 4, 8, false }, + { 18, 0, 4, 8, true }, }; out->clear(); for (const auto &def : defns) { diff --git a/src/fdr/teddy_engine_description.h b/src/fdr/teddy_engine_description.h index 88d20139..3979a5d3 100644 --- a/src/fdr/teddy_engine_description.h +++ b/src/fdr/teddy_engine_description.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,8 +45,6 @@ struct TeddyEngineDef { u32 numMasks; u32 numBuckets; bool packed; - u32 confirmPullBackDistance; - u32 confirmTopLevelSplit; }; class TeddyEngineDescription : public EngineDescription { diff --git a/src/fdr/teddy_runtime_common.h b/src/fdr/teddy_runtime_common.h index dc65c70a..c5f0885f 100644 --- a/src/fdr/teddy_runtime_common.h +++ b/src/fdr/teddy_runtime_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -180,9 +180,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, do { u32 bit = TEDDY_FIND_AND_CLEAR_LSB(conf); u32 byte = bit / bucket + offset; - u32 bitRem = bit % bucket; - u32 confSplit = *(ptr+byte) & 0x1f; - u32 idx = confSplit * bucket + bitRem; + u32 idx = bit % bucket; u32 cf = confBase[idx]; if (!cf) { continue; @@ -193,7 +191,7 @@ void do_confWithBit_teddy(TEDDY_CONF_TYPE *conf, u8 bucket, u8 offset, continue; } u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit(fdrc, a, ptr - a->buf + byte, 0, control, + confWithBit(fdrc, a, ptr 
- a->buf + byte, control, last_match, confVal); } while (unlikely(*conf)); } From f9648019239ffb6f7e63ce70f490dcc17298070e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 14 Dec 2016 16:16:59 +1100 Subject: [PATCH 034/326] rose: explode mixed-case literals early --- src/rose/rose_build_compile.cpp | 78 +++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 0505a71e..a46b3a16 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -40,6 +40,7 @@ #include "rose_build_role_aliasing.h" #include "rose_build_util.h" #include "ue2common.h" +#include "hwlm/hwlm_literal.h" #include "nfa/nfa_internal.h" #include "nfa/rdfa.h" #include "nfagraph/ng_holder.h" @@ -102,7 +103,73 @@ bool limited_explosion(const ue2_literal &s) { return nc_count <= MAX_EXPLOSION_NC; } +static +void removeLiteralFromGraph(RoseBuildImpl &build, u32 id) { + assert(id < build.literal_info.size()); + auto &info = build.literal_info.at(id); + for (const auto &v : info.vertices) { + build.g[v].literals.erase(id); + } + info.vertices.clear(); +} + +/** + * \brief Replace the given mixed-case literal with the set of its caseless + * variants. + */ +static +void explodeLiteral(RoseBuildImpl &build, u32 id) { + const auto &lit = build.literals.right.at(id); + auto &info = build.literal_info[id]; + + assert(!info.group_mask); // not set yet + assert(info.undelayed_id == id); // we do not explode delayed literals + + for (auto it = caseIterateBegin(lit.s); it != caseIterateEnd(); ++it) { + ue2_literal new_str(*it, false); + + if (!maskIsConsistent(new_str.get_string(), false, lit.msk, lit.cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + continue; + } + + u32 new_id = + build.getLiteralId(new_str, lit.msk, lit.cmp, lit.delay, lit.table); + + DEBUG_PRINTF("adding exploded lit %u: '%s'\n", new_id, + dumpString(new_str).c_str()); + + const auto &new_lit = build.literals.right.at(new_id); + auto &new_info = build.literal_info.at(new_id); + insert(&new_info.vertices, info.vertices); + for (const auto &v : info.vertices) { + build.g[v].literals.insert(new_id); + } + + build.literal_info[new_id].undelayed_id = new_id; + if (!info.delayed_ids.empty()) { + flat_set<u32> &del_ids = new_info.delayed_ids; + for (u32 delay_id : info.delayed_ids) { + const auto &dlit = build.literals.right.at(delay_id); + u32 new_delay_id = + build.getLiteralId(new_lit.s, new_lit.msk, new_lit.cmp, + dlit.delay, dlit.table); + del_ids.insert(new_delay_id); + build.literal_info[new_delay_id].undelayed_id = new_id; + } + } + } + + // Remove the old literal and any old delay variants. + removeLiteralFromGraph(build, id); + for (u32 delay_id : info.delayed_ids) { + removeLiteralFromGraph(build, delay_id); + } + info.delayed_ids.clear(); +} + void RoseBuildImpl::handleMixedSensitivity(void) { + vector<u32> explode; for (const auto &e : literals.right) { u32 id = e.first; const rose_literal_id &lit = e.second; @@ -123,15 +190,20 @@ void RoseBuildImpl::handleMixedSensitivity(void) { // with a CHECK_LONG_LIT instruction and need unique final_ids. // TODO: we could allow explosion for literals where the prefixes // covered by CHECK_LONG_LIT are identical.
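// A minimal standalone sketch (editorial illustration, not part of this
// patch) of what "exploding" means above: a mixed-case literal is replaced
// by one caseless-consistent string per upper/lower combination of its
// alphabetic characters, which is the set that caseIterateBegin()/
// caseIterateEnd() walks. The helper name below is hypothetical; in the real
// code, limited_explosion() caps the number of caseful positions
// (MAX_EXPLOSION_NC) so the 2^k blowup stays small.
#include <cctype>
#include <string>
#include <utility>
#include <vector>

static std::vector<std::string> caseVariants(const std::string &s) {
    std::vector<std::string> out{""};
    for (char c : s) {
        std::vector<std::string> next;
        for (const std::string &prefix : out) {
            if (std::isalpha(static_cast<unsigned char>(c))) {
                // caseful position: fork into lower- and upper-case variants
                next.push_back(prefix + static_cast<char>(
                    std::tolower(static_cast<unsigned char>(c))));
                next.push_back(prefix + static_cast<char>(
                    std::toupper(static_cast<unsigned char>(c))));
            } else {
                next.push_back(prefix + c); // caseless position: copy through
            }
        }
        out = std::move(next);
    }
    return out; // e.g. "aB" -> {"ab", "aB", "Ab", "AB"}
}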
- if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX && - limited_explosion(lit.s)) { + + if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && + limited_explosion(lit.s) && literal_info[id].delayed_ids.empty()) { DEBUG_PRINTF("need to explode existing string '%s'\n", dumpString(lit.s).c_str()); - literal_info[id].requires_explode = true; + explode.push_back(id); } else { literal_info[id].requires_benefits = true; } } + + for (u32 id : explode) { + explodeLiteral(*this, id); + } } // Returns the length of the longest prefix of s that is (a) also a suffix of s From c6b2563df62492a6ac0a33b7d68943ec223e7bcc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 14 Dec 2016 17:13:49 +1100 Subject: [PATCH 035/326] rose: delete literal_info requires_explode flag --- src/rose/rose_build_bytecode.cpp | 8 +-- src/rose/rose_build_compile.cpp | 2 - src/rose/rose_build_impl.h | 1 - src/rose/rose_build_matchers.cpp | 86 ++++++++++---------------------- 4 files changed, 28 insertions(+), 69 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c87946f2..b4270278 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4660,14 +4660,8 @@ map groupByFragment(const RoseBuildImpl &build) { continue; } - // Combining exploded fragments with others is unsafe. - const auto &info = build.literal_info[lit_id]; - if (info.requires_explode) { - final_to_frag.emplace(final_id, frag_id++); - continue; - } - // Combining fragments that squash their groups is unsafe. + const auto &info = build.literal_info[lit_id]; if (info.squash_group) { final_to_frag.emplace(final_id, frag_id++); continue; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index a46b3a16..24df8427 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -568,8 +568,6 @@ void promoteEodToFloating(RoseBuildImpl &tbi, const vector &eodLiterals) { tbi.g[v].literals.insert(floating_id); } - tbi.literal_info[floating_id].requires_explode - = tbi.literal_info[eod_id].requires_explode; tbi.literal_info[floating_id].requires_benefits = tbi.literal_info[eod_id].requires_benefits; } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 7421dbfa..d5f37b5a 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -265,7 +265,6 @@ struct rose_literal_info { u32 undelayed_id = MO_INVALID_IDX; u32 final_id = MO_INVALID_IDX; /* id reported by fdr */ bool squash_group = false; - bool requires_explode = false; bool requires_benefits = false; }; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index dfbde375..e16a0ac7 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -408,7 +408,6 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { lit_info.vertices.clear(); // Preserve other properties. - new_info.requires_explode = lit_info.requires_explode; new_info.requires_benefits = lit_info.requires_benefits; } } @@ -716,66 +715,35 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, DEBUG_PRINTF("lit requires %zu bytes of history\n", lit_hist_len); assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); - if (info.requires_explode) { - DEBUG_PRINTF("exploding lit\n"); + auto lit_final = lit; // copy - // We do not require_explode for literals that need confirm - // (long/medium length literals). 
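// Worked example for the constraint above (editorial note, not part of the
// patch; ROSE_SHORT_LITERAL_LEN_MAX is assumed to be 8 here): a 12-byte
// literal is matched by FDR on its final 8 bytes only, and the full 12 bytes
// are then re-verified against history by a CHECK_LONG_LIT instruction that
// needs its own unique final_id. Exploding such a literal into its 2^k case
// variants would require one final_id per variant, so only literals short
// enough to avoid CHECK_LONG_LIT are exploded.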
- assert(lit.length() <= ROSE_SHORT_LITERAL_LEN_MAX); - - case_iter cit = caseIterateBegin(lit); - case_iter cite = caseIterateEnd(); - for (; cit != cite; ++cit) { - string s = *cit; - bool nocase = false; - - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d msk=%s, " - "cmp=%s (exploded)\n", - final_id, escapeString(s).c_str(), nocase, noruns, - dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - - if (!maskIsConsistent(s, nocase, msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); - continue; - } - - mp.accel_lits.emplace_back(s, nocase, msk, cmp, groups); - mp.history_required = max(mp.history_required, lit_hist_len); - mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, - msk, cmp); - } - } else { - auto lit_final = lit; // copy - - if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("truncating to tail of length %zu\n", - size_t{ROSE_SHORT_LITERAL_LEN_MAX}); - lit_final.erase(0, lit_final.length() - - ROSE_SHORT_LITERAL_LEN_MAX); - // We shouldn't have set a threshold below 8 chars. - assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); - assert(!noruns); - } - - const auto &s = lit_final.get_string(); - bool nocase = lit_final.any_nocase(); - - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " - "cmp=%s\n", - final_id, escapeString(s).c_str(), (int)nocase, noruns, - dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - - if (!maskIsConsistent(s, nocase, msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); - continue; - } - - mp.accel_lits.emplace_back(lit.get_string(), lit.any_nocase(), msk, - cmp, groups); - mp.history_required = max(mp.history_required, lit_hist_len); - mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, - cmp); + if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("truncating to tail of length %zu\n", + size_t{ROSE_SHORT_LITERAL_LEN_MAX}); + lit_final.erase(0, lit_final.length() - ROSE_SHORT_LITERAL_LEN_MAX); + // We shouldn't have set a threshold below 8 chars. + assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); + assert(!noruns); } + + const auto &s = lit_final.get_string(); + bool nocase = lit_final.any_nocase(); + + DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " + "cmp=%s\n", + final_id, escapeString(s).c_str(), (int)nocase, noruns, + dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + + if (!maskIsConsistent(s, nocase, msk, cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + continue; + } + + mp.accel_lits.emplace_back(lit.get_string(), lit.any_nocase(), msk, cmp, + groups); + mp.history_required = max(mp.history_required, lit_hist_len); + mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, + cmp); } auto frag_group_map = makeFragGroupMap(build, final_to_frag_map); From 2bb0295c50cf98c3d0a3f226f0ca0e327627b0d2 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 25 Jan 2017 10:48:23 +1100 Subject: [PATCH 036/326] PCRE version updated to 8.40 --- doc/dev-reference/compilation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index 8f44c15c..de679422 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -64,7 +64,7 @@ libpcre are supported. The use of unsupported constructs will result in compilation errors. The version of PCRE used to validate Hyperscan's interpretation of this syntax -is 8.38. +is 8.40. 
==================== Supported Constructs From 05b5265affa43532283703074be46cfbce3dd254 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 25 Jan 2017 11:49:53 +1100 Subject: [PATCH 037/326] fdr: remove dead code to do with link structures --- src/fdr/fdr_compile.cpp | 28 ++++++++-------------------- src/fdr/fdr_internal.h | 7 +------ src/fdr/teddy_compile.cpp | 29 ++++++++++------------------- src/fdr/teddy_compile.h | 6 ++---- 4 files changed, 21 insertions(+), 49 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index cd3b57de..953b2dab 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -84,7 +84,7 @@ private: void dumpMasks(const u8 *defaultMask); #endif void setupTab(); - aligned_unique_ptr setupFDR(pair, size_t> &link); + aligned_unique_ptr setupFDR(); void createInitialState(FDR *fdr); public: @@ -93,7 +93,7 @@ public: : eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(move(lits_in)), make_small(make_small_in) {} - aligned_unique_ptr build(pair, size_t> &link); + aligned_unique_ptr build(); }; u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) { @@ -142,8 +142,7 @@ void FDRCompiler::createInitialState(FDR *fdr) { } } -aligned_unique_ptr -FDRCompiler::setupFDR(pair, size_t> &link) { +aligned_unique_ptr FDRCompiler::setupFDR() { size_t tabSize = eng.getTabSizeBytes(); auto floodControlTmp = setupFDRFloodControl(lits, eng); @@ -152,10 +151,9 @@ FDRCompiler::setupFDR(pair, size_t> &link) { assert(ISALIGNED_16(tabSize)); assert(ISALIGNED_16(confirmTmp.second)); assert(ISALIGNED_16(floodControlTmp.second)); - assert(ISALIGNED_16(link.second)); size_t headerSize = ROUNDUP_16(sizeof(FDR)); size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.second + - floodControlTmp.second + link.second); + floodControlTmp.second); DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu " "total=%zu\n", @@ -189,13 +187,6 @@ FDRCompiler::setupFDR(pair, size_t> &link) { fdr->tabSize = (1 << eng.bits) * (eng.schemeWidth / 8); fdr->stride = eng.stride; - if (link.first) { - fdr->link = verify_u32(ptr - fdr_base); - memcpy(ptr, link.first.get(), link.second); - } else { - fdr->link = 0; - } - return fdr; } @@ -535,11 +526,10 @@ void FDRCompiler::setupTab() { #endif } -aligned_unique_ptr -FDRCompiler::build(pair, size_t> &link) { +aligned_unique_ptr FDRCompiler::build() { assignStringsToBuckets(); setupTab(); - return setupFDR(link); + return setupFDR(); } } // namespace @@ -549,12 +539,10 @@ aligned_unique_ptr fdrBuildTableInternal(const vector &lits, bool make_small, const target_t &target, const Grey &grey, u32 hint) { - pair, size_t> link(nullptr, 0); - DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? 
"avx2" : "no-avx2"); if (grey.fdrAllowTeddy) { - auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, link); + auto fdr = teddyBuildTableHinted(lits, make_small, hint, target); if (fdr) { DEBUG_PRINTF("build with teddy succeeded\n"); return fdr; @@ -578,7 +566,7 @@ aligned_unique_ptr fdrBuildTableInternal(const vector &lits, } FDRCompiler fc(lits, *des, make_small); - return fc.build(link); + return fc.build(); } aligned_unique_ptr fdrBuildTable(const vector &lits, diff --git a/src/fdr/fdr_internal.h b/src/fdr/fdr_internal.h index 3bf82837..a425d78c 100644 --- a/src/fdr/fdr_internal.h +++ b/src/fdr/fdr_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,11 +71,6 @@ struct FDR { u32 maxStringLen; u32 floodOffset; - /** link is the relative offset of a secondary included FDR table for - * stream handling if we're a primary FDR table or the subsidiary tertiary - * structures (spillover strings and hash table) if we're a secondary - * structure. */ - u32 link; u8 stride; /* stride - how frequeuntly the data is consulted by the first * stage matcher */ u8 domain; /* number of bits used to index into main FDR table. This value diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index ac3a0203..66466e6c 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -74,7 +74,7 @@ public: const TeddyEngineDescription &eng_in, bool make_small_in) : eng(eng_in), lits(lits_in), make_small(make_small_in) {} - aligned_unique_ptr build(pair, size_t> &link); + aligned_unique_ptr build(); bool pack(map > &bucketToLits); }; @@ -274,8 +274,7 @@ bool TeddyCompiler::pack(map -TeddyCompiler::build(pair, size_t> &link) { +aligned_unique_ptr TeddyCompiler::build() { if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { DEBUG_PRINTF("too many literals: %zu\n", lits.size()); return nullptr; @@ -312,10 +311,10 @@ TeddyCompiler::build(pair, size_t> &link) { auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); size_t size = ROUNDUP_N(sizeof(Teddy) + - maskLen + - confirmTmp.second + - floodControlTmp.second + - link.second, 16 * maskWidth); + maskLen + + confirmTmp.second + + floodControlTmp.second, + 16 * maskWidth); aligned_unique_ptr fdr = aligned_zmalloc_unique(size); assert(fdr); // otherwise would have thrown std::bad_alloc @@ -334,13 +333,6 @@ TeddyCompiler::build(pair, size_t> &link) { memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second); ptr += floodControlTmp.second; - if (link.first) { - teddy->link = verify_u32(ptr - teddy_base); - memcpy(ptr, link.first.get(), link.second); - } else { - teddy->link = 0; - } - u8 *baseMsk = teddy_base + sizeof(Teddy); for (const auto &b2l : bucketToLits) { @@ -423,10 +415,9 @@ TeddyCompiler::build(pair, size_t> &link) { } // namespace -aligned_unique_ptr -teddyBuildTableHinted(const vector &lits, bool make_small, - u32 hint, const target_t &target, - pair, size_t> &link) { +aligned_unique_ptr teddyBuildTableHinted(const vector &lits, + bool make_small, u32 hint, + const target_t &target) { unique_ptr des; if (hint == HINT_INVALID) { des = chooseTeddyEngine(target, lits); @@ -437,7 +428,7 @@ teddyBuildTableHinted(const vector &lits, bool make_small, return nullptr; } TeddyCompiler tc(lits, *des, make_small); - return tc.build(link); + return tc.build(); } } // namespace ue2 
diff --git a/src/fdr/teddy_compile.h b/src/fdr/teddy_compile.h index 276c1347..bdd15865 100644 --- a/src/fdr/teddy_compile.h +++ b/src/fdr/teddy_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,6 @@ #include "util/alloc.h" #include -#include // std::pair struct FDR; struct target_t; @@ -48,8 +47,7 @@ struct hwlmLiteral; ue2::aligned_unique_ptr teddyBuildTableHinted(const std::vector &lits, bool make_small, - u32 hint, const target_t &target, - std::pair, size_t> &link); + u32 hint, const target_t &target); } // namespace ue2 From 8af4850d8597d9451fb323215ade9e7575fb596a Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 30 Jan 2017 16:06:48 +1100 Subject: [PATCH 038/326] remove 'fast teddy' models --- src/fdr/fdr.c | 4 +- src/fdr/teddy.h | 11 +- src/fdr/teddy_avx2.c | 401 --------------------------- src/fdr/teddy_engine_description.cpp | 2 - 4 files changed, 3 insertions(+), 415 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 5ac8388c..a965ba14 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -771,8 +771,8 @@ typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, static const FDRFUNCTYPE funcs[] = { fdr_engine_exec, - ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fast), - ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fast), + NULL, /* old: fast teddy */ + NULL, /* old: fast teddy */ ONLY_AVX2(fdr_exec_teddy_avx2_msks1_fat), ONLY_AVX2(fdr_exec_teddy_avx2_msks1_pck_fat), ONLY_AVX2(fdr_exec_teddy_avx2_msks2_fat), diff --git a/src/fdr/teddy.h b/src/fdr/teddy.h index e2936723..78cba847 100644 --- a/src/fdr/teddy.h +++ b/src/fdr/teddy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -104,15 +104,6 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control); -hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); - -hwlm_error_t -fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control); - #endif /* __AVX2__ */ #endif /* TEDDY_H_ */ diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 129b99c7..22b74408 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -39,75 +39,6 @@ #if defined(__AVX2__) -static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = { - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} -}; - #ifdef ARCH_64_BIT #define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \ do { \ @@ -199,22 +130,6 @@ do { \ } while (0); #endif -#define CONFIRM_FAST_TEDDY(var, offset, reason, conf_fn) \ -do { \ - if (unlikely(isnonzero256(var))) { \ - u32 arrCnt = 0; \ - m128 lo = cast256to128(var); \ - m128 hi = movdq_hi(var); \ - bit_array_fast_teddy(lo, bitArr, &arrCnt, offset); \ - bit_array_fast_teddy(hi, bitArr, &arrCnt, offset + 2); \ - for (u32 i = 0; i < arrCnt; i++) { \ - conf_fn(bitArr[i], confBase, reason, a, ptr, &control, \ - &last_match); \ - CHECK_HWLM_TERMINATE_MATCHING; \ - } \ - } \ -} while (0); - static really_inline m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, const u8 *buf_history, size_t len_history, @@ -226,183 +141,6 @@ m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, return ret; } -/* - * \brief Copy a block of [0,31] bytes efficiently. - * - * This function is a workaround intended to stop some compilers from - * synthesizing a memcpy function call out of the copy of a small number of - * bytes that we do in vectoredLoad128. - */ -static really_inline -void copyRuntBlock256(u8 *dst, const u8 *src, size_t len) { - switch (len) { - case 0: - break; - case 1: - *dst = *src; - break; - case 2: - unaligned_store_u16(dst, unaligned_load_u16(src)); - break; - case 3: - unaligned_store_u16(dst, unaligned_load_u16(src)); - dst[2] = src[2]; - break; - case 4: - unaligned_store_u32(dst, unaligned_load_u32(src)); - break; - case 5: - case 6: - case 7: - /* Perform copy with two overlapping 4-byte chunks. */ - unaligned_store_u32(dst + len - 4, unaligned_load_u32(src + len - 4)); - unaligned_store_u32(dst, unaligned_load_u32(src)); - break; - case 8: - unaligned_store_u64a(dst, unaligned_load_u64a(src)); - break; - case 9: - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - /* Perform copy with two overlapping 8-byte chunks. 
*/ - unaligned_store_u64a(dst + len - 8, unaligned_load_u64a(src + len - 8)); - unaligned_store_u64a(dst, unaligned_load_u64a(src)); - break; - case 16: - storeu128(dst, loadu128(src)); - break; - default: - /* Perform copy with two overlapping 16-byte chunks. */ - assert(len < 32); - storeu128(dst + len - 16, loadu128(src + len - 16)); - storeu128(dst, loadu128(src)); - break; - } -} - -static really_inline -m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi, - const u8 *buf_history, size_t len_history) { - union { - u8 val8[32]; - m256 val256; - } u; - - uintptr_t copy_start; - uintptr_t copy_len; - - if (ptr >= lo) { - uintptr_t avail = (uintptr_t)(hi - ptr); - if (avail >= 32) { - *p_mask = load256(p_mask_arr256[32] + 32); - return loadu256(ptr); - } - *p_mask = load256(p_mask_arr256[avail] + 32); - copy_start = 0; - copy_len = avail; - } else { - // need contains "how many chars to pull from history" - // calculate based on what we need, what we have in the buffer - // and only what we need to make primary confirm work - uintptr_t start = (uintptr_t)(lo - ptr); - uintptr_t i; - for (i = start; ptr + i < lo; i++) { - u.val8[i] = buf_history[len_history - (lo - (ptr + i))]; - } - uintptr_t end = MIN(32, (uintptr_t)(hi - ptr)); - *p_mask = loadu256(p_mask_arr256[end - start] + 32 - start); - copy_start = i; - copy_len = end - i; - } - - // Runt block from the buffer. - copyRuntBlock256(&u.val8[copy_start], &ptr[copy_start], copy_len); - - return u.val256; -} - -static really_inline -void do_confWithBit1_fast_teddy(u16 bits, const u32 *confBase, - CautionReason reason, - const struct FDR_Runtime_Args *a, - const u8 *ptr, hwlmcb_rv_t *control, - u32 *last_match) { - u32 byte = bits / 8; - u32 cf = confBase[bits % 8]; - const struct FDRConfirm *fdrc = (const struct FDRConfirm *) - ((const u8 *)confBase + cf); - u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit1(fdrc, a, ptr - a->buf + byte, control, last_match, confVal); -} - -static really_inline -void do_confWithBit_fast_teddy(u16 bits, const u32 *confBase, - CautionReason reason, - const struct FDR_Runtime_Args *a, const u8 *ptr, - hwlmcb_rv_t *control, u32 *last_match) { - u32 byte = bits / 8; - u32 cf = confBase[bits % 8]; - if (!cf) { - return; - } - const struct FDRConfirm *fdrc = (const struct FDRConfirm *) - ((const u8 *)confBase + cf); - if (!(fdrc->groups & *control)) { - return; - } - u64a confVal = getConfVal(a, ptr, byte, reason); - confWithBit(fdrc, a, ptr - a->buf + byte, control, last_match, confVal); -} - -static really_inline -void bit_array_fast_teddy(m128 var, u16 *bitArr, u32 *arrCnt, u32 offset) { - if (unlikely(isnonzero128(var))) { -#ifdef ARCH_64_BIT - u64a part_0 = movq(var); - while (unlikely(part_0)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_0) + - 64 * (offset); - *arrCnt += 1; - } - u64a part_1 = movq(rshiftbyte_m128(var, 8)); - while (unlikely(part_1)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + - 64 * (offset + 1); - *arrCnt += 1; - } -#else - u32 part_0 = movd(var); - while (unlikely(part_0)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_0) + - 32 * (offset * 2); - *arrCnt += 1; - } - u32 part_1 = movd(rshiftbyte_m128(var, 4)); - while (unlikely(part_1)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_1) + - 32 * (offset * 2 + 1); - *arrCnt += 1; - } - u32 part_2 = movd(rshiftbyte_m128(var, 8)); - while (unlikely(part_2)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_2) + - 32 * (offset * 2 + 2); 
- *arrCnt += 1; - } - u32 part_3 = movd(rshiftbyte_m128(var, 12)); - while (unlikely(part_3)) { - bitArr[*arrCnt] = (u16) TEDDY_FIND_AND_CLEAR_LSB(&part_3) + - 32 * (offset * 2 + 3); - *arrCnt += 1; - } -#endif - } -} - static really_inline m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) { m256 mask = set32x8(0xf); @@ -456,13 +194,6 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, return and256(r, res_shifted_3); } -static really_inline -m256 prep_conf_fast_teddy_m1(m256 val, m256 mask, m256 maskLo, m256 maskHi) { - m256 lo = and256(val, mask); - m256 hi = and256(rshift64_m256(val, 4), mask); - return and256(vpshufb(maskLo, lo), vpshufb(maskHi, hi)); -} - static really_inline const m256 * getMaskBase_avx2(const struct Teddy *teddy) { return (const m256 *)((const u8 *)teddy + sizeof(struct Teddy)); @@ -956,136 +687,4 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, return HWLM_SUCCESS; } -hwlm_error_t fdr_exec_teddy_avx2_msks1_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 *tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 64; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 1); - - const m256 maskLo = set2x128(maskBase[0]); - const m256 maskHi = set2x128(maskBase[1]); - const m256 mask = set32x8(0xf); - u16 bitArr[512]; - - const u8 *mainStart = ROUNDUP_PTR(ptr, 32); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 32; - m256 p_mask; - m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, - buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - res_0 = and256(res_0, p_mask); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); - ptr += 32; - } - - if (ptr + 32 < buf_end) { - m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); - ptr += 32; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - - m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); - - m256 val_1 = load256(ptr + 32); - m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit1_fast_teddy); - } - - for (; ptr < buf_end; ptr += 32) { - m256 p_mask; - m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, - buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - res_0 = and256(res_0, p_mask); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit1_fast_teddy); - } - - return HWLM_SUCCESS; -} - -hwlm_error_t fdr_exec_teddy_avx2_msks1_pck_fast(const struct FDR *fdr, - const struct FDR_Runtime_Args *a, - hwlm_group_t control) { - const u8 *buf_end = a->buf + a->len; - const u8 *ptr = a->buf + a->start_offset; - u32 floodBackoff = FLOOD_BACKOFF_START; - const u8 
*tryFloodDetect = a->firstFloodDetect; - u32 last_match = (u32)-1; - const struct Teddy *teddy = (const struct Teddy *)fdr; - const size_t iterBytes = 64; - DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n", - a->buf, a->len, a->start_offset); - - const m128 *maskBase = getMaskBase(teddy); - const u32 *confBase = getConfBase(teddy, 1); - - const m256 maskLo = set2x128(maskBase[0]); - const m256 maskHi = set2x128(maskBase[1]); - const m256 mask = set32x8(0xf); - u16 bitArr[512]; - - const u8 *mainStart = ROUNDUP_PTR(ptr, 32); - DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart); - if (ptr < mainStart) { - ptr = mainStart - 32; - m256 p_mask; - m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, - buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - res_0 = and256(res_0, p_mask); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); - ptr += 32; - } - - if (ptr + 32 < buf_end) { - m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); - ptr += 32; - } - - for ( ; ptr + iterBytes <= buf_end; ptr += iterBytes) { - __builtin_prefetch(ptr + (iterBytes*4)); - CHECK_FLOOD; - - m256 val_0 = load256(ptr + 0); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_0, 0, NOT_CAUTIOUS, do_confWithBit_fast_teddy); - - m256 val_1 = load256(ptr + 32); - m256 res_1 = prep_conf_fast_teddy_m1(val_1, mask, maskLo, maskHi); - CONFIRM_FAST_TEDDY(res_1, 4, NOT_CAUTIOUS, do_confWithBit_fast_teddy); - } - - for (; ptr < buf_end; ptr += 32) { - m256 p_mask; - m256 val_0 = vectoredLoad256(&p_mask, ptr, a->buf + a->start_offset, - buf_end, a->buf_history, a->len_history); - m256 res_0 = prep_conf_fast_teddy_m1(val_0, mask, maskLo, maskHi); - res_0 = and256(res_0, p_mask); - CONFIRM_FAST_TEDDY(res_0, 0, VECTORING, do_confWithBit_fast_teddy); - } - - return HWLM_SUCCESS; -} - #endif // __AVX2__ diff --git a/src/fdr/teddy_engine_description.cpp b/src/fdr/teddy_engine_description.cpp index 9e876b0b..f7559b13 100644 --- a/src/fdr/teddy_engine_description.cpp +++ b/src/fdr/teddy_engine_description.cpp @@ -65,8 +65,6 @@ bool TeddyEngineDescription::needConfirm(const vector &lits) const void getTeddyDescriptions(vector *out) { static const TeddyEngineDef defns[] = { - { 1, 0 | HS_CPU_FEATURES_AVX2, 1, 8, false }, - { 2, 0 | HS_CPU_FEATURES_AVX2, 1, 8, true }, { 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false }, { 4, 0 | HS_CPU_FEATURES_AVX2, 1, 16, true }, { 5, 0 | HS_CPU_FEATURES_AVX2, 2, 16, false }, From caa46201f071a371051038910b5c08dc007388cd Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 17 Jan 2017 10:23:23 +1100 Subject: [PATCH 039/326] add information on the runtime impl to rose.txt --- src/rose/rose_dump.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index d3bf980e..96f49688 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -1184,6 +1184,12 @@ void roseDumpText(const RoseEngine *t, FILE *f) { if (t->hasSom) { fprintf(f, " hasSom"); } + if (t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL) { + fprintf(f, " pureLiteral"); + } + if (t->runtimeImpl == ROSE_RUNTIME_SINGLE_OUTFIX) { + fprintf(f, " soleOutfix"); + } fprintf(f, "\n"); fprintf(f, "dkey count : %u\n", t->dkeyCount); From 512c049493a6048ff176505df7efd715d671d06e Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 1 Dec 2016 
16:10:34 +1100 Subject: [PATCH 040/326] shift early_dfa construction earlier --- src/nfagraph/ng.cpp | 6 +- src/nfagraph/ng_reports.cpp | 22 +++++- src/nfagraph/ng_reports.h | 6 +- src/nfagraph/ng_violet.cpp | 120 +++++++++++++++++++++++++++++-- src/nfagraph/ng_violet.h | 4 +- src/rose/rose_build_add.cpp | 22 ++++-- src/rose/rose_build_add_mask.cpp | 4 +- src/rose/rose_build_convert.cpp | 2 +- src/rose/rose_build_impl.h | 2 - src/rose/rose_build_misc.cpp | 21 ------ src/rose/rose_in_graph.h | 8 ++- 11 files changed, 171 insertions(+), 46 deletions(-) diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index dff9c7e8..4525fd3e 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -251,7 +251,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (doViolet(*ng.rose, g, w.prefilter, cc)) { + if (doViolet(*ng.rose, g, w.prefilter, false, ng.rm, cc)) { return true; } @@ -272,7 +272,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (doViolet(*ng.rose, g, w.prefilter, cc)) { + if (doViolet(*ng.rose, g, w.prefilter, true, ng.rm, cc)) { return true; } diff --git a/src/nfagraph/ng_reports.cpp b/src/nfagraph/ng_reports.cpp index 3d18a620..4e9b498d 100644 --- a/src/nfagraph/ng_reports.cpp +++ b/src/nfagraph/ng_reports.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -65,6 +65,26 @@ bool can_exhaust(const NGHolder &g, const ReportManager &rm) { return true; } +void set_report(NGHolder &g, ReportID internal_report) { + // First, wipe the report IDs on all vertices. + for (auto v : vertices_range(g)) { + g[v].reports.clear(); + } + + // Any predecessors of accept get our id. + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + g[v].reports.insert(internal_report); + } + + // Same for preds of acceptEod, except accept itself. + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + if (v == g.accept) { + continue; + } + g[v].reports.insert(internal_report); + } +} + /** Derive a maximum offset for the graph from the max_offset values of its * reports. Returns MAX_OFFSET for inf. */ u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) { diff --git a/src/nfagraph/ng_reports.h b/src/nfagraph/ng_reports.h index 3047ff0b..31c95308 100644 --- a/src/nfagraph/ng_reports.h +++ b/src/nfagraph/ng_reports.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,6 +48,10 @@ std::set all_reports(const NGHolder &g); /** True if *all* reports in the graph are exhaustible. */ bool can_exhaust(const NGHolder &g, const ReportManager &rm); +/** Replaces all existing reports on the holder with the provided internal + * report id. */ +void set_report(NGHolder &g, ReportID internal_report); + /** Derive a maximum offset for the graph from the max_offset values of its * reports. Returns MAX_OFFSET for inf. 
*/ u64a findMaxOffset(const NGHolder &g, const ReportManager &rm); diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 985246f0..66c0253a 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +38,8 @@ #include "ng_holder.h" #include "ng_is_equal.h" #include "ng_literal_analysis.h" +#include "ng_limex.h" +#include "ng_mcclellan.h" #include "ng_netflow.h" #include "ng_prune.h" #include "ng_redundancy.h" @@ -47,6 +49,7 @@ #include "ng_split.h" #include "ng_util.h" #include "ng_width.h" +#include "nfa/rdfa.h" #include "rose/rose_build.h" #include "rose/rose_build_util.h" #include "rose/rose_in_dump.h" @@ -2616,7 +2619,110 @@ void rehomeEodSuffixes(RoseInGraph &vg) { /* old accept vertices will be tidied up by final pruneUseless() call */ } +static +bool tryForEarlyDfa(const NGHolder &h, const CompileContext &cc) { + switch (h.kind) { + case NFA_OUTFIX: /* 'prefix' of eod */ + case NFA_PREFIX: + return cc.grey.earlyMcClellanPrefix; + case NFA_INFIX: + return cc.grey.earlyMcClellanInfix; + case NFA_SUFFIX: + return cc.grey.earlyMcClellanSuffix; + default: + DEBUG_PRINTF("kind %u\n", (u32)h.kind); + assert(0); + return false; + } +} + +static +vector<vector<CharReach>> getDfaTriggers(RoseInGraph &vg, + const vector<RoseInEdge> &edges, + bool *single_trigger) { + vector<vector<CharReach>> triggers; + u32 min_offset = ~0U; + u32 max_offset = 0; + for (const auto &e : edges) { + RoseInVertex s = source(e, vg); + if (vg[s].type == RIV_LITERAL) { + triggers.push_back(as_cr_seq(vg[s].s)); + } + ENSURE_AT_LEAST(&max_offset, vg[s].max_offset); + LIMIT_TO_AT_MOST(&min_offset, vg[s].min_offset); + } + + *single_trigger = min_offset == max_offset; + DEBUG_PRINTF("trigger offset (%u, %u)\n", min_offset, max_offset); + + return triggers; +} + +static +bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, + const vector<RoseInEdge> &edges, const ReportManager &rm, + const CompileContext &cc) { + DEBUG_PRINTF("trying for dfa\n"); + + bool single_trigger; + for (const auto &e : edges) { + if (vg[target(e, vg)].type == RIV_ACCEPT_EOD) { + /* TODO: support eod prefixes */ + return false; + } + } + + auto triggers = getDfaTriggers(vg, edges, &single_trigger); + + /* TODO: literal delay things */ + if (!generates_callbacks(h)) { + set_report(h, rose.getNewNfaReport()); + } + + shared_ptr<raw_dfa> dfa = buildMcClellan(h, &rm, single_trigger, triggers, + cc.grey); + + if (!dfa) { + return false; + } + + DEBUG_PRINTF("dfa ok\n"); + for (const auto &e : edges) { + vg[e].dfa = dfa; + } + + return true; +} + +static +void ensureImplementable(RoseBuild &rose, RoseInGraph &vg, + const ReportManager &rm, const CompileContext &cc) { + map<NGHolder *, vector<RoseInEdge> > edges_by_graph; + vector<NGHolder *> graphs; + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[ve].graph) { + NGHolder *h = vg[ve].graph.get(); + if (!contains(edges_by_graph, h)) { + graphs.push_back(h); + } + edges_by_graph[h].push_back(ve); + } + } + for (NGHolder *h : graphs) { + if (isImplementableNFA(*h, &rm, cc)) { + continue; + } + + if (tryForEarlyDfa(*h, cc) + && doEarlyDfa(rose, vg, *h, edges_by_graph[h], rm, cc)) { + continue; + } + DEBUG_PRINTF("eek\n"); + } +} + bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, + bool last_chance, const ReportManager &rm, const CompileContext &cc) { assert(!can_never_match(h)); @@
-2663,10 +2769,6 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, decomposeLiteralChains(vg, cc); } - /* Step 5: avoid unimplementable, or overly large engines if possible */ - /* TODO: later - ng_rose is currently acting as a backstop */ - - /* Step 6: send to rose */ rehomeEodSuffixes(vg); removeRedundantLiterals(vg, cc); @@ -2674,6 +2776,14 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, dumpPreRoseGraph(vg, cc.grey); renumber_vertices(vg); calcVertexOffsets(vg); + + + /* Step 5: avoid unimplementable, or overly large engines if possible */ + if (last_chance) { + ensureImplementable(rose, vg, rm, cc); + } + + /* Step 6: send to rose */ bool rv = rose.addRose(vg, prefilter); DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail"); return rv; diff --git a/src/nfagraph/ng_violet.h b/src/nfagraph/ng_violet.h index fb62bfc0..6bc42d75 100644 --- a/src/nfagraph/ng_violet.h +++ b/src/nfagraph/ng_violet.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,10 +41,12 @@ class NGHolder; class RoseBuild; struct CompileContext; +class ReportManager; /** \brief Attempt to consume the entire pattern in graph \a h with Rose. * Returns true if successful. */ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, + bool last_chance, const ReportManager &rm, const CompileContext &cc); } // namespace ue2 diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 8b10bc7d..da26e126 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1033,8 +1033,8 @@ bool empty(const GraphT &g) { return vi == ve; } -/* We only try to implement as a dfa if a non-nullptr as_dfa is provided to return - * the raw dfa to. */ +/* We only try to implement as a dfa if a non-nullptr as_dfa is provided to + * return the raw dfa to. 
*/ static bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h, const vector &edges, bool prefilter, @@ -1105,7 +1105,7 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h, } if (!generates_callbacks(h)) { - setReportId(h, tbi->getNewNfaReport()); + set_report(h, tbi->getNewNfaReport()); } bool single_trigger = min_offset == max_offset; @@ -1601,6 +1601,8 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, for (const auto &e : edges_range(in)) { if (!in[e].graph) { + assert(!in[e].dfa); + assert(!in[e].haig); continue; // no graph } @@ -1616,6 +1618,11 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, ordered_graphs.push_back(h); } graphs[h].push_back(e); + if (in[e].dfa) { + assert(!contains(bd.early_dfas, h) + || bd.early_dfas[h] == in[e].dfa); + bd.early_dfas[h] = in[e].dfa; + } } assert(ordered_graphs.size() == graphs.size()); @@ -1626,8 +1633,9 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, const vector &h_edges = graphs.at(h); unique_ptr as_dfa; /* allow finalChance as fallback is basically an outfix at this point */ - if (!canImplementGraph(this, in, *h, h_edges, prefilter, rm, cc, - finalChance, &as_dfa)) { + if (!contains(bd.early_dfas, h) + && !canImplementGraph(this, in, *h, h_edges, prefilter, rm, cc, + finalChance, &as_dfa)) { return false; } if (as_dfa) { @@ -1649,7 +1657,7 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, if (!generates_callbacks(whatRoseIsThis(in, e)) && !contains(bd.early_dfas, &h) && in[target(e, in)].type != RIV_ACCEPT_EOD) { - setReportId(h, getNewNfaReport()); + set_report(h, getNewNfaReport()); } } diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index de3bdf0a..bd8eed0c 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -480,7 +480,7 @@ void addTransientMask(RoseBuildImpl &build, const vector &mask, // Everyone gets the same report ID. ReportID mask_report = build.getNewNfaReport(); - setReportId(*mask_graph, mask_report); + set_report(*mask_graph, mask_report); // Build the HWLM literal mask. 
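// (msk/cmp constrain the final bytes of a match: an input byte b at mask position i can only match if (b & msk[i]) == cmp[i])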
vector msk, cmp; diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index a15d4dc6..14eec7c7 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -166,7 +166,7 @@ bool delayLiteralWithPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id, shared_ptr h = makeRosePrefix(lit.s); ReportID prefix_report = 0; - setReportId(*h, prefix_report); + set_report(*h, prefix_report); if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) { DEBUG_PRINTF("prefix not implementable\n"); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index d5f37b5a..ce8a859d 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -624,8 +624,6 @@ size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); ue2_literal findNonOverlappingTail(const std::set &lits, const ue2_literal &s); -void setReportId(NGHolder &g, ReportID id); - #ifndef NDEBUG bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); bool hasOrphanedTops(const RoseBuildImpl &build); diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 97579111..b33192da 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -859,27 +859,6 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport( return false; } -// Sets the report ID for all vertices connected to an accept to `id`. -void setReportId(NGHolder &g, ReportID id) { - // First, wipe the report IDs on all vertices. - for (auto v : vertices_range(g)) { - g[v].reports.clear(); - } - - // Any predecessors of accept get our id. - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - g[v].reports.insert(id); - } - - // Same for preds of acceptEod, except accept itself. - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (v == g.accept) { - continue; - } - g[v].reports.insert(id); - } -} - bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) { ORDER_CHECK(minBound); ORDER_CHECK(maxBound); diff --git a/src/rose/rose_in_graph.h b/src/rose/rose_in_graph.h index 0e218576..42c59932 100644 --- a/src/rose/rose_in_graph.h +++ b/src/rose/rose_in_graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -55,6 +55,7 @@ namespace ue2 { class NGHolder; struct raw_som_dfa; +struct raw_dfa; enum RoseInVertexType { RIV_LITERAL, @@ -166,9 +167,12 @@ struct RoseInEdgeProps { /** \brief Maximum bound on 'dot' repeat between literals. */ u32 maxBound; - /** \brief Prefix graph. Graph is end to (end - lag). */ + /** \brief Graph on edge. Graph is end to (end - lag). */ std::shared_ptr graph; + /** \brief DFA version of graph, if we have already determinised. */ + std::shared_ptr dfa; + /** \brief Haig version of graph, if required. 
*/ std::shared_ptr haig; From 9d35e2ad36664b70d3de1b0cf8ae49fd503f73a9 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 11 Jan 2017 14:38:18 +1100 Subject: [PATCH 041/326] allow outfixes to be converted to prefixes of the eod event literal --- src/rose/rose_build_add.cpp | 61 +++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index da26e126..b601f943 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1783,9 +1783,70 @@ void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h, populateReverseAccelerationInfo(outfix.rev_info, h); } +static +bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) { + map, ReportID> report_remap; + shared_ptr eod_leftfix + = makeRoseEodPrefix(h, build, report_remap); + + bool nfa_ok = isImplementableNFA(h, &build.rm, build.cc); + + /* TODO: check if early dfa is possible */ + + if (!nfa_ok) { + DEBUG_PRINTF("could not build as NFA\n"); + return false; + } + + u32 eod_event = getEodEventID(build); + + auto &g = build.g; + for (const auto &report_mapping : report_remap) { + RoseVertex v = add_vertex(g); + g[v].literals.insert(eod_event); + build.literal_info[eod_event].vertices.insert(v); + + g[v].left.graph = eod_leftfix; + g[v].left.leftfix_report = report_mapping.second; + g[v].left.lag = 0; + RoseEdge e1 = add_edge(build.anchored_root, v, g); + g[e1].minBound = 0; + g[e1].maxBound = ROSE_BOUND_INF; + g[v].min_offset = findMinWidth(*eod_leftfix); + g[v].max_offset = ROSE_BOUND_INF; + + depth max_width = findMaxWidth(*g[v].left.graph); + if (max_width.is_finite() && isPureAnchored(*eod_leftfix)) { + g[e1].maxBound = max_width; + g[v].max_offset = max_width; + } + + g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix + RoseVertex w = add_vertex(g); + g[w].eod_accept = true; + g[w].reports = report_mapping.first; + g[w].min_offset = g[v].min_offset; + g[w].max_offset = g[v].max_offset; + RoseEdge e = add_edge(v, w, g); + g[e].minBound = 0; + g[e].maxBound = 0; + g[e].history = ROSE_ROLE_HISTORY_NONE; + DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); + } + + return true; +} + bool RoseBuildImpl::addOutfix(const NGHolder &h) { DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h)); + /* TODO: handle more than one report */ + if (!in_degree(h.accept, h) + && all_reports(h).size() == 1 + && addEodOutfix(*this, h)) { + return true; + } + const u32 nfa_states = isImplementableNFA(h, &rm, cc); if (nfa_states) { DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states); From 7767651b59abe751e023efd610d9fface5bc2474 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 8 Dec 2016 14:05:44 +1100 Subject: [PATCH 042/326] shift all early_dfa creation logic to ng_violet/ng_rose --- src/nfagraph/ng.cpp | 12 ++--- src/nfagraph/ng_rose.cpp | 24 ++++++--- src/nfagraph/ng_rose.h | 6 +-- src/nfagraph/ng_violet.cpp | 91 +++++++++++++++++++++++--------- src/nfagraph/ng_violet.h | 4 ++ src/rose/rose_build.h | 5 +- src/rose/rose_build_add.cpp | 102 +++++------------------------------- src/rose/rose_build_impl.h | 3 +- 8 files changed, 113 insertions(+), 134 deletions(-) diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index 4525fd3e..caf631f7 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -255,7 +255,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (splitOffRose(*ng.rose, g, w.prefilter, cc)) { + if (splitOffRose(*ng.rose, g, 
w.prefilter, ng.rm, cc)) { return true; } @@ -276,7 +276,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (splitOffRose(*ng.rose, g, w.prefilter, cc)) { + if (splitOffRose(*ng.rose, g, w.prefilter, ng.rm, cc)) { return true; } @@ -291,7 +291,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, } } - if (finalChanceRose(*ng.rose, g, w.prefilter, cc)) { + if (finalChanceRose(*ng.rose, g, w.prefilter, ng.rm, cc)) { return true; } @@ -533,16 +533,16 @@ bool NG::addHolder(NGHolder &w) { return true; } - if (splitOffRose(*rose, w, prefilter, cc)) { + if (splitOffRose(*rose, w, prefilter, rm, cc)) { return true; } if (splitOffPuffs(*rose, rm, w, prefilter, cc)) { return true; } - if (splitOffRose(*rose, w, prefilter, cc)) { + if (splitOffRose(*rose, w, prefilter, rm, cc)) { return true; } - if (finalChanceRose(*rose, w, prefilter, cc)) { + if (finalChanceRose(*rose, w, prefilter, rm, cc)) { return true; } diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index 7066ab27..1f79b55f 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -48,6 +48,7 @@ #include "ng_reports.h" #include "ng_split.h" #include "ng_util.h" +#include "ng_violet.h" #include "ng_width.h" #include "rose/rose_build.h" #include "rose/rose_build_util.h" @@ -2833,8 +2834,19 @@ void desperationImprove(RoseInGraph &ig, const CompileContext &cc) { calcVertexOffsets(ig); } +static +bool addRose(RoseBuild &rose, RoseInGraph &ig, bool prefilter, + bool final_chance, const ReportManager &rm, + const CompileContext &cc) { + if (!ensureImplementable(rose, ig, false, final_chance, rm, cc) + && !prefilter) { + return false; + } + return rose.addRose(ig, prefilter); +} + bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const CompileContext &cc) { + const ReportManager &rm, const CompileContext &cc) { if (!cc.grey.allowRose) { return false; } @@ -2843,20 +2855,20 @@ bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); unique_ptr igp = buildRose(h, false, cc); - if (igp && rose.addRose(*igp, prefilter)) { + if (igp && addRose(rose, *igp, prefilter, false, rm, cc)) { goto ok; } igp = buildRose(h, true, cc); if (igp) { - if (rose.addRose(*igp, prefilter)) { + if (addRose(rose, *igp, prefilter, false, rm, cc)) { goto ok; } desperationImprove(*igp, cc); - if (rose.addRose(*igp, prefilter)) { + if (addRose(rose, *igp, prefilter, false, rm, cc)) { goto ok; } } @@ -2870,7 +2882,7 @@ ok: } bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const CompileContext &cc) { + const ReportManager &rm, const CompileContext &cc) { DEBUG_PRINTF("final chance rose\n"); if (!cc.grey.allowRose) { return false; @@ -2935,7 +2947,7 @@ bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, renumber_vertices(ig); calcVertexOffsets(ig); - return rose.addRose(ig, prefilter, true /* final chance */); + return addRose(rose, ig, prefilter, true /* final chance */, rm, cc); } bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, diff --git a/src/nfagraph/ng_rose.h b/src/nfagraph/ng_rose.h index d180e8a5..1634adf0 100644 --- a/src/nfagraph/ng_rose.h +++ b/src/nfagraph/ng_rose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided 
that the following conditions are met: @@ -50,13 +50,13 @@ struct ue2_literal; /** \brief Attempt to consume the entire pattern in graph \a h with Rose. * Returns true if successful. */ bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const CompileContext &cc); + const ReportManager &rm, const CompileContext &cc); /** \brief Attempt to consume the entire pattern in graph \a h with Rose. * This is the last attempt to handle a pattern before we resort to an outfix. * Returns true if successful. */ bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const CompileContext &cc); + const ReportManager &rm, const CompileContext &cc); /** \brief True if the pattern in \a h is consumable by Rose. This function * may be conservative (return false even if supported) for efficiency. */ diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 66c0253a..0dc010ea 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -2660,8 +2660,8 @@ vector> getDfaTriggers(RoseInGraph &vg, static bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, - const vector &edges, const ReportManager &rm, - const CompileContext &cc) { + const vector &edges, bool final_chance, + const ReportManager &rm, const CompileContext &cc) { DEBUG_PRINTF("trying for dfa\n"); bool single_trigger; @@ -2680,7 +2680,7 @@ bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, } shared_ptr dfa = buildMcClellan(h, &rm, single_trigger, triggers, - cc.grey); + cc.grey, final_chance); if (!dfa) { return false; @@ -2695,30 +2695,72 @@ bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, } static -void ensureImplementable(RoseBuild &rose, RoseInGraph &vg, - const ReportManager &rm, const CompileContext &cc) { - map > edges_by_graph; - vector graphs; - for (const RoseInEdge &ve : edges_range(vg)) { - if (vg[ve].graph) { - NGHolder *h = vg[ve].graph.get(); - if (!contains(edges_by_graph, h)) { - graphs.push_back(h); +bool splitForImplemtabilty(UNUSED RoseInGraph &vg, UNUSED NGHolder &h, + UNUSED const vector &edges, + UNUSED const CompileContext &cc) { + /* TODO: need to add literals back to the graph? 
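(the trailing literals recorded as graph_lag on the edges would need to be restored to the graph, cf. restoreTrailingLiteralStates, before a fresh split could be attempted)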
*/ + return false; +} + +#define MAX_IMPLEMENTABLE_SPLITS 200 + +bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, + bool final_chance, const ReportManager &rm, + const CompileContext &cc) { + DEBUG_PRINTF("checking for impl\n"); + bool changed = false; + u32 added_count = 0; + do { + map > edges_by_graph; + vector graphs; + for (const RoseInEdge &ve : edges_range(vg)) { + if (vg[ve].graph) { + NGHolder *h = vg[ve].graph.get(); + if (!contains(edges_by_graph, h)) { + graphs.push_back(h); + } + edges_by_graph[h].push_back(ve); } - edges_by_graph[h].push_back(ve); } - } - for (NGHolder *h : graphs) { - if (isImplementableNFA(*h, &rm, cc)) { - continue; + for (NGHolder *h : graphs) { + if (isImplementableNFA(*h, &rm, cc)) { + continue; + } + + if (tryForEarlyDfa(*h, cc) + && doEarlyDfa(rose, vg, *h, edges_by_graph[h], final_chance, rm, + cc)) { + continue; + } + + DEBUG_PRINTF("eek\n"); + if (!allow_changes) { + return false; + } + + if (splitForImplemtabilty(vg, *h, edges_by_graph[h], cc)) { + added_count++; + changed = true; + continue; + } + + return false; } - if (tryForEarlyDfa(*h, cc) - && doEarlyDfa(rose, vg, *h, edges_by_graph[h], rm, cc)) { - continue; + if (added_count > MAX_IMPLEMENTABLE_SPLITS) { + return false; } - DEBUG_PRINTF("eek\n"); - } + + if (changed) { + removeRedundantLiterals(vg, cc); + pruneUseless(vg); + renumber_vertices(vg); + calcVertexOffsets(vg); + } + } while (changed); + + DEBUG_PRINTF("ok!\n"); + return true; } bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, @@ -2779,9 +2821,10 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, /* Step 5: avoid unimplementable, or overly large engines if possible */ - if (last_chance) { - ensureImplementable(rose, vg, rm, cc); + if (!ensureImplementable(rose, vg, last_chance, last_chance, rm, cc)) { + return false; } + dumpPreRoseGraph(vg, cc.grey, "post_ensure_rose.dot"); /* Step 6: send to rose */ bool rv = rose.addRose(vg, prefilter); diff --git a/src/nfagraph/ng_violet.h b/src/nfagraph/ng_violet.h index 6bc42d75..b6ecd028 100644 --- a/src/nfagraph/ng_violet.h +++ b/src/nfagraph/ng_violet.h @@ -42,6 +42,7 @@ class RoseBuild; struct CompileContext; class ReportManager; +struct RoseInGraph; /** \brief Attempt to consume the entire pattern in graph \a h with Rose. * Returns true if successful. 
*/ @@ -49,6 +50,9 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, bool last_chance, const ReportManager &rm, const CompileContext &cc); +bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, + bool final_chance, const ReportManager &rm, + const CompileContext &cc); } // namespace ue2 #endif diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index c71671fa..0af8ba57 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -88,8 +88,7 @@ public: virtual void add(bool anchored, bool eod, const ue2_literal &lit, const ue2::flat_set &ids) = 0; - virtual bool addRose(const RoseInGraph &ig, bool prefilter, - bool finalChance = false) = 0; + virtual bool addRose(const RoseInGraph &ig, bool prefilter) = 0; virtual bool addSombeRose(const RoseInGraph &ig) = 0; virtual bool addOutfix(const NGHolder &h) = 0; diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index b601f943..3ac6836f 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -85,9 +85,6 @@ struct RoseBuildData : boost::noncopyable { /** Input rose graph. */ const RoseInGraph &ig; - /** Mapping from engine graph to constructed DFA for pre-build DFAs. */ - ue2::unordered_map > early_dfas; - /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which * require ANCH history to prevent overlap. */ ue2::unordered_set anch_history_edges; @@ -281,8 +278,8 @@ void createVertices(RoseBuildImpl *tbi, if (prefix_graph) { g[w].left.graph = prefix_graph; - if (contains(bd.early_dfas, prefix_graph.get())) { - g[w].left.dfa = bd.early_dfas.at(prefix_graph.get()); + if (edge_props.dfa) { + g[w].left.dfa = edge_props.dfa; } g[w].left.haig = edge_props.haig; g[w].left.lag = prefix_lag; @@ -769,9 +766,9 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, assert(!g[u].suffix); if (ig[iv].type == RIV_ACCEPT) { assert(!tbi->isAnyStart(u)); - if (contains(bd.early_dfas, edge_props.graph.get())) { + if (edge_props.dfa) { DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index); - g[u].suffix.rdfa = bd.early_dfas.at(edge_props.graph.get()); + g[u].suffix.rdfa = edge_props.dfa; g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph); g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph); } else if (edge_props.graph) { @@ -1033,13 +1030,10 @@ bool empty(const GraphT &g) { return vi == ve; } -/* We only try to implement as a dfa if a non-nullptr as_dfa is provided to - * return the raw dfa to. 
*/ static -bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h, +bool canImplementGraph(const RoseInGraph &in, NGHolder &h, const vector &edges, bool prefilter, - const ReportManager &rm, const CompileContext &cc, - bool finalChance, unique_ptr *as_dfa) { + const ReportManager &rm, const CompileContext &cc) { assert(!edges.empty()); assert(&*in[edges[0]].graph == &h); @@ -1061,64 +1055,6 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h, } } - if (as_dfa) { - switch (h.kind) { - case NFA_OUTFIX: /* 'prefix' of eod */ - case NFA_PREFIX: - if (!cc.grey.earlyMcClellanPrefix) { - return false; - } - break; - case NFA_INFIX: - if (!cc.grey.earlyMcClellanInfix) { - return false; - } - break; - case NFA_SUFFIX: - if (!cc.grey.earlyMcClellanSuffix) { - return false; - } - break; - case NFA_EAGER_PREFIX: - case NFA_REV_PREFIX: - case NFA_OUTFIX_RAW: - DEBUG_PRINTF("kind %u\n", (u32)h.kind); - assert(0); - } - assert(!*as_dfa); - assert(tbi); - vector > triggers; - u32 min_offset = ~0U; - u32 max_offset = 0; - for (const auto &e : edges) { - RoseInVertex s = source(e, in); - RoseInVertex t = target(e, in); - if (in[s].type == RIV_LITERAL) { - triggers.push_back(as_cr_seq(in[s].s)); - } - if (in[t].type == RIV_ACCEPT_EOD) { - /* TODO: support eod prefixes */ - return false; - } - ENSURE_AT_LEAST(&max_offset, in[s].max_offset); - LIMIT_TO_AT_MOST(&min_offset, in[s].min_offset); - } - - if (!generates_callbacks(h)) { - set_report(h, tbi->getNewNfaReport()); - } - - bool single_trigger = min_offset == max_offset; - - DEBUG_PRINTF("trying for mcclellan (%u, %u)\n", min_offset, max_offset); - *as_dfa = buildMcClellan(h, &rm, single_trigger, triggers, cc.grey, - finalChance); - - if (*as_dfa) { - return true; - } - } - DEBUG_PRINTF("unable to build engine\n"); return false; } @@ -1573,8 +1509,7 @@ bool validateKinds(const RoseInGraph &g) { } #endif -bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, - bool finalChance) { +bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { DEBUG_PRINTF("trying to rose\n"); assert(validateKinds(ig)); assert(hasCorrectlyNumberedVertices(ig)); @@ -1606,8 +1541,9 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, continue; // no graph } - if (in[e].haig) { - // Haigs are always implementable (we've already built the raw DFA). + if (in[e].haig || in[e].dfa) { + /* Early DFAs/Haigs are always implementable (we've already built + * the raw DFA). 
*/ continue; } @@ -1618,11 +1554,6 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, ordered_graphs.push_back(h); } graphs[h].push_back(e); - if (in[e].dfa) { - assert(!contains(bd.early_dfas, h) - || bd.early_dfas[h] == in[e].dfa); - bd.early_dfas[h] = in[e].dfa; - } } assert(ordered_graphs.size() == graphs.size()); @@ -1631,16 +1562,9 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, for (auto h : ordered_graphs) { const vector &h_edges = graphs.at(h); - unique_ptr as_dfa; - /* allow finalChance as fallback is basically an outfix at this point */ - if (!contains(bd.early_dfas, h) - && !canImplementGraph(this, in, *h, h_edges, prefilter, rm, cc, - finalChance, &as_dfa)) { + if (!canImplementGraph(in, *h, h_edges, prefilter, rm, cc)) { return false; } - if (as_dfa) { - bd.early_dfas[h] = move(as_dfa); - } insert(&graph_edges, graph_edges.end(), h_edges); } @@ -1655,7 +1579,6 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter, assert(allMatchStatesHaveReports(h)); if (!generates_callbacks(whatRoseIsThis(in, e)) - && !contains(bd.early_dfas, &h) && in[target(e, in)].type != RIV_ACCEPT_EOD) { set_report(h, getNewNfaReport()); } @@ -1716,8 +1639,7 @@ bool roseCheckRose(const RoseInGraph &ig, bool prefilter, } for (const auto &m : graphs) { - if (!canImplementGraph(nullptr, ig, *m.first, m.second, prefilter, rm, - cc, false, nullptr)) { + if (!canImplementGraph(ig, *m.first, m.second, prefilter, rm, cc)) { return false; } } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index ce8a859d..2cefb42a 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -448,8 +448,7 @@ public: void add(bool anchored, bool eod, const ue2_literal &lit, const ue2::flat_set &ids) override; - bool addRose(const RoseInGraph &ig, bool prefilter, - bool finalChance = false) override; + bool addRose(const RoseInGraph &ig, bool prefilter) override; bool addSombeRose(const RoseInGraph &ig) override; bool addOutfix(const NGHolder &h) override; From 8741759c3a15c8a3893a1c08fe5b39c7c7424735 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 4 Jan 2017 11:41:59 +1100 Subject: [PATCH 043/326] refactor restoreTrailingLiteral use --- src/nfagraph/ng_violet.cpp | 39 +++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 0dc010ea..715c5b5c 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1821,6 +1821,25 @@ bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, return true; } +static +void restoreTrailingLiteralStates(NGHolder &g, + const vector> &lits) { + vector preds; + insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); + clear_in_edges(g.accept, g); + + for (auto v : preds) { + g[v].reports.clear(); /* clear report from old accepts */ + } + + for (const auto &p : lits) { + const ue2_literal &lit = p.first; + u32 delay = p.second; + + restoreTrailingLiteralStates(g, lit, delay, preds); + } +} + static bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, const CompileContext &cc) { @@ -1900,27 +1919,18 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, trimmed.clear(); for (auto &elem : trimmed_vec) { shared_ptr &hp = elem.first; - NGHolder &eh = *hp; - - vector base_states; - insert(&base_states, base_states.end(), - inv_adjacent_vertices(eh.accept, eh)); - clear_in_edges(eh.accept, eh); - - for (auto v : base_states) { - 
eh[v].reports.clear(); /* clear report from old accepts */ - } + vector> succ_lits; for (const auto &edge_delay : elem.second) { const RoseInEdge &e = edge_delay.first; u32 delay = edge_delay.second; - auto succ_lit = vg[target(e, vg)].s; + auto lit = vg[target(e, vg)].s; vg[e].graph = hp; - assert(delay <= succ_lit.length()); - restoreTrailingLiteralStates(*vg[e].graph, succ_lit, delay, - base_states); + assert(delay <= lit.length()); + succ_lits.emplace_back(lit, delay); } + restoreTrailingLiteralStates(*hp, succ_lits); } return true; } @@ -2819,7 +2829,6 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, renumber_vertices(vg); calcVertexOffsets(vg); - /* Step 5: avoid unimplementable, or overly large engines if possible */ if (!ensureImplementable(rose, vg, last_chance, last_chance, rm, cc)) { return false; From a43116c6d4281701533dd5ddcfe4bf0bf69a57a5 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 5 Jan 2017 10:50:59 +1100 Subject: [PATCH 044/326] remove util functions from ng_rose. --- src/nfagraph/ng_rose.cpp | 82 +------------------------------------ src/nfagraph/ng_rose.h | 12 ------ src/nfagraph/ng_util.cpp | 84 +++++++++++++++++++++++++++++++++++++- src/nfagraph/ng_util.h | 7 +++- src/nfagraph/ng_violet.cpp | 34 +++++++++++++++ 5 files changed, 125 insertions(+), 94 deletions(-) diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp index 1f79b55f..d24c3392 100644 --- a/src/nfagraph/ng_rose.cpp +++ b/src/nfagraph/ng_rose.cpp @@ -807,86 +807,7 @@ bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { return !curr.empty(); } -u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 max_delay, bool overhang_ok) { - assert(isCorrectlyTopped(g)); - if (max_delay == MO_INVALID_IDX) { - max_delay--; - } - - DEBUG_PRINTF("killing off '%s'\n", dumpString(lit).c_str()); - set curr, next; - curr.insert(g.accept); - - auto it = lit.rbegin(); - for (u32 delay = max_delay; delay > 0 && it != lit.rend(); delay--, ++it) { - next.clear(); - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.start) { - if (overhang_ok) { - DEBUG_PRINTF("bail\n"); - goto bail; /* things got complicated */ - } else { - continue; /* it is not possible for a lhs literal to - * overhang the start */ - } - } - - const CharReach &cr = g[u].char_reach; - if (!overlaps(*it, cr)) { - DEBUG_PRINTF("skip\n"); - continue; - } - if (isSubsetOf(*it, cr)) { - next.insert(u); - } else { - DEBUG_PRINTF("bail\n"); - goto bail; /* things got complicated */ - } - } - } - - curr.swap(next); - } - bail: - if (curr.empty()) { - /* This can happen when we have an edge representing a cross from two - * sides of an alternation. 
This whole edge needs to be marked as - * dead */ - assert(0); /* should have been picked up by can match */ - return MO_INVALID_IDX; - } - - u32 delay = distance(lit.rbegin(), it); - assert(delay <= max_delay); - assert(delay <= lit.length()); - DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); - - set pred; - for (auto v : curr) { - insert(&pred, inv_adjacent_vertices_range(v, g)); - } - - clear_in_edges(g.accept, g); - clearReports(g); - - for (auto v : pred) { - NFAEdge e = add_edge(v, g.accept, g); - g[v].reports.insert(0); - if (is_triggered(g) && v == g.start) { - g[e].tops.insert(DEFAULT_TOP); - } - } - - pruneUseless(g); - assert(allMatchStatesHaveReports(g)); - assert(isCorrectlyTopped(g)); - - DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g)); - return delay; -} - +static void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, u32 delay, const vector &preds) { assert(delay <= lit.length()); @@ -922,6 +843,7 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, assert(isCorrectlyTopped(g)); } +static void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, u32 delay) { vector preds; diff --git a/src/nfagraph/ng_rose.h b/src/nfagraph/ng_rose.h index 1634adf0..9f69fe0c 100644 --- a/src/nfagraph/ng_rose.h +++ b/src/nfagraph/ng_rose.h @@ -63,18 +63,6 @@ bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, const CompileContext &cc); -/** \brief Returns the delay or MO_INVALID_IDX if the graph cannot match with - * the trailing literal. */ -u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 max_delay, bool overhang_ok = true); - -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay); - -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay, - const std::vector &preds); - } // namespace ue2 #endif // NG_ROSE_H diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 5252eb18..0776fa04 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "grey.h" #include "ng_depth.h" // for NFAVertexDepth #include "ng_dump.h" +#include "ng_prune.h" #include "ue2common.h" #include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS. 
#include "parser/position.h" @@ -43,6 +44,7 @@ #include "util/ue2string.h" #include "util/report_manager.h" +#include #include #include #include @@ -672,6 +674,86 @@ void reverseHolder(const NGHolder &g_in, NGHolder &g) { assert(num_edges(g) == num_edges(g_in)); } +u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 max_delay, bool overhang_ok) { + assert(isCorrectlyTopped(g)); + if (max_delay == numeric_limits::max()) { + max_delay--; + } + + DEBUG_PRINTF("killing off '%s'\n", dumpString(lit).c_str()); + set curr, next; + curr.insert(g.accept); + + auto it = lit.rbegin(); + for (u32 delay = max_delay; delay > 0 && it != lit.rend(); delay--, ++it) { + next.clear(); + for (auto v : curr) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == g.start) { + if (overhang_ok) { + DEBUG_PRINTF("bail\n"); + goto bail; /* things got complicated */ + } else { + continue; /* it is not possible for a lhs literal to + * overhang the start */ + } + } + + const CharReach &cr = g[u].char_reach; + if (!overlaps(*it, cr)) { + DEBUG_PRINTF("skip\n"); + continue; + } + if (isSubsetOf(*it, cr)) { + next.insert(u); + } else { + DEBUG_PRINTF("bail\n"); + goto bail; /* things got complicated */ + } + } + } + + curr.swap(next); + } + bail: + if (curr.empty()) { + /* This can happen when we have an edge representing a cross from two + * sides of an alternation. This whole edge needs to be marked as + * dead */ + assert(0); /* should have been picked up by can match */ + return numeric_limits::max(); + } + + u32 delay = distance(lit.rbegin(), it); + assert(delay <= max_delay); + assert(delay <= lit.length()); + DEBUG_PRINTF("managed delay %u (of max %u)\n", delay, max_delay); + + set pred; + for (auto v : curr) { + insert(&pred, inv_adjacent_vertices_range(v, g)); + } + + clear_in_edges(g.accept, g); + clearReports(g); + + for (auto v : pred) { + NFAEdge e = add_edge(v, g.accept, g); + g[v].reports.insert(0); + if (is_triggered(g) && v == g.start) { + g[e].tops.insert(DEFAULT_TOP); + } + } + + pruneUseless(g); + assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); + + DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g)); + return delay; +} + #ifndef NDEBUG bool allMatchStatesHaveReports(const NGHolder &g) { diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index a0752533..f3fa1354 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -275,6 +275,11 @@ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); * accepts. */ void reverseHolder(const NGHolder &g, NGHolder &out); +/** \brief Returns the delay or ~0U if the graph cannot match with + * the trailing literal. */ +u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 max_delay, bool overhang_ok = true); + #ifndef NDEBUG // Assertions: only available in internal builds. 
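The moved removeTrailingLiteralStates() and the restoreTrailingLiteralStates() overloads in ng_violet.cpp (next diff) are intended to be used as a pair: peel a trailing literal off a graph, record the resulting delay as graph_lag on the Rose edge, and later replay the peeled (literal, delay) pairs in one pass. A minimal sketch of that round trip follows; peelAndRestore, e, vg and max_delay are illustrative stand-ins, not names from the patch:

    // Sketch only: assumes we are inside ng_violet.cpp, where the static
    // vector-based restore overload is visible.
    static bool peelAndRestore(NGHolder &g, RoseInGraph &vg,
                               const RoseInEdge &e, u32 max_delay) {
        const ue2_literal &lit = vg[target(e, vg)].s;
        u32 delay = removeTrailingLiteralStates(g, lit, max_delay);
        if (delay == std::numeric_limits<u32>::max()) {
            return false; // graph cannot match with this trailing literal
        }
        vg[e].graph_lag = delay; // record the lag on the Rose edge

        // Re-append the peeled characters as a chain of vertices ending at
        // accept; the vector form handles several successor literals in one
        // pass over accept's predecessors.
        std::vector<std::pair<ue2_literal, u32>> succ_lits;
        succ_lits.emplace_back(lit, delay);
        restoreTrailingLiteralStates(g, succ_lits);
        return true;
    }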
diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 715c5b5c..182e2471 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1821,6 +1821,40 @@ bool makeTransientFromLongLiteral(NGHolder &h, RoseInGraph &vg, return true; } +static +void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, + u32 delay, const vector &preds) { + assert(delay <= lit.length()); + assert(isCorrectlyTopped(g)); + DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); + + NFAVertex prev = g.accept; + auto it = lit.rbegin(); + while (delay--) { + NFAVertex curr = add_vertex(g); + assert(it != lit.rend()); + g[curr].char_reach = *it; + add_edge(curr, prev, g); + ++it; + prev = curr; + } + + for (auto v : preds) { + NFAEdge e = add_edge(v, prev, g); + if (v == g.start && is_triggered(g)) { + g[e].tops.insert(DEFAULT_TOP); + } + } + + // Every predecessor of accept must have a report. + set_report(g, 0); + + renumber_vertices(g); + renumber_edges(g); + assert(allMatchStatesHaveReports(g)); + assert(isCorrectlyTopped(g)); +} + static void restoreTrailingLiteralStates(NGHolder &g, const vector> &lits) { From d89cf2f69961f402c94848edc7e2ed89fd6230cb Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 6 Jan 2017 09:12:22 +1100 Subject: [PATCH 045/326] tidy --- src/rose/rose_build_add.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 3ac6836f..68cc67a1 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1031,14 +1031,8 @@ bool empty(const GraphT &g) { } static -bool canImplementGraph(const RoseInGraph &in, NGHolder &h, - const vector &edges, bool prefilter, - const ReportManager &rm, const CompileContext &cc) { - assert(!edges.empty()); - assert(&*in[edges[0]].graph == &h); - - assert(h.kind == whatRoseIsThis(in, edges[0])); - +bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm, + const CompileContext &cc) { if (isImplementableNFA(h, &rm, cc)) { return true; } @@ -1561,11 +1555,10 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { vector graph_edges; for (auto h : ordered_graphs) { - const vector &h_edges = graphs.at(h); - if (!canImplementGraph(in, *h, h_edges, prefilter, rm, cc)) { + if (!canImplementGraph(*h, prefilter, rm, cc)) { return false; } - insert(&graph_edges, graph_edges.end(), h_edges); + insert(&graph_edges, graph_edges.end(), graphs[h]); } /* we are now past the point of no return. 
We can start making irreversible @@ -1623,7 +1616,7 @@ bool roseCheckRose(const RoseInGraph &ig, bool prefilter, return false; } - map> graphs; + vector graphs; for (const auto &e : edges_range(ig)) { if (!ig[e].graph) { @@ -1635,11 +1628,11 @@ bool roseCheckRose(const RoseInGraph &ig, bool prefilter, continue; } - graphs[ig[e].graph.get()].push_back(e); + graphs.push_back(ig[e].graph.get()); } - for (const auto &m : graphs) { - if (!canImplementGraph(ig, *m.first, m.second, prefilter, rm, cc)) { + for (const auto &g : graphs) { + if (!canImplementGraph(*g, prefilter, rm, cc)) { return false; } } From cde6ebf5160127e3ef9bd1a7a56ea62175ad20f9 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 4 Jan 2017 13:15:30 +1100 Subject: [PATCH 046/326] violet: decompose further for implementability --- src/nfagraph/ng_violet.cpp | 92 +++++++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 16 deletions(-) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 182e2471..3c0aee15 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -736,6 +736,11 @@ void poisonForGoodPrefix(const NGHolder &h, } } +static UNUSED +bool is_any_accept_type(RoseInVertexType t) { + return t == RIV_ACCEPT || t == RIV_ACCEPT_EOD; +} + static flat_set poisonEdges(const NGHolder &h, const vector *depths, @@ -749,7 +754,8 @@ flat_set poisonEdges(const NGHolder &h, for (const RoseInEdge &ve : ee) { if (vg[target(ve, vg)].type != RIV_LITERAL) { /* nothing to poison in suffixes/outfixes */ - assert(vg[target(ve, vg)].type == RIV_ACCEPT); + assert(generates_callbacks(h)); + assert(is_any_accept_type(vg[target(ve, vg)].type)); continue; } succs.insert({vg[target(ve, vg)].s, @@ -964,7 +970,7 @@ bool splitRoseEdge(const NGHolder &base_graph, RoseInGraph &vg, to_string(lhs->kind).c_str(), num_vertices(*lhs), to_string(rhs->kind).c_str(), num_vertices(*rhs)); - bool suffix = vg[target(ee.front(), vg)].type == RIV_ACCEPT; + bool suffix = generates_callbacks(base_graph); if (is_triggered(base_graph)) { /* if we are already guarded, check if the split reduces the size of @@ -1466,6 +1472,11 @@ void removeRedundantLiteralsFromPrefixes(RoseInGraph &g, continue; } + if (g[e].graph_lag) { + /* already removed redundant parts of literals */ + continue; + } + assert(!g[t].delay); const ue2_literal &lit = g[t].s; @@ -1567,20 +1578,22 @@ void removeRedundantLiteralsFromInfix(const NGHolder &h, RoseInGraph &ig, * taking into account overlap of successor literals. 
*/ set preds; + set succs; for (const RoseInEdge &e : ee) { RoseInVertex u = source(e, ig); assert(ig[u].type == RIV_LITERAL); - assert(!ig[e].graph_lag); assert(!ig[u].delay); preds.insert(ig[u].s); - } - set succs; - for (const RoseInEdge &e : ee) { RoseInVertex v = target(e, ig); assert(ig[v].type == RIV_LITERAL); assert(!ig[v].delay); succs.insert(ig[v].s); + + if (ig[e].graph_lag) { + /* already removed redundant parts of literals */ + return; + } } map, u32> > graphs; /* + delay */ @@ -1840,7 +1853,7 @@ void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, } for (auto v : preds) { - NFAEdge e = add_edge(v, prev, g); + NFAEdge e = add_edge_if_not_present(v, prev, g); if (v == g.start && is_triggered(g)) { g[e].tops.insert(DEFAULT_TOP); } @@ -2738,23 +2751,66 @@ bool doEarlyDfa(RoseBuild &rose, RoseInGraph &vg, NGHolder &h, return true; } +#define MAX_EDGES_FOR_IMPLEMENTABILITY 50 + static -bool splitForImplemtabilty(UNUSED RoseInGraph &vg, UNUSED NGHolder &h, - UNUSED const vector &edges, - UNUSED const CompileContext &cc) { - /* TODO: need to add literals back to the graph? */ - return false; +bool splitForImplementabilty(RoseInGraph &vg, NGHolder &h, + const vector &edges, + const CompileContext &cc) { + vector> succ_lits; + DEBUG_PRINTF("trying to split %s with %zu vertices on %zu edges\n", + to_string(h.kind).c_str(), num_vertices(h), edges.size()); + + if (edges.size() > MAX_EDGES_FOR_IMPLEMENTABILITY) { + return false; + } + + if (!generates_callbacks(h)) { + for (const auto &e : edges) { + const auto &lit = vg[target(e, vg)].s; + u32 delay = vg[e].graph_lag; + vg[e].graph_lag = 0; + + assert(delay <= lit.length()); + succ_lits.emplace_back(lit, delay); + } + restoreTrailingLiteralStates(h, succ_lits); + } + + unique_ptr split; + if (h.kind == NFA_PREFIX) { + vector depths; + calcDepths(h, depths); + + split = findBestPrefixSplit(h, depths, vg, edges, cc); + } else { + split = findBestNormalSplit(h, vg, edges, cc); + } + + if (split && splitRoseEdge(h, vg, edges, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + return true; + } + + DEBUG_PRINTF("trying to netflow\n"); + bool rv = doNetflowCut(h, nullptr, vg, edges, false, cc.grey); + DEBUG_PRINTF("done\n"); + + return rv; } -#define MAX_IMPLEMENTABLE_SPLITS 200 +#define MAX_IMPLEMENTABLE_SPLITS 50 bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, bool final_chance, const ReportManager &rm, const CompileContext &cc) { DEBUG_PRINTF("checking for impl\n"); bool changed = false; + bool need_to_recalc = false; u32 added_count = 0; do { + changed = false; + DEBUG_PRINTF("added %u\n", added_count); map > edges_by_graph; vector graphs; for (const RoseInEdge &ve : edges_range(vg)) { @@ -2782,7 +2838,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, return false; } - if (splitForImplemtabilty(vg, *h, edges_by_graph[h], cc)) { + if (splitForImplementabilty(vg, *h, edges_by_graph[h], cc)) { added_count++; changed = true; continue; @@ -2798,11 +2854,15 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, if (changed) { removeRedundantLiterals(vg, cc); pruneUseless(vg); - renumber_vertices(vg); - calcVertexOffsets(vg); + need_to_recalc = true; } } while (changed); + if (need_to_recalc) { + renumber_vertices(vg); + calcVertexOffsets(vg); + } + DEBUG_PRINTF("ok!\n"); return true; } From 2a1202e1e65a73e62032794582b444bfcd718575 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 5 Jan 2017 11:37:34 +1100 Subject: [PATCH 047/326] 
add checkViolet and switch SOM path across to use violet --- src/nfagraph/ng.cpp | 7 ++----- src/nfagraph/ng_som.cpp | 6 +++--- src/nfagraph/ng_violet.cpp | 38 ++++++++++++++++++++++++++++++-------- src/nfagraph/ng_violet.h | 7 +++++++ 4 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index caf631f7..a4f86fee 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -533,16 +533,13 @@ bool NG::addHolder(NGHolder &w) { return true; } - if (splitOffRose(*rose, w, prefilter, rm, cc)) { + if (doViolet(*rose, w, prefilter, false, rm, cc)) { return true; } if (splitOffPuffs(*rose, rm, w, prefilter, cc)) { return true; } - if (splitOffRose(*rose, w, prefilter, rm, cc)) { - return true; - } - if (finalChanceRose(*rose, w, prefilter, rm, cc)) { + if (doViolet(*rose, w, prefilter, true, rm, cc)) { return true; } diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index f6ba0fa7..8d3d75a3 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,12 +40,12 @@ #include "ng_redundancy.h" #include "ng_region.h" #include "ng_reports.h" -#include "ng_rose.h" #include "ng_som.h" #include "ng_som_add_redundancy.h" #include "ng_som_util.h" #include "ng_split.h" #include "ng_util.h" +#include "ng_violet.h" #include "ng_width.h" #include "grey.h" #include "ue2common.h" @@ -2073,7 +2073,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */ u32 som_loc = ssm.getPrivateSomSlot(); - if (!checkRose(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) { + if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) { // This is an optimisation: if we can't build a Haig from a portion of // the graph, then we won't be able to manage it as an outfix either // when we fall back. diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 3c0aee15..28ad9549 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -2867,25 +2867,24 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, return true; } -bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, - bool last_chance, const ReportManager &rm, - const CompileContext &cc) { +static +RoseInGraph doInitialVioletTransform(const NGHolder &h, + const CompileContext &cc) { assert(!can_never_match(h)); + RoseInGraph vg = populateTrivialGraph(h); + if (!cc.grey.allowViolet) { - return false; + return vg; } DEBUG_PRINTF("hello world\n"); - RoseInGraph vg = populateTrivialGraph(h); - /* Step 1: avoid outfixes as we always have to run them. 
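(an outfix has no triggering literal, so it must be scanned unconditionally over all input; the split steps below try to replace it with literal-triggered prefix/infix/suffix engines)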
*/ avoidOutfixes(vg, cc); if (num_vertices(vg) <= 2) { - /* only have an outfix; leave for ng_rose for now */ - return false; + return vg; /* unable to transform pattern */ } removeRedundantPrefixes(vg); @@ -2923,6 +2922,17 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, renumber_vertices(vg); calcVertexOffsets(vg); + return vg; +} + +bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, + bool last_chance, const ReportManager &rm, + const CompileContext &cc) { + auto vg = doInitialVioletTransform(h, cc); + if (num_vertices(vg) <= 2) { + return false; + } + /* Step 5: avoid unimplementable, or overly large engines if possible */ if (!ensureImplementable(rose, vg, last_chance, last_chance, rm, cc)) { return false; @@ -2935,4 +2945,16 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, return rv; } +bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, + const CompileContext &cc) { + auto vg = doInitialVioletTransform(h, cc); + if (num_vertices(vg) <= 2) { + return false; + } + + bool rv = roseCheckRose(vg, prefilter, rm, cc); + DEBUG_PRINTF("violet: %s\n", rv ? "success" : "fail"); + return rv; +} + } diff --git a/src/nfagraph/ng_violet.h b/src/nfagraph/ng_violet.h index b6ecd028..3fe57dbf 100644 --- a/src/nfagraph/ng_violet.h +++ b/src/nfagraph/ng_violet.h @@ -53,6 +53,13 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, bool final_chance, const ReportManager &rm, const CompileContext &cc); + +/** \brief True if the pattern in \a h is consumable by Rose/Violet. This + * function may be conservative (return false even if supported) for + * efficiency. */ +bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, + const CompileContext &cc); + } // namespace ue2 #endif From c32d7d51d9586f439efbc7e70b7ee1c8a4c0a87b Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 5 Jan 2017 12:35:32 +1100 Subject: [PATCH 048/326] remove ng_rose --- CMakeLists.txt | 2 - src/grey.cpp | 7 - src/grey.h | 2 - src/nfagraph/ng.cpp | 24 - src/nfagraph/ng_rose.cpp | 2911 ----------------------------------- src/nfagraph/ng_rose.h | 68 - src/nfagraph/ng_violet.cpp | 1 - src/rose/rose_build_add.cpp | 1 - 8 files changed, 3016 deletions(-) delete mode 100644 src/nfagraph/ng_rose.cpp delete mode 100644 src/nfagraph/ng_rose.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fe82a70..f5d29642 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -820,8 +820,6 @@ SET (hs_SRCS src/nfagraph/ng_restructuring.h src/nfagraph/ng_revacc.cpp src/nfagraph/ng_revacc.h - src/nfagraph/ng_rose.cpp - src/nfagraph/ng_rose.h src/nfagraph/ng_sep.cpp src/nfagraph/ng_sep.h src/nfagraph/ng_small_literal_set.cpp diff --git a/src/grey.cpp b/src/grey.cpp index f0374b6d..8881666e 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -54,7 +54,6 @@ Grey::Grey(void) : allowMcSheng(true), allowPuff(true), allowLiteral(true), - allowRose(true), allowViolet(true), allowExtendedNFA(true), /* bounded repeats of course */ allowLimExNFA(true), @@ -114,7 +113,6 @@ Grey::Grey(void) : roseMcClellanSuffix(1), roseMcClellanOutfix(2), roseTransformDelay(true), - roseDesiredSplit(4), earlyMcClellanPrefix(true), earlyMcClellanInfix(true), earlyMcClellanSuffix(true), @@ -219,7 +217,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowMcSheng); G_UPDATE(allowPuff); G_UPDATE(allowLiteral); - G_UPDATE(allowRose); G_UPDATE(allowViolet); G_UPDATE(allowExtendedNFA); 
G_UPDATE(allowLimExNFA); @@ -279,7 +276,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(roseMcClellanSuffix); G_UPDATE(roseMcClellanOutfix); G_UPDATE(roseTransformDelay); - G_UPDATE(roseDesiredSplit); G_UPDATE(earlyMcClellanPrefix); G_UPDATE(earlyMcClellanInfix); G_UPDATE(earlyMcClellanSuffix); @@ -336,7 +332,6 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowMcClellan = false; g->allowPuff = false; g->allowLiteral = false; - g->allowRose = false; g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; @@ -354,7 +349,6 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowMcClellan = true; g->allowPuff = false; g->allowLiteral = false; - g->allowRose = false; g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; @@ -372,7 +366,6 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowMcClellan = true; g->allowPuff = false; g->allowLiteral = false; - g->allowRose = false; g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; diff --git a/src/grey.h b/src/grey.h index 7a6a168b..17d82527 100644 --- a/src/grey.h +++ b/src/grey.h @@ -54,7 +54,6 @@ struct Grey { bool allowMcSheng; bool allowPuff; bool allowLiteral; - bool allowRose; bool allowViolet; bool allowExtendedNFA; bool allowLimExNFA; @@ -128,7 +127,6 @@ struct Grey { * always */ u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */ bool roseTransformDelay; - u32 roseDesiredSplit; bool earlyMcClellanPrefix; bool earlyMcClellanInfix; diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index a4f86fee..e1f29318 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -52,7 +52,6 @@ #include "ng_region.h" #include "ng_region_redundancy.h" #include "ng_reports.h" -#include "ng_rose.h" #include "ng_sep.h" #include "ng_small_literal_set.h" #include "ng_som.h" @@ -255,10 +254,6 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (splitOffRose(*ng.rose, g, w.prefilter, ng.rm, cc)) { - return true; - } - if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) { return true; } @@ -276,25 +271,6 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (splitOffRose(*ng.rose, g, w.prefilter, ng.rm, cc)) { - return true; - } - - // A final pass at cyclic redundancy and Rose - // TODO: investigate - coverage results suggest that this never succeeds? - if (cc.grey.performGraphSimplification) { - if (removeCyclicPathRedundancy(g) || - removeCyclicDominated(g, som)) { - if (handleFixedWidth(*ng.rose, g, cc.grey)) { - return true; - } - } - } - - if (finalChanceRose(*ng.rose, g, w.prefilter, ng.rm, cc)) { - return true; - } - DEBUG_PRINTF("testing for outfix\n"); assert(allMatchStatesHaveReports(g)); if (ng.rose->addOutfix(g)) { diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp deleted file mode 100644 index d24c3392..00000000 --- a/src/nfagraph/ng_rose.cpp +++ /dev/null @@ -1,2911 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose construction from NGHolder. - */ - -// #define DEBUG -// #define DEBUG_ROSE -#include "ng_rose.h" - -#include "grey.h" -#include "ng_depth.h" -#include "ng_dominators.h" -#include "ng_equivalence.h" -#include "ng_holder.h" -#include "ng_is_equal.h" -#include "ng_literal_analysis.h" -#include "ng_netflow.h" -#include "ng_prune.h" -#include "ng_redundancy.h" -#include "ng_region.h" -#include "ng_reports.h" -#include "ng_split.h" -#include "ng_util.h" -#include "ng_violet.h" -#include "ng_width.h" -#include "rose/rose_build.h" -#include "rose/rose_build_util.h" -#include "rose/rose_in_dump.h" -#include "rose/rose_in_graph.h" -#include "rose/rose_in_util.h" -#include "util/compare.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/graph.h" -#include "util/graph_range.h" -#include "util/make_unique.h" -#include "util/order_check.h" -#include "util/ue2string.h" -#include "util/ue2_containers.h" - -#include -#include -#include -#include - -#define NDEBUG_PRINTF(x, ...) \ - do { if (0) { DEBUG_PRINTF(x, ## __VA_ARGS__); } } while (0) - -using namespace std; - -namespace ue2 { - -/** - * Maps vertices in the original graph to vertices on edge graphs. Each edge - * graph should contain at most one copy of the vertex. Multiple images for a - * vertex arise after we split on multiple literals - in this cases all edges - * should share a common graph. - * - * If, when an edge is split, a vertex ends up in both the LHS and RHS then only - * the LHS is tracked. This is because in general we want to simplify the LHS - * and allow complexity to be pushed further back. - */ -typedef ue2::unordered_map > > - vdest_map_t; - -typedef ue2::unordered_map > vsrc_map_t; - -/** - * \brief Maximum width of the character class usable as an escape class. 
- */ -static const u32 MAX_ESCAPE_CHARS = 20; - -static -u32 maxDelay(const CompileContext &cc) { - if (!cc.streaming) { - return MO_INVALID_IDX; - } - return cc.grey.maxHistoryAvailable; -} - -static -bool createsAnchoredLHS(const NGHolder &g, const vector &vv, - const vector &depths, - const Grey &grey, depth max_depth = depth::infinity()) { - max_depth = min(max_depth, depth(grey.maxAnchoredRegion)); - - for (auto v : vv) { - /* avoid issues of self loops blowing out depths: - * look at preds, add 1 */ - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - u32 idx = g[u].index; - assert(idx < depths.size()); - if (maxDistFromStartOfData(depths.at(idx)) >= max_depth) { - return false; - } - } - } - return true; -} - -static -bool createsTransientLHS(const NGHolder &g, const vector &vv, - const vector &depths, - const Grey &grey) { - const depth max_depth(grey.maxHistoryAvailable); - - for (auto v : vv) { - /* avoid issues of self loops blowing out depths: - * look at preds, add 1 */ - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - u32 idx = g[u].index; - assert(idx < depths.size()); - if (maxDistFromInit(depths.at(idx)) >= max_depth) { - return false; - } - } - } - return true; -} - -static -bool isLHSUsablyAnchored(const NGHolder &g, - const vector &depths, - const Grey &grey) { - assert(in_degree(g.acceptEod, g) == 1); - - vector accepts; - insert(&accepts, accepts.end(), inv_adjacent_vertices(g.accept, g)); - - bool rv = createsAnchoredLHS(g, accepts, depths, grey); - DEBUG_PRINTF("lhs is %susably anchored\n", rv ? "" : "not "); - return rv; -} - -static -bool isLHSTransient(const NGHolder &g, - const vector &depths, - const Grey &grey) { - assert(in_degree(g.acceptEod, g) == 1); - - vector accepts; - insert(&accepts, accepts.end(), inv_adjacent_vertices(g.accept, g)); - - bool rv = createsTransientLHS(g, accepts, depths, grey); - DEBUG_PRINTF("lhs is %stransient\n", rv ? "" : "not "); - return rv; -} - -namespace { - -/** - * Information on a cut: vertices and literals. - */ -struct VertLitInfo { - VertLitInfo(NFAVertex v, const set &litlit) - : vv(vector(1, v)), lit(litlit) {} - VertLitInfo(const vector &vvvv, const set &litlit) - : vv(vvvv), lit(litlit) {} - vector vv; - set lit; -}; - -/** - * A factory for candidate simple cuts (literals/vertices). - */ -class LitCollection : boost::noncopyable { - vector> lits; /**< sorted list of potential cuts */ - const NGHolder &g; /**< graph on which cuts are found */ - const vector &depths; /**< depth information for g */ - const ue2::unordered_map ®ion_map; /**< region map for g */ - - /** Set of vertices to avoid selecting as end vertices for cuts as previous - * cuts overlap them. This is solely to prevent us picking literal sets - * which do not add significant value. */ - ue2::unordered_set poisoned; - - /** Back-edges in g. */ - ue2::unordered_map > back_edges; - - const Grey &grey; - bool seeking_transient; - bool seeking_anchored; - - void poisonLHS(const VertLitInfo &picked); - void poisonLitVerts(const VertLitInfo &picked); - void poisonCandidates(const VertLitInfo &picked); - - friend class LitComparator; - -public: - LitCollection(const NGHolder &g_in, const vector &depths_in, - const ue2::unordered_map ®ion_map_in, - const set &ap, const set &ap_raw, - u32 min_len, bool desperation, const CompileContext &cc, - bool override_literal_quality_check = false); - - /**< Returns the next candidate cut. 
Cut still needs to be inspected for - * complete envelopment. */ - unique_ptr pickNext(void); -}; - -/** - * \brief Comparator class for sorting LitCollection::lits. - * - * This is separated out from LitCollection itself as passing LitCollection to - * std::sort() would incur a (potentially expensive) copy. - */ -class LitComparator { -public: - explicit LitComparator(const LitCollection &lc_in) : lc(lc_in) {} - bool operator()(const unique_ptr &a, - const unique_ptr &b) const { - assert(a && b); - - if (lc.seeking_anchored) { - bool a_anchored = - createsAnchoredLHS(lc.g, a->vv, lc.depths, lc.grey); - bool b_anchored = - createsAnchoredLHS(lc.g, b->vv, lc.depths, lc.grey); - - if (a_anchored != b_anchored) { - return a_anchored < b_anchored; - } - } - - if (lc.seeking_transient) { - bool a_transient = - createsTransientLHS(lc.g, a->vv, lc.depths, lc.grey); - bool b_transient = - createsTransientLHS(lc.g, b->vv, lc.depths, lc.grey); - - if (a_transient != b_transient) { - return a_transient < b_transient; - } - } - - u64a score_a = scoreSet(a->lit); - u64a score_b = scoreSet(b->lit); - - if (score_a != score_b) { - return score_a > score_b; - } - - /* vertices should only be in one candidate cut */ - assert(a->vv == b->vv || a->vv.front() != b->vv.front()); - return lc.g[a->vv.front()].index > - lc.g[b->vv.front()].index; - } - -private: - const LitCollection &lc; -}; - -static -size_t shorter_than(const set &s, size_t limit) { - size_t count = 0; - - for (const auto &lit : s) { - if (lit.length() < limit) { - count++; - } - } - - return count; -} - -static -u32 min_len(const set &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)lit.length()); - } - - return rv; -} - -static -u32 max_len(const set &s) { - u32 rv = 0; - - for (const auto &lit : s) { - rv = max(rv, (u32)lit.length()); - } - - return rv; -} - -static -u32 min_period(const set &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)minStringPeriod(lit)); - } - DEBUG_PRINTF("min period %u\n", rv); - return rv; -} - -static -bool validateRoseLiteralSetQuality(const set &s, u64a score, - u32 min_allowed_len, bool desperation, - bool override_literal_quality_check) { - if (!override_literal_quality_check && score >= NO_LITERAL_AT_EDGE_SCORE) { - DEBUG_PRINTF("candidate is too bad %llu/%zu\n", score, s.size()); - return false; - } - - assert(!s.empty()); - if (s.empty()) { - DEBUG_PRINTF("candidate is too bad/something went wrong\n"); - return false; - } - - u32 s_min_len = min_len(s); - u32 s_min_period = min_period(s); - size_t short_count = shorter_than(s, 5); - - DEBUG_PRINTF("cand '%s': score %llu count=%zu min_len=%u min_period=%u" - " short_count=%zu desp=%d\n", - dumpString(*s.begin()).c_str(), score, s.size(), s_min_len, - s_min_period, short_count, (int)desperation); - - bool ok = true; - - if (s.size() > 10 /* magic number is magic */ - || s_min_len < min_allowed_len - || (s_min_period <= 1 && !override_literal_quality_check - && min_allowed_len != 1)) { - ok = false; - } - - if (!ok && desperation - && s.size() <= 20 /* more magic numbers are magical */ - && (s_min_len > 5 || (s_min_len > 2 && short_count <= 10)) - && s_min_period > 1) { - DEBUG_PRINTF("candidate is ok\n"); - ok = true; - } - - if (!ok && desperation - && s.size() <= 50 /* more magic numbers are magical */ - && s_min_len > 10 - && s_min_period > 1) { - DEBUG_PRINTF("candidate is ok\n"); - ok = true; - } - - if (!ok) { - DEBUG_PRINTF("candidate is too bad\n"); - return false; - } - - return true; -} - -static 
UNUSED -void dumpRoseLiteralSet(const set &s) { - for (UNUSED const auto &lit : s) { - DEBUG_PRINTF(" lit: %s\n", dumpString(lit).c_str()); - } -} - -static -void getSimpleRoseLiterals(const NGHolder &g, const set &a_dom, - vector> *lits, - u32 min_allowed_len, bool desperation, - bool override_literal_quality_check) { - map scores; - map> lit_info; - set s; - - for (auto v : a_dom) { - s = getLiteralSet(g, v, true); /* RHS will take responsibility for any - revisits to the target vertex */ - - if (s.empty()) { - DEBUG_PRINTF("candidate is too bad\n"); - continue; - } - - DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - u64a score = compressAndScore(s); - - if (!validateRoseLiteralSetQuality(s, score, min_allowed_len, - desperation, - override_literal_quality_check)) { - continue; - } - - DEBUG_PRINTF("candidate is a candidate\n"); - scores[v] = score; - lit_info.insert(make_pair(v, ue2::make_unique(v, s))); - } - - /* try to filter out cases where appending some characters produces worse - * literals. Only bother to look back one byte, TODO make better */ - for (auto u : a_dom) { - if (out_degree(u, g) != 1 || !scores[u]) { - continue; - } - NFAVertex v = *adjacent_vertices(u, g).first; - if (contains(scores, v) && scores[v] >= scores[u]) { - DEBUG_PRINTF("killing off v as score %llu >= %llu\n", - scores[v], scores[u]); - lit_info.erase(v); - } - } - - lits->reserve(lit_info.size()); - for (auto &m : lit_info) { - lits->push_back(move(m.second)); - } - DEBUG_PRINTF("%zu candidate literal sets\n", lits->size()); -} - -static -void getRegionRoseLiterals(const NGHolder &g, - const ue2::unordered_map ®ion_map, - const set &a_dom_raw, - vector> *lits, - u32 min_allowed_len, bool desperation, - bool override_literal_quality_check) { - /* This allows us to get more places to chop the graph as we are not limited - to points where there is a single vertex to split. */ - - /* TODO: operate over 'proto-regions' which ignore back edges */ - - set mand, optional; - map > exits; - - for (auto v : vertices_range(g)) { - assert(contains(region_map, v)); - const u32 region = region_map.at(v); - - if (is_any_start(v, g) || region == 0) { - continue; - } - - if (is_any_accept(v, g)) { - continue; - } - - if (isRegionExit(g, v, region_map)) { - exits[region].push_back(v); - } - - if (isRegionEntry(g, v, region_map)) { - // Determine whether this region is mandatory or optional. We only - // need to do this check for the first entry vertex we encounter - // for this region. - if (!contains(mand, region) && !contains(optional, region)) { - if (isOptionalRegion(g, v, region_map)) { - optional.insert(region); - } else { - mand.insert(region); - } - } - } - } - - for (const auto &m : exits) { - if (0) { - next_cand: - continue; - } - - const u32 region = m.first; - const vector &vv = m.second; - assert(!vv.empty()); - - if (!contains(mand, region)) { - continue; - } - - for (auto v : vv) { - /* if an exit is in a_dom_raw, the region is already handled well - * by getSimpleRoseLiterals */ - if (contains(a_dom_raw, v)) { - goto next_cand; - } - } - - /* the final region may not have a neat exit. 
validate that all exits - * have an edge to each accept or none do */ - bool edge_to_a = edge(vv[0], g.accept, g).second; - bool edge_to_aeod = edge(vv[0], g.acceptEod, g).second; - const auto &reports = g[vv[0]].reports; - for (auto v : vv) { - if (edge_to_a != edge(v, g.accept, g).second) { - goto next_cand; - } - - if (edge_to_aeod != edge(v, g.acceptEod, g).second) { - goto next_cand; - } - - if (g[v].reports != reports) { - goto next_cand; - } - } - - DEBUG_PRINTF("inspecting region %u\n", region); - set s; - for (auto v : vv) { - DEBUG_PRINTF(" exit vertex: %zu\n", g[v].index); - /* Note: RHS can not be depended on to take all subsequent revisits - * to this vertex */ - set ss = getLiteralSet(g, v, false); - if (ss.empty()) { - DEBUG_PRINTF("candidate is too bad\n"); - goto next_cand; - } - insert(&s, ss); - } - - assert(!s.empty()); - - DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - u64a score = compressAndScore(s); - DEBUG_PRINTF("|candidate literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - - if (!validateRoseLiteralSetQuality(s, score, min_allowed_len, - desperation, - override_literal_quality_check)) { - continue; - } - - DEBUG_PRINTF("candidate is a candidate\n"); - lits->push_back(ue2::make_unique(vv, s)); - } -} - -static -void gatherBackEdges(const NGHolder &g, - ue2::unordered_map> *out) { - set backEdges; - BackEdges> be(backEdges); - depth_first_search(g, visitor(be).root_vertex(g.start)); - - for (const auto &e : backEdges) { - (*out)[source(e, g)].push_back(target(e, g)); - } -} - -LitCollection::LitCollection(const NGHolder &g_in, - const vector &depths_in, - const ue2::unordered_map ®ion_map_in, - const set &a_dom, - const set &a_dom_raw, u32 min_len, - bool desperation, const CompileContext &cc, - bool override_literal_quality_check) - : g(g_in), depths(depths_in), region_map(region_map_in), grey(cc.grey), - seeking_transient(cc.streaming), seeking_anchored(true) { - getSimpleRoseLiterals(g, a_dom, &lits, min_len, desperation, - override_literal_quality_check); - getRegionRoseLiterals(g, region_map, a_dom_raw, &lits, min_len, desperation, - override_literal_quality_check); - DEBUG_PRINTF("lit coll is looking for a%d t%d\n", (int)seeking_anchored, - (int)seeking_transient); - DEBUG_PRINTF("we have %zu candidate literal splits\n", lits.size()); - sort(lits.begin(), lits.end(), LitComparator(*this)); - gatherBackEdges(g, &back_edges); -} - -void LitCollection::poisonLHS(const VertLitInfo &picked) { - DEBUG_PRINTF("found anchored %d transient %d\n", - (int)createsAnchoredLHS(g, picked.vv, depths, grey), - (int)createsTransientLHS(g, picked.vv, depths, grey)); - set curr; - set next; - - insert(&curr, picked.vv); - - while (!curr.empty()) { - insert(&poisoned, curr); - next.clear(); - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_special(u, g) && !contains(poisoned, u)) { - next.insert(u); - } - } - } - - curr.swap(next); - } - - seeking_transient = false; - seeking_anchored = false; - - /* reprioritise cuts now that the LHS is taken care off */ - sort(lits.begin(), lits.end(), LitComparator(*this)); -} - -static -void flood_back(const NGHolder &g, u32 len, const set &initial, - set *visited) { - vector curr; - vector next; - - insert(&curr, curr.end(), initial); - - insert(visited, initial); - - /* bfs: flood back len vertices */ - for (u32 i = 1; i < len; i++) { - next.clear(); - DEBUG_PRINTF("poison %u/%u: curr %zu\n", i, len, curr.size()); - - for (auto v : curr) { - for (auto 
u : inv_adjacent_vertices_range(v, g)) {
-                if (!contains(*visited, u)) {
-                    next.push_back(u);
-                    visited->insert(u);
-                }
-            }
-        }
-
-        next.swap(curr);
-    }
-}
-
-/**
- * Add vertices near a picked literal to the poison set unless it looks
- * like they may still add value (i.e. they are on the other side of a cycle).
- */
-void LitCollection::poisonLitVerts(const VertLitInfo &picked) {
-    DEBUG_PRINTF("poisoning vertices associated with picked literals\n");
-
-    u32 len = max_len(picked.lit);
-
-    /* poison vertices behind */
-
-    set<NFAVertex> starters;
-    insert(&starters, picked.vv);
-
-    set<NFAVertex> visited;
-
-    flood_back(g, len, starters, &visited);
-
-    DEBUG_PRINTF("flood %zu vertices\n", visited.size());
-
-    /* inspect any back edges which are in the flooded subgraph; look for any
-     * destination vertices which are not starters */
-    set<NFAVertex> anti;
-    for (auto u : visited) {
-        if (!contains(back_edges, u) || contains(starters, u)) {
-            continue;
-        }
-
-        for (auto v : back_edges[u]) {
-            if (contains(visited, v) && !contains(starters, v)) {
-                anti.insert(v);
-            }
-        }
-    }
-    DEBUG_PRINTF("%zu cycle ends\n", anti.size());
-
-    /* remove any vertices which lie on the other side of a cycle from the
-     * visited set */
-    set<NFAVertex> anti_pred;
-    flood_back(g, len - 1, anti, &anti_pred);
-
-    DEBUG_PRINTF("flood visited %zu vertices; anti %zu\n", visited.size(),
-                 anti_pred.size());
-
-    erase_all(&visited, anti_pred);
-
-    DEBUG_PRINTF("filtered flood visited %zu vertices\n", visited.size());
-
-    insert(&poisoned, visited);
-
-    insert(&poisoned, starters); /* complicated back loops can result in start
-                                    vertices being removed from the visited
-                                    set */
-
-    for (UNUSED auto v : picked.vv) {
-        assert(contains(poisoned, v));
-    }
-
-    /* TODO: poison vertices in front of us? */
-}
-
-void LitCollection::poisonCandidates(const VertLitInfo &picked) {
-    assert(!picked.lit.empty());
-    if (picked.lit.empty()) {
-        return;
-    }
-
-    if ((seeking_anchored && createsAnchoredLHS(g, picked.vv, depths, grey))
-        || (seeking_transient && createsTransientLHS(g, picked.vv, depths, grey))) {
-        /* We don't want to pick anything to the LHS of picked.v any more as we
-         * have something good. We also don't want to provide any bonus for
-         * remaining literals based on anchoredness/transientness of the lhs.
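         *
         * (A minimal illustration, added for clarity and not part of the
         * original file: after poisonLHS(picked) runs, every vertex reachable
         * backwards from picked.vv is in 'poisoned', seeking_anchored and
         * seeking_transient are both false, and the re-sort means
         * LitComparator now orders the remaining candidates purely by
         * scoreSet() and vertex index.)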
- */ - poisonLHS(picked); - } else { - poisonLitVerts(picked); - } -} - -unique_ptr LitCollection::pickNext() { - while (!lits.empty()) { - if (0) { - next_lit: - continue; - } - - for (auto v : lits.back()->vv) { - if (contains(poisoned, v)) { - DEBUG_PRINTF("skipping '%s' as overlapped\n", - dumpString(*(lits.back()->lit.begin())).c_str()); - lits.pop_back(); - goto next_lit; - } - } - - unique_ptr rv = move(lits.back()); - lits.pop_back(); - poisonCandidates(*rv); - DEBUG_PRINTF("best is '%s' %zu a%d t%d\n", - dumpString(*(rv->lit.begin())).c_str(), - g[rv->vv.front()].index, - (int)createsAnchoredLHS(g, rv->vv, depths, grey), - (int)createsTransientLHS(g, rv->vv, depths, grey)); - - return rv; - } - - return nullptr; -} - -} - -static -bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { - set curr, next; - curr.insert(g.accept); - - for (auto it = lit.rbegin(); it != lit.rend(); ++it) { - next.clear(); - - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.start) { - if (overhang_ok) { - DEBUG_PRINTF("bail\n"); - return true; - } else { - continue; /* it is not possible for a lhs literal to - * overhang the start */ - } - } - - const CharReach &cr = g[u].char_reach; - if (!overlaps(*it, cr)) { - DEBUG_PRINTF("skip\n"); - continue; - } - - next.insert(u); - } - } - - curr.swap(next); - } - - return !curr.empty(); -} - -static -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay, const vector &preds) { - assert(delay <= lit.length()); - assert(isCorrectlyTopped(g)); - DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); - - NFAVertex prev = g.accept; - auto it = lit.rbegin(); - while (delay--) { - NFAVertex curr = add_vertex(g); - assert(it != lit.rend()); - g[curr].char_reach = *it; - add_edge(curr, prev, g); - ++it; - prev = curr; - } - - for (auto v : preds) { - NFAEdge e = add_edge(v, prev, g); - if (v == g.start && is_triggered(g)) { - g[e].tops.insert(DEFAULT_TOP); - } - } - - // Every predecessor of accept must have a report. - for (auto u : inv_adjacent_vertices_range(g.accept, g)) { - g[u].reports.insert(0); - } - - renumber_vertices(g); - renumber_edges(g); - assert(allMatchStatesHaveReports(g)); - assert(isCorrectlyTopped(g)); -} - -static -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay) { - vector preds; - insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); - clear_in_edges(g.accept, g); - - for (auto v : preds) { - g[v].reports.clear(); /* clear report from old accepts */ - } - - restoreTrailingLiteralStates(g, lit, delay, preds); -} - -/* return false if we should get rid of the edge altogether */ -static -bool removeLiteralFromLHS(RoseInGraph &ig, const RoseInEdge &lhs, - const CompileContext &cc) { - unique_ptr h = cloneHolder(*ig[lhs].graph); - NGHolder &g = *h; - assert(ig[target(lhs, ig)].type == RIV_LITERAL); - const ue2_literal &lit = ig[target(lhs, ig)].s; - - /* lhs should be connected to a start */ - assert(ig[source(lhs, ig)].type == RIV_START - || ig[source(lhs, ig)].type == RIV_ANCHORED_START); - - if (in_degree(g.acceptEod, g) != 1 /* edge from accept */) { - assert(0); - return true; - } - if (lit.empty()) { - assert(0); - return true; - } - - const u32 max_delay = maxDelay(cc); - - // In streaming mode, we must limit the depth to the available history - // UNLESS the given literal follows start or startDs and has nothing - // before it that we will need to account for. 
In that case, we can - // lean on FDR's support for long literals. - if (literalIsWholeGraph(g, lit)) { - assert(!ig[lhs].haig); - assert(ig[lhs].minBound == 0); - assert(ig[lhs].maxBound == ROSE_BOUND_INF); - DEBUG_PRINTF("literal is the whole graph\n"); - - u32 delay = removeTrailingLiteralStates(g, lit, MO_INVALID_IDX, false); - assert(delay == lit.length()); - ig[lhs].graph = move(h); - ig[lhs].graph_lag = delay; - return true; - } - - if (!can_match(g, lit, false)) { - /* This is can happen if the literal arises from a large cyclic - to/beyond the pivot. As the LHS graph only cares about the first - reach of the pivot, this literal is junk */ - DEBUG_PRINTF("bogus edge\n"); - return false; - } - - u32 delay = removeTrailingLiteralStates(g, lit, max_delay, - false /* can't overhang start */); - - if (delay == MO_INVALID_IDX) { - /* This is can happen if the literal arises from a large cyclic - to/beyond the pivot. As the LHS graph only cares about the first - reach of the pivot, this literal is junk */ - DEBUG_PRINTF("bogus edge\n"); - return false; - } - - if (!delay) { - return true; - } - - DEBUG_PRINTF("setting delay %u on lhs %p\n", delay, h.get()); - - ig[lhs].graph = move(h); - ig[lhs].graph_lag = delay; - return true; -} - -static -void handleLhsCliche(RoseInGraph &ig, const RoseInEdge &lhs) { - const NGHolder &h = *ig[lhs].graph; - - size_t s_od = out_degree(h.start, h); - size_t sds_od = out_degree(h.startDs, h); - - assert(in_degree(h.acceptEod, h) == 1 /* edge from accept */); - /* need to check if simple floating start */ - if (edge(h.startDs, h.accept, h).second && sds_od == 2 - && ((s_od == 2 && edge(h.start, h.accept, h).second) || s_od == 1)) { - /* no need for graph */ - ig[lhs].graph.reset(); - ig[lhs].graph_lag = 0; - DEBUG_PRINTF("lhs is floating start\n"); - return; - } - - /* need to check if a simple anchor */ - /* start would have edges to sds and accept in this case */ - if (edge(h.start, h.accept, h).second && s_od == 2 && sds_od == 1) { - if (ig[source(lhs, ig)].type == RIV_ANCHORED_START) { - // assert(ig[lhs].graph_lag == ig[target(lhs, ig)].s.length()); - if (ig[lhs].graph_lag != ig[target(lhs, ig)].s.length()) { - DEBUG_PRINTF("oddness\n"); - return; - } - ig[lhs].graph.reset(); - ig[lhs].graph_lag = 0; - ig[lhs].maxBound = 0; - DEBUG_PRINTF("lhs is anchored start\n"); - } else { - DEBUG_PRINTF("lhs rewiring start\n"); - assert(ig[source(lhs, ig)].type == RIV_START); - RoseInVertex t = target(lhs, ig); - remove_edge(lhs, ig); - RoseInVertex s2 - = add_vertex(RoseInVertexProps::makeStart(true), ig); - add_edge(s2, t, RoseInEdgeProps(0U, 0U), ig); - } - return; - } -} - -static -void filterCandPivots(const NGHolder &g, const set &cand_raw, - set *out) { - for (auto u : cand_raw) { - const CharReach &u_cr = g[u].char_reach; - if (u_cr.count() > 40) { - continue; /* too wide to be plausible */ - } - - if (u_cr.count() > 2) { - /* include u as a candidate as successor may have backed away from - * expanding through it */ - out->insert(u); - continue; - } - - NFAVertex v = getSoleDestVertex(g, u); - if (v && in_degree(v, g) == 1 && out_degree(u, g) == 1) { - const CharReach &v_cr = g[v].char_reach; - if (v_cr.count() == 1 || v_cr.isCaselessChar()) { - continue; /* v will always generate better literals */ - } - } - - out->insert(u); - } -} - -/* cand_raw is the candidate set before filtering points which are clearly - * a bad idea. 
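 *
 * (Illustration added for clarity, assuming a simple chain: if
 * u --[a]--> v --[b]--> ... with in_degree(v) == 1, out_degree(u) == 1 and
 * v's reach a single character, then any literal ending at u extends to a
 * strictly better literal ending at v, so filterCandPivots() above drops u
 * from the candidate set.)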
*/ -static -void getCandidatePivots(const NGHolder &g, set *cand, - set *cand_raw) { - ue2::unordered_map dominators = - findDominators(g); - - set accepts; - - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (is_special(v, g)) { - continue; - } - accepts.insert(v); - } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (is_special(v, g)) { - continue; - } - accepts.insert(v); - } - - assert(!accepts.empty()); - - vector dom_trace; - auto ait = accepts.begin(); - assert(ait != accepts.end()); - NFAVertex curr = *ait; - while (curr && !is_special(curr, g)) { - dom_trace.push_back(curr); - curr = dominators[curr]; - } - reverse(dom_trace.begin(), dom_trace.end()); - for (++ait; ait != accepts.end(); ++ait) { - curr = *ait; - vector dom_trace2; - while (curr && !is_special(curr, g)) { - dom_trace2.push_back(curr); - curr = dominators[curr]; - } - reverse(dom_trace2.begin(), dom_trace2.end()); - auto dti = dom_trace.begin(), dtie = dom_trace.end(); - auto dtj = dom_trace2.begin(), dtje = dom_trace2.end(); - while (dti != dtie && dtj != dtje && *dti == *dtj) { - ++dti; - ++dtj; - } - dom_trace.erase(dti, dtie); - } - - cand_raw->insert(dom_trace.begin(), dom_trace.end()); - - filterCandPivots(g, *cand_raw, cand); -} - -static -void deanchorIfNeeded(NGHolder &g, bool *orig_anch) { - DEBUG_PRINTF("hi\n"); - if (proper_out_degree(g.startDs, g)) { - return; - } - - /* look for a non-special dot with a loop following start */ - set succ_g; - insert(&succ_g, adjacent_vertices(g.start, g)); - succ_g.erase(g.startDs); - - for (auto v : adjacent_vertices_range(g.start, g)) { - DEBUG_PRINTF("inspecting cand %zu || =%zu\n", g[v].index, - g[v].char_reach.size()); - - if (v == g.startDs || !g[v].char_reach.all()) { - continue; - } - - set succ_v; - insert(&succ_v, adjacent_vertices(v, g)); - - if (succ_v == succ_g) { - DEBUG_PRINTF("found ^.*\n"); - *orig_anch = true; - for (auto succ : succ_g) { - add_edge(g.startDs, succ, g); - } - clear_vertex(v, g); - remove_vertex(v, g); - renumber_vertices(g); - return; - } - - if (succ_g.size() == 1 && hasSelfLoop(v, g)) { - DEBUG_PRINTF("found ^.+\n"); - *orig_anch = true; - add_edge(g.startDs, v, g); - remove_edge(v, v, g); - return; - } - } -} - -static -unique_ptr makeTrivialGraph(const NGHolder &h, - vdest_map_t &v_dest_map, - vsrc_map_t &v_src_map) { - shared_ptr root_g = cloneHolder(h); - bool orig_anch = isAnchored(*root_g); - deanchorIfNeeded(*root_g, &orig_anch); - - DEBUG_PRINTF("orig_anch %d\n", (int)orig_anch); - - unique_ptr igp = ue2::make_unique(); - RoseInVertex start = - add_vertex(RoseInVertexProps::makeStart(orig_anch), *igp); - RoseInVertex accept = - add_vertex(RoseInVertexProps::makeAccept(set()), *igp); - - RoseInEdge e = - add_edge(start, accept, RoseInEdgeProps(root_g, 0), *igp).first; - - for (auto v : vertices_range(*root_g)) { - v_dest_map[v].emplace_back(e, v); - v_src_map[e].push_back(v); - } - - return igp; -} - -static never_inline -void updateVDestMap(const vector > &images, - const ue2::unordered_map &lhs_map, - const vector &l_e, - const ue2::unordered_map &rhs_map, - const vector &r_e, - vdest_map_t &v_dest_map, vsrc_map_t &v_src_map) { - RoseInEdge e = images.front().first; - set edge_set; - for (const auto &image : images) { - edge_set.insert(image.first); - } - const vector &domain = v_src_map[e]; - vector > temp; - - for (auto v : domain) { - vdest_map_t::iterator it = v_dest_map.find(v); - assert(it != v_dest_map.end()); - - temp.clear(); - - for (const auto &dest : it->second) { - const 
RoseInEdge &old_e = dest.first; - const NFAVertex old_dest = dest.second; - if (old_e != e) { - if (!contains(edge_set, old_e)) { - temp.emplace_back(old_e, old_dest); - } - } else if (contains(lhs_map, old_dest)) { - for (const auto &e2 : l_e) { - temp.emplace_back(e2, lhs_map.at(old_dest)); - } - /* only allow v to be tracked on one side of the split */ - } else if (contains(rhs_map, old_dest)) { - for (const auto &e2 : r_e) { - temp.emplace_back(e2, rhs_map.at(old_dest)); - } - } - } - NDEBUG_PRINTF("%zu images for vertex; prev %zu\n", temp.size(), - it->second.size()); - it->second.swap(temp); - } -} - -/** Returns the collection of vertices from the original graph which end up - * having an image in the [lr]hs side of the graph split. */ -static never_inline -void fillDomain(const vdest_map_t &v_dest_map, const vsrc_map_t &v_src_map, - RoseInEdge e, - const ue2::unordered_map &split_map, - vector *out) { - const vector &presplit_domain = v_src_map.at(e); - for (auto v : presplit_domain) { - /* v is in the original graph, need to find its image on e's graph */ - typedef vector > dests_t; - const dests_t &dests = v_dest_map.at(v); - for (const auto &dest : dests) { - if (dest.first == e) { - NFAVertex vv = dest.second; - /* vv is v image on e's graph */ - if (contains(split_map, vv)) { - out->push_back(v); - } - } - } - } -} - -static -void getSourceVerts(RoseInGraph &ig, - const vector > &images, - vector *out) { - set seen; - for (const auto &image : images) { - RoseInVertex s = source(image.first, ig); - if (contains(seen, s)) { - continue; - } - seen.insert(s); - out->push_back(s); - } -} - -static -void getDestVerts(RoseInGraph &ig, - const vector > &images, - vector *out) { - set seen; - for (const auto &image : images) { - RoseInVertex t = target(image.first, ig); - if (contains(seen, t)) { - continue; - } - seen.insert(t); - out->push_back(t); - } -} - -static -void getSourceVerts(RoseInGraph &ig, const vector &edges, - vector *out) { - set seen; - for (const auto &e : edges) { - RoseInVertex s = source(e, ig); - if (contains(seen, s)) { - continue; - } - seen.insert(s); - out->push_back(s); - } -} - -static -void getDestVerts(RoseInGraph &ig, const vector &edges, - vector *out) { - set seen; - for (const auto &e : edges) { - RoseInVertex t = target(e, ig); - if (contains(seen, t)) { - continue; - } - seen.insert(t); - out->push_back(t); - } -} - -static -bool splitRoseEdge(RoseInGraph &ig, const VertLitInfo &split, - vdest_map_t &v_dest_map, vsrc_map_t &v_src_map) { - const vector &root_splitters = split.vv; /* vertices in the - 'root' graph */ - assert(!root_splitters.empty()); - - /* need copy as split rose edge will update orig map */ - vector > images - = v_dest_map[root_splitters[0]]; - DEBUG_PRINTF("splitting %zu rose edge with %zu literals\n", - images.size(), split.lit.size()); - - /* note: as we haven't removed literals yet the graphs on all edges that we - * are going to split should be identical */ - const auto &base_graph = ig[images.front().first].graph; - - vector splitters; /* vertices in the graph being split */ - for (auto v : root_splitters) { - if (!contains(v_dest_map, v)) { - DEBUG_PRINTF("vertex to split on is no longer in the graph\n"); - return false; - } - - /* sanity check: verify all edges have the same underlying graph */ - for (UNUSED const auto &m : v_dest_map[v]) { - assert(base_graph == ig[m.first].graph); - } - assert(v_dest_map[v].size() == images.size()); - - splitters.push_back(v_dest_map[v].front().second); - } - - /* note: the set of split 
edges should form a complete bipartite graph */ - vector src_verts; - vector dest_verts; - getSourceVerts(ig, images, &src_verts); - getDestVerts(ig, images, &dest_verts); - assert(images.size() == src_verts.size() * dest_verts.size()); - - shared_ptr lhs = make_shared(); - shared_ptr rhs = make_shared(); - - ue2::unordered_map lhs_map; - ue2::unordered_map rhs_map; - - assert(base_graph); - splitGraph(*base_graph, splitters, lhs.get(), &lhs_map, - rhs.get(), &rhs_map); - - RoseInEdge first_e = images.front().first; - - /* all will be suffix or none */ - bool suffix = ig[target(first_e, ig)].type == RIV_ACCEPT; - - set splitter_reports; - for (auto v : splitters) { - insert(&splitter_reports, (*base_graph)[v].reports); - } - - bool do_accept = false; - bool do_accept_eod = false; - assert(rhs); - if (isVacuous(*rhs) && suffix) { - if (edge(rhs->start, rhs->accept, *rhs).second) { - DEBUG_PRINTF("rhs has a cliche\n"); - do_accept = true; - remove_edge(rhs->start, rhs->accept, *rhs); - } - - if (edge(rhs->start, rhs->acceptEod, *rhs).second) { - DEBUG_PRINTF("rhs has an eod cliche\n"); - do_accept_eod = true; - remove_edge(rhs->start, rhs->acceptEod, *rhs); - } - } - - bool do_norm = out_degree(rhs->start, *rhs) != 1; /* check if we still have - a graph left over */ - vector lhs_domain; - vector rhs_domain; - fillDomain(v_dest_map, v_src_map, first_e, lhs_map, &lhs_domain); - fillDomain(v_dest_map, v_src_map, first_e, rhs_map, &rhs_domain); - - vector l_e; - vector r_e; - for (const auto &lit : split.lit) { - DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); - RoseInVertex v - = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - - /* work out delay later */ - if (do_accept) { - DEBUG_PRINTF("rhs has a cliche\n"); - RoseInVertex tt = add_vertex(RoseInVertexProps::makeAccept( - splitter_reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - if (do_accept_eod) { - DEBUG_PRINTF("rhs has an eod cliche\n"); - RoseInVertex tt = add_vertex(RoseInVertexProps::makeAcceptEod( - splitter_reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - for (auto src_v : src_verts) { - l_e.push_back(add_edge(src_v, v, - RoseInEdgeProps(lhs, 0U), ig).first); - v_src_map[l_e.back()] = lhs_domain; - } - - if (do_norm) { - for (auto dst_v : dest_verts) { - /* work out delay later */ - assert(out_degree(rhs->start, *rhs) > 1); - r_e.push_back( - add_edge(v, dst_v, RoseInEdgeProps(rhs, 0U), ig).first); - v_src_map[r_e.back()] = rhs_domain; - } - } - } - - updateVDestMap(images, lhs_map, l_e, rhs_map, r_e, v_dest_map, v_src_map); - - for (const auto &image : images) { - /* remove old edge */ - remove_edge(image.first, ig); - v_src_map.erase(image.first); - } - - return true; -} - -static -bool isStarCliche(const NGHolder &g) { - DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g)); - - bool nonspecials_seen = false; - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - - if (nonspecials_seen) { - return false; - } - nonspecials_seen = true; - - if (!g[v].char_reach.all()) { - return false; - } - - if (!hasSelfLoop(v, g)) { - return false; - } - if (!edge(v, g.accept, g).second) { - return false; - } - } - - if (!nonspecials_seen) { - return false; - } - - if (!edge(g.start, g.accept, g).second) { - return false; - } - - return true; -} - -static -void processInfixes(RoseInGraph &ig, const CompileContext &cc) { - /* we want to ensure that every prefix/infix graph is unique at this stage - * as we have not done any analysis to check if 
they are safe to share */ - - vector dead; - - for (const auto &e : edges_range(ig)) { - if (!ig[e].graph) { - continue; - } - - RoseInVertex u = source(e, ig), v = target(e, ig); - - // Infixes are edges between two literals. - if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) { - continue; - } - - if (ig[e].graph_lag) { - continue; /* already looked at */ - } - - DEBUG_PRINTF("looking at infix %p\n", ig[e].graph.get()); - - const ue2_literal &lit1 = ig[u].s; - const ue2_literal &lit2 = ig[v].s; - size_t overlap = maxOverlap(lit1, lit2, 0); - - const NGHolder &h = *ig[e].graph; - - DEBUG_PRINTF("infix rose between literals '%s' and '%s', overlap %zu," - "size %zu\n", - dumpString(lit1).c_str(), dumpString(lit2).c_str(), - overlap, num_vertices(h)); - - if (!can_match(h, lit2, true)) { - DEBUG_PRINTF("found bogus edge\n"); - dead.push_back(e); - continue; - } - - unique_ptr h_new = cloneHolder(h); - - u32 delay = removeTrailingLiteralStates(*h_new, lit2, MO_INVALID_IDX); - if (delay == MO_INVALID_IDX) { - DEBUG_PRINTF("found bogus edge\n"); - dead.push_back(e); - continue; - } - - // Delay can be set to at most lit2.length() - overlap, but we must - // truncate to history available in streaming mode. - u32 max_allowed_delay = lit2.length() - overlap; - LIMIT_TO_AT_MOST(&max_allowed_delay, delay); - - if (cc.streaming) { - LIMIT_TO_AT_MOST(&max_allowed_delay, cc.grey.maxHistoryAvailable); - } - - if (delay != max_allowed_delay) { - restoreTrailingLiteralStates(*h_new, lit2, delay); - delay = removeTrailingLiteralStates(*h_new, lit2, max_allowed_delay); - } - - if (isStarCliche(*h_new)) { - DEBUG_PRINTF("is a X star!\n"); - ig[e].graph.reset(); - ig[e].graph_lag = 0; - } else { - ig[e].graph = move(h_new); - ig[e].graph_lag = delay; - DEBUG_PRINTF("delay increased to %u\n", delay); - } - } - - for (const auto &e : dead) { - remove_edge(e, ig); - } -} - -static -void poisonNetflowScores(RoseInGraph &ig, RoseInEdge lhs, - vector *scores) { - assert(ig[lhs].graph); - NGHolder &h = *ig[lhs].graph; - - if (ig[target(lhs, ig)].type != RIV_LITERAL) { - /* nothing to poison in outfixes */ - assert(ig[target(lhs, ig)].type == RIV_ACCEPT); - return; - } - - set curr, next; - insert(&curr, inv_adjacent_vertices(h.accept, h)); - set poisoned; - u32 len = ig[target(lhs, ig)].s.length(); - assert(len); - while (len) { - next.clear(); - for (auto v : curr) { - insert(&poisoned, in_edges(v, h)); - insert(&next, inv_adjacent_vertices(v, h)); - } - - curr.swap(next); - len--; - } - - for (const auto &e : poisoned) { - (*scores)[h[e].index] = NO_LITERAL_AT_EDGE_SCORE; - } -} - -#define MAX_NETFLOW_CUT_WIDTH 40 /* magic number is magic */ -#define MAX_LEN_2_LITERALS_PER_CUT 3 - -static -bool checkValidNetflowLits(NGHolder &h, const vector &scores, - const map> &cut_lits, - const Grey &grey) { - DEBUG_PRINTF("cut width %zu\n", cut_lits.size()); - if (cut_lits.size() > MAX_NETFLOW_CUT_WIDTH) { - return false; - } - - u32 len_2_count = 0; - - for (const auto &cut : cut_lits) { - if (scores[h[cut.first].index] >= NO_LITERAL_AT_EDGE_SCORE) { - DEBUG_PRINTF("cut uses a forbidden edge\n"); - return false; - } - - if (min_len(cut.second) < grey.minRoseNetflowLiteralLength) { - DEBUG_PRINTF("cut uses a bad literal\n"); - return false; - } - - for (const auto &lit : cut.second) { - if (lit.length() == 2) { - len_2_count++; - } - } - } - - if (len_2_count > MAX_LEN_2_LITERALS_PER_CUT) { - return false; - } - - return true; -} - -static -void splitEdgesByCut(RoseInGraph &ig, const vector &to_cut, - const vector 
&cut, - const map > &cut_lits) { - assert(!to_cut.empty()); - assert(ig[to_cut.front()].graph); - NGHolder &h = *ig[to_cut.front()].graph; - - /* note: the set of split edges should form a complete bipartite graph */ - vector src_verts; - vector dest_verts; - getSourceVerts(ig, to_cut, &src_verts); - getDestVerts(ig, to_cut, &dest_verts); - assert(to_cut.size() == src_verts.size() * dest_verts.size()); - - map, shared_ptr > done_rhs; - - /* iterate over cut for determinism */ - for (const auto &e : cut) { - NFAVertex prev_v = source(e, h); - NFAVertex pivot = target(e, h); - - vector adj; - insert(&adj, adj.end(), adjacent_vertices(pivot, h)); - /* we can ignore presence of accept, accepteod in adj as it is best - effort */ - - if (!contains(done_rhs, adj)) { - ue2::unordered_map temp_map; - shared_ptr new_rhs = make_shared(); - splitRHS(h, adj, new_rhs.get(), &temp_map); - remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); - remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); - done_rhs.insert(make_pair(adj, new_rhs)); - /* TODO need to update v_mapping (if we were doing more cuts) */ - } - - DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); - ue2::unordered_map temp_map; - shared_ptr new_lhs = make_shared(); - splitLHS(h, pivot, new_lhs.get(), &temp_map); - - /* want to cut of paths to pivot from things other than the pivot - - * makes a more svelte graphy */ - clear_in_edges(temp_map[pivot], *new_lhs); - add_edge(temp_map[prev_v], temp_map[pivot], *new_lhs); - - pruneUseless(*new_lhs); - - const set &lits = cut_lits.at(e); - for (const auto &lit : lits) { - RoseInVertex v - = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - - if (edge(pivot, h.accept, h).second) { - /* literal has a direct connection to accept */ - assert(ig[dest_verts.front()].type == RIV_ACCEPT); - const auto &reports = h[pivot].reports; - RoseInVertex tt = - add_vertex(RoseInVertexProps::makeAccept(reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - if (edge(pivot, h.acceptEod, h).second) { - /* literal has a direct connection to accept */ - assert(ig[dest_verts.front()].type == RIV_ACCEPT); - const auto &reports = h[pivot].reports; - RoseInVertex tt = add_vertex( - RoseInVertexProps::makeAcceptEod(reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - assert(done_rhs[adj].get()); - shared_ptr new_rhs = done_rhs[adj]; - if (out_degree(new_rhs->start, *new_rhs) != 1) { - for (auto dst_v : dest_verts) { - add_edge(v, dst_v, RoseInEdgeProps(done_rhs[adj], 0), ig); - } - } - - for (auto src_v : src_verts) { - add_edge(src_v, v, RoseInEdgeProps(new_lhs, 0), ig); - } - } - } - - /* TODO need to update v_mapping (if we were doing more cuts) */ - - for (const auto &e : to_cut) { - assert(ig[e].graph.get() == &h); - remove_edge(e, ig); - } -} - -static -bool doNetflowCut(RoseInGraph &ig, const vector &to_cut, - const Grey &grey) { - DEBUG_PRINTF("doing netflow cut\n"); - /* TODO: we should really get literals/scores from the full graph as this - * allows us to overlap the graph. Doesn't matter at the moment as we - * are working on the LHS. */ - - NGHolder &h = *ig[to_cut.front()].graph; - if (num_edges(h) > grey.maxRoseNetflowEdges) { - /* We have a limit on this because scoring edges and running netflow - * gets very slow for big graphs. 
 */
-        DEBUG_PRINTF("too many edges, skipping netflow cut\n");
-        return false;
-    }
-
-    renumber_vertices(h);
-    renumber_edges(h);
-    /* Step 1: Get scores for all edges */
-    vector<u64a> scores = scoreEdges(h); /* scores by edge_index */
-    /* Step 2: poison scores for edges covered by successor literal */
-    for (const auto &e : to_cut) {
-        assert(&h == ig[e].graph.get());
-        poisonNetflowScores(ig, e, &scores);
-    }
-    /* Step 3: Find cutset based on scores */
-    vector<NFAEdge> cut = findMinCut(h, scores);
-
-    /* Step 4: Get literals corresponding to cut edges */
-    map<NFAEdge, set<ue2_literal>> cut_lits;
-    for (const auto &e : cut) {
-        set<ue2_literal> lits = getLiteralSet(h, e);
-        compressAndScore(lits);
-        cut_lits[e] = lits;
-    }
-
-    /* bail if the literals are under-length or if the cut involves a
-     * forbidden edge */
-    if (!checkValidNetflowLits(h, scores, cut_lits, grey)) {
-        return false;
-    }
-    DEBUG_PRINTF("splitting\n");
-
-    /* Step 5: Split graph based on cuts */
-    splitEdgesByCut(ig, to_cut, cut, cut_lits);
-    return true;
-}
-
-/** \brief Returns the number of intermediate vertices in the shortest path
- * between (from, to). */
-static
-u32 min_dist_between(NFAVertex from, NFAVertex to, const NGHolder &g) {
-    // Check for the trivial case: that way we don't have to set up the
-    // containers below.
-    if (edge(from, to, g).second) {
-        return 0;
-    }
-
-    ue2::unordered_set<NFAVertex> visited;
-    visited.insert(from);
-
-    flat_set<NFAVertex> curr, next;
-    curr.insert(from);
-
-    assert(from != to);
-
-    u32 d = 0;
-
-    while (!curr.empty()) {
-        next.clear();
-        for (auto v : curr) {
-            for (auto w : adjacent_vertices_range(v, g)) {
-                if (w == to) {
-                    return d;
-                }
-                if (visited.insert(w).second) { // first visit to w
-                    next.insert(w);
-                }
-            }
-        }
-
-        d++;
-        curr.swap(next);
-    }
-    assert(0);
-    return ROSE_BOUND_INF;
-}
-
-/** Literals which are completely enveloped by a successor are trouble because
- * hamsterwheel acceleration can skip past the start of the literal.
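 *
 * (Worked instance added for clarity, with assumed lengths: if every
 * candidate split vertex v has min_dist_between(v, g.accept, g) == 2 and the
 * candidate literal set has min_len == 2, then a successor literal of
 * length 5 gives 2 + 2 < 5, so the candidate lies wholly inside the
 * successor and enveloped() below reports trouble.)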
*/ -static -bool enveloped(const vector &cand_split_v, - const set &cand_lit, const NGHolder &g, - const RoseInVertexProps &succ) { - if (succ.type != RIV_LITERAL) { - return false; - } - - /* TODO: handle multiple v more precisely: not all candidate v can start all - * candidate literals */ - - for (auto v : cand_split_v) { - u32 rhs_min_len = min_dist_between(v, g.accept, g); - if (rhs_min_len + min_len(cand_lit) >= succ.s.length()) { - return false; - } - } - - return true; /* we are in trouble */ -} - -static -bool enveloped(const VertLitInfo &cand_split, const RoseInGraph &ig, - const vdest_map_t &v_dest_map) { - for (auto v : cand_split.vv) { - const auto &images = v_dest_map.at(v); - for (const auto &image : images) { - /* check that we aren't enveloped by the successor */ - if (enveloped(vector(1, image.second), cand_split.lit, - *ig[image.first].graph, - ig[target(image.first, ig)])) { - return true; - } - - const RoseInVertexProps &pred = ig[source(image.first, ig)]; - if (pred.type != RIV_LITERAL) { - continue; - } - - /* check we don't envelop the pred */ - const NGHolder &g = *ig[image.first].graph; - u32 lhs_min_len = min_dist_between(g.start, image.second, g); - if (lhs_min_len + pred.s.length() < max_len(cand_split.lit)) { - return true; - } - } - } - - return false; -} - -static -bool attemptSplit(RoseInGraph &ig, vdest_map_t &v_dest_map, - vsrc_map_t &v_src_map, const vector &v_e, - LitCollection &lits) { - NGHolder &h = *ig[v_e.front()].graph; - unique_ptr split = lits.pickNext(); - - while (split) { - for (const auto &e : v_e) { - RoseInVertex t = target(e, ig); - if (enveloped(split->vv, split->lit, h, ig[t])) { - DEBUG_PRINTF("enveloped\n"); - split = lits.pickNext(); - goto next_split; - } - } - break; - next_split:; - } - - if (!split) { - return false; - } - - for (auto v : split->vv) { - if (edge(v, h.accept, h).second) { - return false; - } - } - - DEBUG_PRINTF("saved by a bad literal\n"); - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - return true; -} - -static -void appendLiteral(const ue2_literal &s, const CharReach &cr, - vector *out) { - for (size_t c = cr.find_first(); c != CharReach::npos; - c = cr.find_next(c)) { - bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) - && cr.test(mytolower(c)); - - if (nocase && (char)c == mytolower(c)) { - continue; /* uppercase already handled us */ - } - - out->push_back(s); - out->back().push_back(c, nocase); - } -} - -static -bool findAnchoredLiterals(const NGHolder &g, vector *out, - vector *pivots_out) { - - DEBUG_PRINTF("trying for anchored\n"); -#define MAX_ANCHORED_LITERALS 30 -#define MAX_ANCHORED_LITERAL_LEN 30 - - /* TODO: this could be beefed up by going region-by-region but currently - * that brings back bad memories of ng_rose. 
OR any AA region we can build - * a dfa out of */ - assert(!proper_out_degree(g.startDs, g)); - - vector lits; - lits.push_back(ue2_literal()); - - set curr; - insert(&curr, adjacent_vertices(g.start, g)); - curr.erase(g.startDs); - - set old; - - if (contains(curr, g.accept) || curr.empty()) { - DEBUG_PRINTF("surprise accept/voidness\n"); - return false; - } - - while (!curr.empty()) { - set next_verts; - insert(&next_verts, adjacent_vertices(*curr.begin(), g)); - bool can_extend - = !next_verts.empty() && !contains(next_verts, g.accept); - CharReach cr; - - for (auto v : curr) { - assert(!is_special(v, g)); - - if (can_extend) { - /* next verts must agree */ - set next_verts_local; - insert(&next_verts_local, adjacent_vertices(v, g)); - can_extend = next_verts_local == next_verts; - } - - cr |= g[v].char_reach; - } - - if (!can_extend) { - goto bail; - } - - /* extend literals */ - assert(cr.any()); - vector next_lits; - for (const auto &lit : lits) { - appendLiteral(lit, cr, &next_lits); - if (next_lits.size() > MAX_ANCHORED_LITERALS) { - goto bail; - } - } - - assert(!next_lits.empty()); - old.swap(curr); - - if (next_lits[0].length() <= MAX_ANCHORED_LITERAL_LEN) { - curr.swap(next_verts); - } else { - curr.clear(); - } - - lits.swap(next_lits); - } - bail: - assert(!lits.empty()); - for (UNUSED const auto &lit : lits) { - DEBUG_PRINTF("found anchored string: %s\n", dumpString(lit).c_str()); - } - - insert(pivots_out, pivots_out->end(), old); - out->swap(lits); - return !out->empty() && !out->begin()->empty(); -} - -static -bool tryForAnchoredImprovement(RoseInGraph &ig, RoseInEdge e) { - vector lits; - vector pivots; - - if (!findAnchoredLiterals(*ig[e].graph, &lits, &pivots)) { - DEBUG_PRINTF("unable to find literals\n"); - return false; - } - DEBUG_PRINTF("found %zu literals to act as anchors\n", lits.size()); - - RoseInVertex s = source(e, ig); - RoseInVertex t = target(e, ig); - - assert(!ig[e].graph_lag); - - shared_ptr lhs = make_shared(); - shared_ptr rhs = make_shared(); - ue2::unordered_map temp1; - ue2::unordered_map temp2; - - splitGraph(*ig[e].graph, pivots, lhs.get(), &temp1, rhs.get(), &temp2); - - for (const auto &lit : lits) { - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), - ig); - add_edge(s, v, RoseInEdgeProps(lhs, 0U), ig); - add_edge(v, t, RoseInEdgeProps(rhs, 0U), ig); - } - remove_edge(e, ig); - - return true; -} - -#define MAX_SINGLE_BYTE_ANCHORED_DIST 30 - -/* returns true if we should make another pass */ -static -bool lastChanceImproveLHS(RoseInGraph &ig, RoseInEdge lhs, - const CompileContext &cc) { - DEBUG_PRINTF("argh lhs is nasty\n"); - assert(ig[lhs].graph); - - /* customise the lhs for this literal */ - /* TODO better, don't recalc */ - if (ig[target(lhs, ig)].type == RIV_LITERAL) { - const NGHolder &h = *ig[lhs].graph; - - /* sanitise literal on lhs */ - const ue2_literal &s = ig[target(lhs, ig)].s; - - if (!can_match(h, s, false)) { - DEBUG_PRINTF("found bogus edge\n"); - return false; - } - - /* see if we can build some anchored literals out of this */ - if (isAnchored(h) && tryForAnchoredImprovement(ig, lhs)) { - return true; - } - - unique_ptr cust = cloneHolder(h); - u32 d = removeTrailingLiteralStates(*cust, s, MO_INVALID_IDX); - if (d == MO_INVALID_IDX) { - DEBUG_PRINTF("found bogus edge\n"); - return false; - } - restoreTrailingLiteralStates(*cust, s, d); - ig[lhs].graph = move(cust); - } - - NGHolder &lhs_graph = *ig[lhs].graph; - set cand; - set cand_raw; - getCandidatePivots(lhs_graph, &cand, &cand_raw); - vdest_map_t 
v_dest_map; - vsrc_map_t v_src_map; - for (auto v : vertices_range(lhs_graph)) { - v_dest_map[v].emplace_back(lhs, v); - v_src_map[lhs].push_back(v); - } - - vector depths; - calcDepths(lhs_graph, depths); - - /* need to ensure regions are valid before we do lit discovery */ - auto region_map = assignRegions(lhs_graph); - - vector to_cut(1, lhs); - DEBUG_PRINTF("see if we can get a better lhs by another cut\n"); - LitCollection lit1(lhs_graph, depths, region_map, cand, cand_raw, - cc.grey.minRoseLiteralLength, true, cc); - if (attemptSplit(ig, v_dest_map, v_src_map, to_cut, lit1)) { - return true; - } - - if (doNetflowCut(ig, to_cut, cc.grey)) { - return true; - } - - DEBUG_PRINTF("eek last chance try len 1 if it creates an anchored lhs\n"); - { - LitCollection lits(lhs_graph, depths, region_map, cand, cand_raw, 1, - true, cc, true); - unique_ptr split = lits.pickNext(); - - /* TODO fix edge to accept check */ - while (split - && (enveloped(split->vv, split->lit, lhs_graph, - ig[target(lhs, ig)]) - || edge(split->vv.front(), lhs_graph.accept, lhs_graph).second - || !createsAnchoredLHS(lhs_graph, split->vv, depths, cc.grey, - MAX_SINGLE_BYTE_ANCHORED_DIST))) { - split = lits.pickNext(); - } - - if (split) { - DEBUG_PRINTF("saved by a really bad literal\n"); - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - return true; - } - } - - return false; -} - -/* returns false if nothing happened */ -static -bool lastChanceImproveLHS(RoseInGraph &ig, const vector &to_cut, - const CompileContext &cc) { - DEBUG_PRINTF("argh lhses are nasty\n"); - - NGHolder &lhs_graph = *ig[to_cut.front()].graph; - set cand; - set cand_raw; - getCandidatePivots(lhs_graph, &cand, &cand_raw); - vdest_map_t v_dest_map; - vsrc_map_t v_src_map; - for (auto v : vertices_range(lhs_graph)) { - for (const auto &e : to_cut) { - v_dest_map[v].emplace_back(e, v); - v_src_map[e].push_back(v); - } - } - - vector depths; - calcDepths(lhs_graph, depths); - - auto region_map = assignRegions(lhs_graph); - - DEBUG_PRINTF("see if we can get a better lhs by allowing another cut\n"); - LitCollection lit1(lhs_graph, depths, region_map, cand, cand_raw, - cc.grey.minRoseLiteralLength, true, cc); - if (attemptSplit(ig, v_dest_map, v_src_map, to_cut, lit1)) { - return true; - } - - return doNetflowCut(ig, to_cut, cc.grey); -} - -static -bool improveLHS(RoseInGraph &ig, const vector &edges, - const CompileContext &cc) { - bool rv = false; - - vector src_verts; - getSourceVerts(ig, edges, &src_verts); - - map> by_src; - for (const auto &e : edges) { - by_src[source(e, ig)].push_back(e); - } - - for (auto v : src_verts) { - const vector &local = by_src[v]; - - vector graphs; - map > by_graph; - for (const auto &e : local) { - NGHolder *gp = ig[e].graph.get(); - if (!contains(by_graph, gp)) { - graphs.push_back(gp); - } - by_graph[gp].push_back(e); - } - - for (auto h : graphs) { - const vector &local2 = by_graph[h]; - if (local2.size() == 1) { - rv |= lastChanceImproveLHS(ig, local2.front(), cc); - continue; - } - - bool lrv = lastChanceImproveLHS(ig, local2, cc); - if (lrv) { - rv = true; - } else { - for (const auto &e2 : local2) { - rv |= lastChanceImproveLHS(ig, e2, cc); - } - } - } - } - - return rv; -} - -static -void processLHS(RoseInGraph &ig, const CompileContext &cc) { - bool redo; - do { - redo = false; - vector to_improve; - for (const auto &lhs : edges_range(ig)) { - if (ig[source(lhs, ig)].type != RIV_START - && ig[source(lhs, ig)].type != RIV_ANCHORED_START) { - continue; - } - - if (ig[target(lhs, ig)].type == RIV_LITERAL) { - 
DEBUG_PRINTF("checking lhs->'%s'\n", - ig[target(lhs, ig)].s.c_str()); - } else { - DEBUG_PRINTF("checking lhs->?\n"); - } - - - /* if check if lhs is nasty */ - if (ig[target(lhs, ig)].type == RIV_ACCEPT) { - to_improve.push_back(lhs); - continue; - } - - assert(ig[lhs].graph); - const NGHolder *h = ig[lhs].graph.get(); - - vector depths; - calcDepths(*h, depths); - - if (!isLHSTransient(*h, depths, cc.grey) - && !literalIsWholeGraph(*h, ig[target(lhs, ig)].s) - && !isLHSUsablyAnchored(*h, depths, cc.grey)) { - to_improve.push_back(lhs); - } - } - - DEBUG_PRINTF("inspecting %zu lhs\n", to_improve.size()); - if (to_improve.size() > 50) { - DEBUG_PRINTF("too big\n"); - break; - } - - redo = improveLHS(ig, to_improve, cc); - DEBUG_PRINTF("redo = %d\n", (int)redo); - } while (redo); - - vector to_inspect; /* to prevent surprises caused by us - * altering the graph while iterating */ - for (const auto &e : edges_range(ig)) { - if (ig[source(e, ig)].type == RIV_START - || ig[source(e, ig)].type == RIV_ANCHORED_START) { - to_inspect.push_back(e); - } - } - - for (const auto &lhs : to_inspect) { - if (ig[target(lhs, ig)].type == RIV_LITERAL) { - if (removeLiteralFromLHS(ig, lhs, cc)) { - handleLhsCliche(ig, lhs); - } else { - /* telling us to delete the edge */ - remove_edge(lhs, ig); - } - } - } -} - -static -void tryNetflowCutForRHS(RoseInGraph &ig, const Grey &grey) { - vector to_improve; - for (const auto &rhs : edges_range(ig)) { - if (ig[target(rhs, ig)].type != RIV_ACCEPT) { - continue; - } - - if (ig[source(rhs, ig)].type == RIV_LITERAL) { - DEBUG_PRINTF("checking '%s'->rhs\n", ig[source(rhs, ig)].s.c_str()); - } else { - DEBUG_PRINTF("checking ?->rhs\n"); - } - - if (!ig[rhs].graph) { - continue; - } - - DEBUG_PRINTF("%zu vertices\n", num_vertices(*ig[rhs].graph)); - if (num_vertices(*ig[rhs].graph) < 512) { - DEBUG_PRINTF("small\n"); - continue; - } - - /* if check if rhs is nasty */ - to_improve.push_back(rhs); - } - - DEBUG_PRINTF("inspecting %zu lhs\n", to_improve.size()); - if (to_improve.size() > 50) { - DEBUG_PRINTF("too big\n"); - return; - } - - for (const auto &e : to_improve) { - vector to_cut(1, e); - doNetflowCut(ig, to_cut, grey); - } -} - -/* just make the string nocase and get the graph to handle case mask, TODO. - * This could be more nuanced but the effort would probably be better spent - * just making rose less bad. 
*/ -static -void makeNocaseWithPrefixMask(RoseInGraph &g, RoseInVertex v) { - for (const auto &e : in_edges_range(v, g)) { - const RoseInVertex u = source(e, g); - - if (!g[e].graph) { - g[e].graph = make_shared(whatRoseIsThis(g, e)); - g[e].graph_lag = g[v].s.length(); - NGHolder &h = *g[e].graph; - - assert(!g[e].maxBound || g[e].maxBound == ROSE_BOUND_INF); - - if (g[u].type == RIV_START) { - add_edge(h.startDs, h.accept, h); - h[h.startDs].reports.insert(0); - } else if (g[e].maxBound == ROSE_BOUND_INF) { - add_edge(h.start, h.accept, h); - NFAVertex ds = add_vertex(h); - - h[ds].char_reach = CharReach::dot(); - - NFAEdge e_start_to_ds = add_edge(h.start, ds, h); - add_edge(ds, ds, h); - add_edge(ds, h.accept, h); - h[h.start].reports.insert(0); - h[ds].reports.insert(0); - - if (g[u].type == RIV_LITERAL) { - h[e_start_to_ds].tops.insert(DEFAULT_TOP); - } - } else { - assert(g[u].type == RIV_ANCHORED_START); - add_edge(h.start, h.accept, h); - h[h.start].reports.insert(0); - } - } - - if (!g[e].graph_lag) { - continue; - } - unique_ptr newg = cloneHolder(*g[e].graph); - restoreTrailingLiteralStates(*newg, g[v].s, g[e].graph_lag); - g[e].graph_lag = 0; - g[e].graph = move(newg); - } - - make_nocase(&g[v].s); -} - -static -unique_ptr makeGraphCopy(const NGHolder *g) { - if (g) { - return cloneHolder(*g); - } else { - return nullptr; - } -} - -static -void explodeLiteral(RoseInGraph &g, RoseInVertex v, - vector &exploded) { - for (const auto &lit : exploded) { - RoseInVertex v_new = add_vertex(g[v], g); - g[v_new].s = lit; - - for (const auto &e : in_edges_range(v, g)) { - RoseInEdge e2 = add_edge(source(e, g), v_new, g[e], g); - // FIXME: are we safe to share graphs here? For now, make our very - // own copy. - g[e2].graph = makeGraphCopy(g[e].graph.get()); - } - - for (const auto &e : out_edges_range(v, g)) { - RoseInEdge e2 = add_edge(v_new, target(e, g), g[e], g); - // FIXME: are we safe to share graphs here? For now, make our very - // own copy. - g[e2].graph = makeGraphCopy(g[e].graph.get()); - } - } - - clear_vertex(v, g); - remove_vertex(v, g); -} - -/* Sadly rose is hacky in terms of mixed case literals. TODO: remove when rose - * becomes less bad */ -static -void handleLongMixedSensitivityLiterals(RoseInGraph &g) { - const size_t maxExploded = 8; // only case-explode this far - - vector verts; - - for (auto v : vertices_range(g)) { - if (g[v].type != RIV_LITERAL) { - continue; - } - - ue2_literal &s = g[v].s; - - if (!mixed_sensitivity(s)) { - continue; - } - - if (s.length() < MAX_MASK2_WIDTH) { - DEBUG_PRINTF("mixed lit will be handled by benefits mask\n"); - continue; - } - - DEBUG_PRINTF("found mixed lit of len %zu\n", s.length()); - verts.push_back(v); - } - - for (auto v : verts) { - vector exploded; - case_iter cit = caseIterateBegin(g[v].s), cite = caseIterateEnd(); - for (; cit != cite; ++cit) { - exploded.emplace_back(*cit, false); - if (exploded.size() > maxExploded) { - goto dont_explode; - } - } - - DEBUG_PRINTF("exploding literal into %zu pieces\n", exploded.size()); - explodeLiteral(g, v, exploded); - continue; - - dont_explode: - DEBUG_PRINTF("converting to nocase with prefix mask\n"); - makeNocaseWithPrefixMask(g, v); - } - - DEBUG_PRINTF("done!\n"); -} - -static -void dedupe(RoseInGraph &g) { - /* We know that every prefix/infix is unique after the rose construction. - * - * If a vertex has out-going graphs with the same rewind and they are equal - * we can dedupe the graph. - * - * After this, we may share graphs on out-edges of a vertex. 
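 *
 * (Sketch added for clarity; the template parameters below were lost in this
 * extract and presumably read:
 *
 *     map<pair<u32, u64a>, vector<shared_ptr<NGHolder>>> buckets;
 *
 * i.e. a cheap (graph_lag, hash_holder) key filters candidates, and the
 * expensive is_equal() check confirms true equality within a bucket before a
 * graph pointer is reused on an edge.)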
*/ - map, vector>> buckets; - - for (auto v : vertices_range(g)) { - buckets.clear(); - - for (const auto &e : out_edges_range(v, g)) { - if (!g[e].graph || g[target(e, g)].type != RIV_LITERAL) { - continue; - } - auto k = make_pair(g[e].graph_lag, hash_holder(*g[e].graph)); - auto &bucket = buckets[k]; - for (const auto &h : bucket) { - if (is_equal(*g[e].graph, 0U, *h, 0U)) { - g[e].graph = h; - goto next_edge; - } - } - - bucket.push_back(g[e].graph); - next_edge:; - } - } -} - -static -bool pureReport(NFAVertex v, const NGHolder &g) { - for (auto w : adjacent_vertices_range(v, g)) { - if (w != g.accept && w != g.acceptEod) { - return false; - } - } - return true; -} - -static -bool pureReport(const vector &vv, const NGHolder &g) { - for (auto v : vv) { - if (!pureReport(v, g)) { - return false; - } - } - - return true; -} - -/* ensures that a vertex is followed by a start construct AND the cyclic states - * has a reasonably wide reach */ -static -bool followedByStar(NFAVertex v, const NGHolder &g) { - set succ; - insert(&succ, adjacent_vertices(v, g)); - - set asucc; - - for (auto w : adjacent_vertices_range(v, g)) { - if (g[w].char_reach.count() < N_CHARS - MAX_ESCAPE_CHARS) { - continue; /* state is too narrow to be considered as a sane star - cyclic */ - } - - asucc.clear(); - insert(&asucc, adjacent_vertices(w, g)); - - if (asucc == succ) { - return true; - } - } - return false; -} - -static -bool followedByStar(const vector &vv, const NGHolder &g) { - for (auto v : vv) { - if (!followedByStar(v, g)) { - return false; - } - } - - return true; -} - -static -bool isEodPrefixCandidate(const NGHolder &g) { - if (in_degree(g.accept, g)) { - DEBUG_PRINTF("graph isn't eod anchored\n"); - return false; - } - - // TODO: handle more than one report. - if (all_reports(g).size() != 1) { - return false; - } - - return true; -} - - -static -bool isEodWithPrefix(const RoseInGraph &g) { - if (num_vertices(g) != 2) { - return false; - } - - for (const auto &e : edges_range(g)) { - RoseInVertex u = source(e, g), v = target(e, g); - DEBUG_PRINTF("edge from %d -> %d\n", g[u].type, g[v].type); - - if (g[u].type != RIV_START && g[u].type != RIV_ANCHORED_START) { - DEBUG_PRINTF("source not start, type=%d\n", g[u].type); - return false; - } - - if (g[v].type != RIV_ACCEPT && g[v].type != RIV_ACCEPT_EOD) { - DEBUG_PRINTF("target not accept, type=%d\n", g[v].type); - return false; - } - - // Haigs not handled. - if (g[e].haig) { - DEBUG_PRINTF("edge has haig\n"); - return false; - } - - if (!g[e].graph) { - DEBUG_PRINTF("no graph on edge\n"); - return false; - } - - if (!isEodPrefixCandidate(*g[e].graph)) { - DEBUG_PRINTF("graph is not eod prefix candidate\n"); - return false; - } - } - - return true; -} - -static -void processEodPrefixes(RoseInGraph &g) { - // Find edges to accept with EOD-anchored graphs that we can move over to - // acceptEod. - vector acc_edges; - for (const auto &e : edges_range(g)) { - if (g[target(e, g)].type != RIV_ACCEPT) { - continue; - } - if (g[e].haig || !g[e].graph) { - continue; - } - if (!isEodPrefixCandidate(*g[e].graph)) { - continue; - } - - // TODO: handle cases with multiple out-edges. 
- if (out_degree(source(e, g), g) > 1) { - continue; - } - - acc_edges.push_back(e); - } - - set accepts; - - for (const RoseInEdge &e : acc_edges) { - RoseInVertex u = source(e, g), v = target(e, g); - assert(g[e].graph); - assert(g[v].type == RIV_ACCEPT); - assert(all_reports(*g[e].graph).size() == 1); - - // Move this edge from accept to acceptEod and give it the right reports - // from the graph on the edge. - const set reports = all_reports(*g[e].graph); - RoseInVertex w = add_vertex( - RoseInVertexProps::makeAcceptEod(reports), g); - add_edge(u, w, g[e], g); - - remove_edge(e, g); - accepts.insert(v); - } - - for (auto v : accepts) { - if (!in_degree(v, g)) { - remove_vertex(v, g); - } - } -} - -/** Run some reduction passes on the graphs on our edges. */ -static -void reduceGraphs(RoseInGraph &g, const CompileContext &cc) { - for (const auto &e : edges_range(g)) { - if (!g[e].graph) { - continue; - } - NGHolder &h = *g[e].graph; - assert(h.kind == whatRoseIsThis(g, e)); - DEBUG_PRINTF("before, graph %p has %zu vertices, %zu edges\n", &h, - num_vertices(h), num_edges(h)); - - pruneUseless(h); - - reduceGraphEquivalences(h, cc); - - removeRedundancy(h, SOM_NONE); /* rose doesn't track som */ - - DEBUG_PRINTF("after, graph %p has %zu vertices, %zu edges\n", &h, - num_vertices(h), num_edges(h)); - - // It's possible that one of our graphs may have reduced to a dot-star - // cliche, i.e. it contains a startDs->accept edge. If so, we can - // remove it from the edge and just use edge bounds to represent it. - if (edge(h.startDs, h.accept, h).second) { - DEBUG_PRINTF("graph reduces to dot-star, deleting\n"); - g[e].graph.reset(); - g[e].graph_lag = 0; - g[e].minBound = 0; - g[e].maxBound = ROSE_BOUND_INF; - } - } -} - -static -unique_ptr buildRose(const NGHolder &h, bool desperation, - const CompileContext &cc) { - /* Need to pick a pivot point which splits the graph in two with starts on - * one side and accepts on the other. 
Thus the pivot needs to dominate all - * the accept vertices */ - - /* maps a vertex in h to one of its images in the rose graph */ - vdest_map_t v_dest_map; - vsrc_map_t v_src_map; - - /* create trivial rose graph */ - unique_ptr igp = makeTrivialGraph(h, v_dest_map, v_src_map); - RoseInGraph &ig = *igp; - - /* root graph is the graph on the only edge in our new RoseInGraph */ - assert(num_edges(ig) == 1); - shared_ptr root_g = ig[*edges(ig).first].graph; - assert(root_g); - - /* find the literals */ - set cand; - set cand_raw; - getCandidatePivots(*root_g, &cand, &cand_raw); - - DEBUG_PRINTF("|cand| = %zu\n", cand.size()); - - vector depths; - calcDepths(*root_g, depths); - - auto region_map = assignRegions(*root_g); - - LitCollection lits(*root_g, depths, region_map, cand, cand_raw, - cc.grey.minRoseLiteralLength, desperation, cc); - - for (u32 i = 0; i < cc.grey.roseDesiredSplit; ++i) { - DEBUG_PRINTF("attempting split %u (desired %u)\n", i, - cc.grey.roseDesiredSplit); - unique_ptr split = lits.pickNext(); - - /* need to check we aren't creating any enveloping literals */ - while (split && enveloped(*split, ig, v_dest_map)) { - DEBUG_PRINTF("bad cand; getting next split\n"); - split = lits.pickNext(); - } - - if (!split) { - DEBUG_PRINTF("no more lits :(\n"); - break; - } - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - } - - /* try for more split literals if they are followed by .* or accept */ - for (;;) { - DEBUG_PRINTF("attempting bonus split\n"); - unique_ptr split = lits.pickNext(); - - /* need to check we aren't creating any enveloping literals */ - while (split - && (enveloped(*split, ig, v_dest_map) - || (!pureReport(split->vv, *root_g) - && !followedByStar(split->vv, *root_g)))) { - DEBUG_PRINTF("bad cand; getting next split\n"); - split = lits.pickNext(); - } - - if (!split) { - DEBUG_PRINTF("no more lits :(\n"); - break; - } - DEBUG_PRINTF("got bonus split\n"); - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - } - - processLHS(ig, cc); - - if (num_vertices(ig) <= 2) { - // At present, we don't accept all outfixes. - // However, we do handle the specific case of a rose that precedes an - // acceptEod, which we will support as a prefix to a special EOD event - // "literal". - if (!isEodWithPrefix(ig)) { - igp.reset(); - return igp; - } - } - - processEodPrefixes(ig); - - processInfixes(ig, cc); - - handleLongMixedSensitivityLiterals(ig); - - dedupe(ig); - - pruneUseless(ig); - - reduceGraphs(ig, cc); - - dumpPreRoseGraph(ig, cc.grey); - - renumber_vertices(ig); - calcVertexOffsets(ig); - return igp; -} - -static -void desperationImprove(RoseInGraph &ig, const CompileContext &cc) { - DEBUG_PRINTF("rose said no; can we do better?\n"); - - /* infixes are tricky as we have to worry about delays, enveloping - * literals, etc */ - tryNetflowCutForRHS(ig, cc.grey); - processInfixes(ig, cc); - - handleLongMixedSensitivityLiterals(ig); - dedupe(ig); - pruneUseless(ig); - renumber_vertices(ig); - calcVertexOffsets(ig); -} - -static -bool addRose(RoseBuild &rose, RoseInGraph &ig, bool prefilter, - bool final_chance, const ReportManager &rm, - const CompileContext &cc) { - if (!ensureImplementable(rose, ig, false, final_chance, rm, cc) - && !prefilter) { - return false; - } - return rose.addRose(ig, prefilter); -} - -bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const ReportManager &rm, const CompileContext &cc) { - if (!cc.grey.allowRose) { - return false; - } - - // We should have at least one edge into accept or acceptEod! 
- assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); - - unique_ptr igp = buildRose(h, false, cc); - if (igp && addRose(rose, *igp, prefilter, false, rm, cc)) { - goto ok; - } - - igp = buildRose(h, true, cc); - - if (igp) { - if (addRose(rose, *igp, prefilter, false, rm, cc)) { - goto ok; - } - - desperationImprove(*igp, cc); - - if (addRose(rose, *igp, prefilter, false, rm, cc)) { - goto ok; - } - } - - DEBUG_PRINTF("rose build failed\n"); - return false; - -ok: - DEBUG_PRINTF("rose build ok\n"); - return true; -} - -bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const ReportManager &rm, const CompileContext &cc) { - DEBUG_PRINTF("final chance rose\n"); - if (!cc.grey.allowRose) { - return false; - } - assert(h.kind == NFA_OUTFIX); - - ue2_literal lit; - bool anch = false; - shared_ptr rhs = make_shared(); - if (!splitOffLeadingLiteral(h, &lit, &*rhs)) { - DEBUG_PRINTF("no floating literal\n"); - anch = true; - if (!splitOffAnchoredLeadingLiteral(h, &lit, &*rhs)) { - DEBUG_PRINTF("no anchored literal\n"); - return false; - } - } - - if (lit.length() < cc.grey.minRoseLiteralLength - || minStringPeriod(lit) < 2 ) { - DEBUG_PRINTF("lit too weak\n"); - return false; - } - - assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); - - RoseInGraph ig; - RoseInVertex s - = add_vertex(RoseInVertexProps::makeStart(anch), ig); - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - add_edge(s, v, RoseInEdgeProps(0, anch ? 0 : ROSE_BOUND_INF), ig); - - ue2_literal lit2; - if (getTrailingLiteral(h, &lit2) - && lit2.length() >= cc.grey.minRoseLiteralLength - && minStringPeriod(lit2) >= 2) { - - /* TODO: handle delay */ - size_t overlap = maxOverlap(lit, lit2, 0); - u32 delay2 = lit2.length() - overlap; - delay2 = min(delay2, maxDelay(cc)); - delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2); - rhs->kind = NFA_INFIX; - assert(delay2 <= lit2.length()); - - RoseInVertex w - = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig); - add_edge(v, w, RoseInEdgeProps(rhs, delay2), ig); - - NFAVertex reporter = getSoleSourceVertex(h, h.accept); - assert(reporter); - const auto &reports = h[reporter].reports; - RoseInVertex a = - add_vertex(RoseInVertexProps::makeAccept(reports), ig); - add_edge(w, a, RoseInEdgeProps(0U, 0U), ig); - } else { - RoseInVertex a = - add_vertex(RoseInVertexProps::makeAccept(set()), ig); - add_edge(v, a, RoseInEdgeProps(rhs, 0U), ig); - } - - renumber_vertices(ig); - calcVertexOffsets(ig); - - return addRose(rose, ig, prefilter, true /* final chance */, rm, cc); -} - -bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, - const CompileContext &cc) { - if (!cc.grey.allowRose) { - return false; - } - - // We should have at least one edge into accept or acceptEod! - assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); - - unique_ptr igp; - - // First pass. - - igp = buildRose(h, false, cc); - if (igp && roseCheckRose(*igp, prefilter, rm, cc)) { - return true; - } - - // Second ("desperation") pass. 
- - igp = buildRose(h, true, cc); - if (igp) { - if (roseCheckRose(*igp, prefilter, rm, cc)) { - return true; - } - - desperationImprove(*igp, cc); - - if (roseCheckRose(*igp, prefilter, rm, cc)) { - return true; - } - } - - return false; -} - -} // namespace ue2 diff --git a/src/nfagraph/ng_rose.h b/src/nfagraph/ng_rose.h deleted file mode 100644 index 9f69fe0c..00000000 --- a/src/nfagraph/ng_rose.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose construction from NGHolder. - */ - -#ifndef NG_ROSE_H -#define NG_ROSE_H - -#include "ng_holder.h" -#include "ue2common.h" - -#include - -namespace ue2 { - -class NGHolder; -class ReportManager; -class RoseBuild; - -struct CompileContext; -struct ue2_literal; - -/** \brief Attempt to consume the entire pattern in graph \a h with Rose. - * Returns true if successful. */ -bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const ReportManager &rm, const CompileContext &cc); - -/** \brief Attempt to consume the entire pattern in graph \a h with Rose. - * This is the last attempt to handle a pattern before we resort to an outfix. - * Returns true if successful. */ -bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const ReportManager &rm, const CompileContext &cc); - -/** \brief True if the pattern in \a h is consumable by Rose. This function - * may be conservative (return false even if supported) for efficiency. 
*/ -bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, - const CompileContext &cc); - -} // namespace ue2 - -#endif // NG_ROSE_H diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 28ad9549..b6618194 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -45,7 +45,6 @@ #include "ng_redundancy.h" #include "ng_region.h" #include "ng_reports.h" -#include "ng_rose.h" #include "ng_split.h" #include "ng_util.h" #include "ng_width.h" diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 68cc67a1..e6861ea4 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -46,7 +46,6 @@ #include "nfagraph/ng_region.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" -#include "nfagraph/ng_rose.h" #include "nfagraph/ng_util.h" #include "nfagraph/ng_width.h" #include "util/charreach.h" From d402ef9b28b3c3433fd9a65aebd0f9111c3d4076 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 12 Jan 2017 13:33:34 +1100 Subject: [PATCH 049/326] violet: heuristic tweaks for ensuring implementability --- src/nfagraph/ng_violet.cpp | 268 +++++++++++++++++++++++-------------- 1 file changed, 164 insertions(+), 104 deletions(-) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index b6618194..6ca6f187 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -132,83 +132,20 @@ bool createsTransientLHS(const NGHolder &g, const vector<NFAVertex> &vv, return true; } -namespace { -/** - * Information on a cut: vertices and literals. - */ -struct VertLitInfo { - VertLitInfo() {} - VertLitInfo(NFAVertex v, const set<ue2_literal> &litlit, bool c_anch, - bool c_tran = false) - : vv(vector<NFAVertex>(1, v)), lit(litlit), creates_anchored(c_anch), - creates_transient(c_tran) {} - VertLitInfo(const vector<NFAVertex> &vv_in, const set<ue2_literal> &lit_in, - bool c_anch) - : vv(vv_in), lit(lit_in), creates_anchored(c_anch) {} - vector<NFAVertex> vv; - set<ue2_literal> lit; - - bool creates_anchored = false; - bool creates_transient = false; -}; - -/** - * \brief Comparator class for sorting LitCollection::lits. - * - * This is separated out from LitCollection itself as passing LitCollection to - * std::sort() would incur a (potentially expensive) copy.
- */ -class LitComparator { -public: - LitComparator(const NGHolder &g_in, bool sa, bool st) - : g(g_in), seeking_anchored(sa), seeking_transient(st) {} - bool operator()(const unique_ptr &a, - const unique_ptr &b) const { - assert(a && b); - - if (seeking_anchored) { - if (a->creates_anchored != b->creates_anchored) { - return a->creates_anchored < b->creates_anchored; - } - } - - if (seeking_transient) { - if (a->creates_transient != b->creates_transient) { - return a->creates_transient < b->creates_transient; - } - } - - u64a score_a = scoreSet(a->lit); - u64a score_b = scoreSet(b->lit); - - if (score_a != score_b) { - return score_a > score_b; - } - - /* vertices should only be in one candidate cut */ - assert(a->vv == b->vv || a->vv.front() != b->vv.front()); - return g[a->vv.front()].index > g[b->vv.front()].index; - } - -private: - const NGHolder &g; /**< graph on which cuts are found */ - - bool seeking_anchored; - bool seeking_transient; -}; + return rv; } static size_t shorter_than(const set &s, size_t limit) { - size_t count = 0; - - for (const auto &lit : s) { - if (lit.length() < limit) { - count++; - } - } - - return count; + return count_if(s.begin(), s.end(), + [&](const ue2_literal &a) { return a.length() < limit; }); } static @@ -233,14 +170,101 @@ u32 min_period(const set &s) { return rv; } -#define MIN_ANCHORED_LEN 2 +namespace { +/** + * Information on a cut: vertices and literals. + */ +struct VertLitInfo { + VertLitInfo() {} + VertLitInfo(NFAVertex v, const set &litlit, bool c_anch, + bool c_tran = false) + : vv(vector(1, v)), lit(litlit), creates_anchored(c_anch), + creates_transient(c_tran) {} + VertLitInfo(const vector &vv_in, const set &lit_in, + bool c_anch) + : vv(vv_in), lit(lit_in), creates_anchored(c_anch) {} + vector vv; + set lit; + bool creates_anchored = false; + bool creates_transient = false; + double split_ratio = 0; +}; + +#define LAST_CHANCE_STRONG_LEN 1 + +/** + * \brief Comparator class for comparing different literal cuts. 
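+ *
+ * (When last_chance is set and neither cut involves a literal at or below
+ * LAST_CHANCE_STRONG_LEN, the ordering below falls back to split_ratio,
+ * preferring the cut that divides the graph's vertices most evenly; see
+ * calcSplitRatio() above.)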
+ */ +class LitComparator { +public: + LitComparator(const NGHolder &g_in, bool sa, bool st, bool lc) + : g(g_in), seeking_anchored(sa), seeking_transient(st), + last_chance(lc) {} + bool operator()(const unique_ptr &a, + const unique_ptr &b) const { + assert(a && b); + + if (seeking_anchored) { + if (a->creates_anchored != b->creates_anchored) { + return a->creates_anchored < b->creates_anchored; + } + } + + if (seeking_transient) { + if (a->creates_transient != b->creates_transient) { + return a->creates_transient < b->creates_transient; + } + } + + if (last_chance + && min_len(a->lit) > LAST_CHANCE_STRONG_LEN + && min_len(b->lit) > LAST_CHANCE_STRONG_LEN) { + DEBUG_PRINTF("using split ratio %g , %g\n", a->split_ratio, + b->split_ratio); + return a->split_ratio < b->split_ratio; + } + + u64a score_a = scoreSet(a->lit); + u64a score_b = scoreSet(b->lit); + + if (score_a != score_b) { + return score_a > score_b; + } + + /* vertices should only be in one candidate cut */ + assert(a->vv == b->vv || a->vv.front() != b->vv.front()); + return g[a->vv.front()].index > g[b->vv.front()].index; + } + +private: + const NGHolder &g; /**< graph on which cuts are found */ + + bool seeking_anchored; + bool seeking_transient; + bool last_chance; +}; +} + +#define MIN_ANCHORED_LEN 2 +#define MIN_ANCHORED_DESPERATE_LEN 1 + +/* anchored here means that the cut creates a 'usefully' anchored LHS */ static bool validateRoseLiteralSetQuality(const set &s, u64a score, bool anchored, u32 min_allowed_floating_len, - bool desperation) { + bool desperation, bool last_chance) { u32 min_allowed_len = anchored ? MIN_ANCHORED_LEN : min_allowed_floating_len; + if (anchored && last_chance) { + min_allowed_len = MIN_ANCHORED_DESPERATE_LEN; + } + if (last_chance) { + desperation = true; + } + + DEBUG_PRINTF("validating%s set, min allowed len %u\n", + anchored ? " anchored" : "", min_allowed_len); assert(none_of(begin(s), end(s), bad_mixed_sensitivity)); @@ -269,6 +293,7 @@ bool validateRoseLiteralSetQuality(const set &s, u64a score, if (s.size() > 10 /* magic number is magic */ || s_min_len < min_allowed_len || (s_min_period <= 1 && min_allowed_len != 1)) { + DEBUG_PRINTF("candidate may be bad\n"); ok = false; } @@ -309,7 +334,7 @@ void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored, const set &a_dom, vector> *lits, u32 min_allowed_len, bool desperation, - const CompileContext &cc) { + bool last_chance, const CompileContext &cc) { assert(depths || !seeking_anchored); map scores; @@ -335,7 +360,7 @@ void getSimpleRoseLiterals(const NGHolder &g, bool seeking_anchored, } if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, - desperation)) { + desperation, last_chance)) { continue; } @@ -372,7 +397,7 @@ void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored, const set *allowed, vector> *lits, u32 min_allowed_len, bool desperation, - const CompileContext &cc) { + bool last_chance, const CompileContext &cc) { /* This allows us to get more places to split the graph as we are not limited to points where there is a single vertex to split at. 
*/ @@ -492,7 +517,7 @@ void getRegionRoseLiterals(const NGHolder &g, bool seeking_anchored, } if (!validateRoseLiteralSetQuality(s, score, anchored, min_allowed_len, - desperation)) { + desperation, last_chance)) { goto next_cand; } @@ -590,6 +615,7 @@ unique_ptr findBestSplit(const NGHolder &g, bool for_prefix, u32 min_len, const set *allowed_cand, const set *disallowed_cand, + bool last_chance, const CompileContext &cc) { assert(!for_prefix || depths); @@ -636,17 +662,16 @@ unique_ptr findBestSplit(const NGHolder &g, DEBUG_PRINTF("|cand| = %zu\n", cand.size()); bool seeking_anchored = for_prefix; - bool seeking_transient = for_prefix; //cc.streaming; + bool seeking_transient = for_prefix; - /* TODO: revisit when backstop goes away */ bool desperation = for_prefix && cc.streaming; vector> lits; /**< sorted list of potential cuts */ getSimpleRoseLiterals(g, seeking_anchored, depths, cand, &lits, min_len, - desperation, cc); + desperation, last_chance, cc); getRegionRoseLiterals(g, seeking_anchored, depths, cand_raw, allowed_cand, - &lits, min_len, desperation, cc); + &lits, min_len, desperation, last_chance, cc); if (lits.empty()) { DEBUG_PRINTF("no literals found\n"); @@ -660,7 +685,14 @@ unique_ptr findBestSplit(const NGHolder &g, } } - auto cmp = LitComparator(g, seeking_anchored, seeking_transient); + if (last_chance) { + for (auto &a : lits) { + a->split_ratio = calcSplitRatio(g, a->vv); + } + } + + auto cmp = LitComparator(g, seeking_anchored, seeking_transient, + last_chance); unique_ptr best = move(lits.back()); lits.pop_back(); @@ -801,7 +833,19 @@ unique_ptr findBestNormalSplit(const NGHolder &g, set bad_vertices = poisonVertices(g, vg, ee, cc.grey); return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, - nullptr, &bad_vertices, cc); + nullptr, &bad_vertices, false, cc); +} + +static +unique_ptr findBestLastChanceSplit(const NGHolder &g, + const RoseInGraph &vg, + const vector &ee, + const CompileContext &cc) { + assert(g.kind == NFA_OUTFIX || g.kind == NFA_INFIX || g.kind == NFA_SUFFIX); + set bad_vertices = poisonVertices(g, vg, ee, cc.grey); + + return findBestSplit(g, nullptr, false, cc.grey.minRoseLiteralLength, + nullptr, &bad_vertices, true, cc); } static @@ -878,11 +922,12 @@ unique_ptr findBestPrefixSplit(const NGHolder &g, const vector &depths, const RoseInGraph &vg, const vector &ee, + bool last_chance, const CompileContext &cc) { - assert(g.kind == NFA_PREFIX); + assert(g.kind == NFA_PREFIX || g.kind == NFA_OUTFIX); set bad_vertices = poisonVertices(g, vg, ee, cc.grey); auto rv = findBestSplit(g, &depths, true, cc.grey.minRoseLiteralLength, - nullptr, &bad_vertices, cc); + nullptr, &bad_vertices, last_chance, cc); /* large back edges may prevent us identifying anchored or transient cases * properly - use a simple walk instead */ @@ -913,7 +958,7 @@ unique_ptr findBestCleanSplit(const NGHolder &g, return nullptr; } return findBestSplit(g, nullptr, false, cc.grey.violetEarlyCleanLiteralLen, - &cleanSplits, nullptr, cc); + &cleanSplits, nullptr, false, cc); } static @@ -1385,12 +1430,11 @@ RoseInGraph populateTrivialGraph(const NGHolder &h) { } static -void avoidOutfixes(RoseInGraph &vg, const CompileContext &cc) { +void avoidOutfixes(RoseInGraph &vg, bool last_chance, + const CompileContext &cc) { STAGE_DEBUG_PRINTF("AVOIDING OUTFIX\n"); - if (num_vertices(vg) > 2) { - /* must be at least one literal aside from start and accept */ - return; - } + assert(num_vertices(vg) == 2); + assert(num_edges(vg) == 1); RoseInEdge e = *edges(vg).first; @@ -1400,13 
+1444,28 @@ void avoidOutfixes(RoseInGraph &vg, const CompileContext &cc) { renumber_vertices(h); renumber_edges(h); - unique_ptr split = findBestNormalSplit(h, vg, {e}, cc); + unique_ptr split = findBestNormalSplit(h, vg, {e}, cc); if (split && splitRoseEdge(h, vg, {e}, *split)) { DEBUG_PRINTF("split on simple literal\n"); - } else { - doNetflowCut(h, nullptr, vg, {e}, false, cc.grey); + return; } + + if (last_chance) { + /* look for a prefix split as it allows us to accept very weak anchored + * literals. */ + vector depths; + calcDepths(h, depths); + + split = findBestPrefixSplit(h, depths, vg, {e}, last_chance, cc); + + if (split && splitRoseEdge(h, vg, {e}, *split)) { + DEBUG_PRINTF("split on simple literal\n"); + return; + } + } + + doNetflowCut(h, nullptr, vg, {e}, false, cc.grey); } static @@ -1906,7 +1965,7 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, return true; } - unique_ptr split = findBestPrefixSplit(h, depths, vg, ee, cc); + auto split = findBestPrefixSplit(h, depths, vg, ee, false, cc); if (split && (split->creates_transient || split->creates_anchored) && splitRoseEdge(h, vg, ee, *split)) { @@ -2293,7 +2352,7 @@ bool replaceSuffixWithInfix(const NGHolder &h, RoseInGraph &vg, if (vli.lit.empty() || !validateRoseLiteralSetQuality(vli.lit, score, false, min_len, - false)) { + false, false)) { return false; } } @@ -2777,13 +2836,14 @@ bool splitForImplementabilty(RoseInGraph &vg, NGHolder &h, } unique_ptr split; + bool last_chance = true; if (h.kind == NFA_PREFIX) { vector depths; calcDepths(h, depths); - split = findBestPrefixSplit(h, depths, vg, edges, cc); + split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc); } else { - split = findBestNormalSplit(h, vg, edges, cc); + split = findBestLastChanceSplit(h, vg, edges, cc); } if (split && splitRoseEdge(h, vg, edges, *split)) { @@ -2803,7 +2863,7 @@ bool splitForImplementabilty(RoseInGraph &vg, NGHolder &h, bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, bool final_chance, const ReportManager &rm, const CompileContext &cc) { - DEBUG_PRINTF("checking for impl\n"); + DEBUG_PRINTF("checking for impl %d\n", final_chance); bool changed = false; bool need_to_recalc = false; u32 added_count = 0; @@ -2867,7 +2927,7 @@ bool ensureImplementable(RoseBuild &rose, RoseInGraph &vg, bool allow_changes, } static -RoseInGraph doInitialVioletTransform(const NGHolder &h, +RoseInGraph doInitialVioletTransform(const NGHolder &h, bool last_chance, const CompileContext &cc) { assert(!can_never_match(h)); @@ -2880,7 +2940,7 @@ RoseInGraph doInitialVioletTransform(const NGHolder &h, DEBUG_PRINTF("hello world\n"); /* Step 1: avoid outfixes as we always have to run them. 
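 * (An outfix is not gated by any literal, so its engine would have to be
 * run over every byte scanned; any split on a literal allows that work to
 * be deferred until the literal matcher fires.)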
*/ - avoidOutfixes(vg, cc); + avoidOutfixes(vg, last_chance, cc); if (num_vertices(vg) <= 2) { return vg; /* unable to transform pattern */ @@ -2927,7 +2987,7 @@ RoseInGraph doInitialVioletTransform(const NGHolder &h, bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, bool last_chance, const ReportManager &rm, const CompileContext &cc) { - auto vg = doInitialVioletTransform(h, cc); + auto vg = doInitialVioletTransform(h, last_chance, cc); if (num_vertices(vg) <= 2) { return false; } @@ -2946,7 +3006,7 @@ bool doViolet(RoseBuild &rose, const NGHolder &h, bool prefilter, bool checkViolet(const ReportManager &rm, const NGHolder &h, bool prefilter, const CompileContext &cc) { - auto vg = doInitialVioletTransform(h, cc); + auto vg = doInitialVioletTransform(h, true, cc); if (num_vertices(vg) <= 2) { return false; } From fbaa0a1b25b5c9992c1bc668297706f45bbbf3d5 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 31 Jan 2017 09:09:57 +1100 Subject: [PATCH 050/326] make expected-too-large patterns even larger --- unit/hyperscan/bad_patterns.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 1a33210d..37307bc9 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -70,8 +70,8 @@ 70:/foo[^\x00-\xff]/ #Pattern can never match. 71:/foo[^\x00-\xff]$/ #Pattern can never match. 72:/\Bd\B/i{min_length=2,min_offset=4,max_offset=54} #Expression has min_length=2 but can only produce matches of length 1 bytes at most. -73:/(((.|aaa)aaaaaa.aaa){14,19}a((a|a{5,6}|aa){3,11}|aa.|a){2}){4}\Z/sm #Pattern is too large. -74:/(((.|aaa)aaaaaa.aaa){14,19}a((a|a{5,6}|aa){3,11}|aa.|a){2}){4}\Z/smL #Pattern is too large. +73:/(((.|aaa)aaaaaa.aaa){14,19}a((a|a{5,6}|aa){3,11}|aa.|a){2}){40}\Z/sm #Pattern is too large. +74:/(((.|aaa)aaaaaa.aaa){14,19}a((a|a{5,6}|aa){3,11}|aa.|a){2}){40}\Z/smL #Pattern is too large. 75:/\B/s8{min_length=1} #Expression has min_length=1 but can only produce matches of length 0 bytes at most. 76:/(f|d|(\b)|i|a\Z)/mHV8{min_length=2,min_offset=9,max_offset=14} #Expression has min_length=2 but can only produce matches of length 1 bytes at most. 77:/(f|e|d{19,}|h\Z|^j|\Aa)/smi{min_length=7,min_offset=8,max_offset=18} #Extended parameter constraints can not be satisfied for any match from this expression. From bbd64f98ae49e34e6ce405f57b699201ed686b1c Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 31 Jan 2017 09:29:41 +1100 Subject: [PATCH 051/326] allow streams to be marked as exhausted in more cases At stream boundaries, we can mark streams as exhausted if there are no groups active and there are no other ways to report matches. This allows us to stop maintaining the history buffer on subsequent stream writes. Previously, streams were only marked as exhausted if a pure highlander case reported all patterns or the outfix in a sole outfix case died.
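In outline, the check (condensed from the can_never_match() helper this
patch adds to src/rose/stream.c, run at the end of roseStreamExec() when
matching has not already been told to stop) is:

    /* A stream write may finish with STATUS_EXHAUSTED only when every
     * route to a future match has been closed off. */
    static really_inline
    int can_never_match(const struct RoseEngine *t, char *state,
                        struct hs_scratch *scratch, size_t length,
                        u64a offset) {
        if (scratch->tctxt.groups) {
            return 0; /* literal groups still on: literals may yet match */
        }
        if (offset + length <= t->anchoredDistance) {
            return 0; /* still inside the anchored matcher's region */
        }
        if (t->lastByteHistoryIterOffset) {
            return 0; /* last-byte history roles are too hard to rule out */
        }
        if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
            return 0; /* an outfix or suffix engine is still alive */
        }
        return 1; /* caller sets scratch->core_info.status to
                   * STATUS_EXHAUSTED */
    }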
--- src/hwlm/hwlm.c | 7 ++- src/rose/rose_build_add.cpp | 4 +- src/rose/rose_build_bytecode.cpp | 14 ++++- src/rose/rose_build_compile.cpp | 8 ++- src/rose/rose_build_groups.cpp | 95 ++++++++++++++++++++++++++------ src/rose/rose_build_impl.h | 2 + src/rose/rose_build_misc.cpp | 7 +++ src/rose/stream.c | 38 ++++++++++++- src/runtime.c | 6 +- src/scratch.h | 9 ++- src/util/container.h | 11 ++++ 11 files changed, 171 insertions(+), 30 deletions(-) diff --git a/src/hwlm/hwlm.c b/src/hwlm/hwlm.c index 3c7615a7..6eaa7ed1 100644 --- a/src/hwlm/hwlm.c +++ b/src/hwlm/hwlm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -172,6 +172,8 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen, hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, size_t start, HWLMCallback cb, void *ctxt, hwlm_group_t groups) { + assert(t); + DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups); if (!groups) { DEBUG_PRINTF("groups all off\n"); @@ -201,6 +203,9 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch, size_t len, size_t start, HWLMCallback cb, void *ctxt, hwlm_group_t groups) { + assert(t); + assert(scratch); + const u8 *hbuf = scratch->core_info.hbuf; const size_t hlen = scratch->core_info.hlen; const u8 *buf = scratch->core_info.buf; diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index e6861ea4..01d7d827 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -722,7 +722,9 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, RoseEdge e = add_edge(v, w, g); g[e].minBound = 0; g[e].maxBound = 0; - g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; + /* No need to set history as the event is only delivered at the last + * byte anyway - no need to invalidate stale entries. 
*/ + g[e].history = ROSE_ROLE_HISTORY_NONE; DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b4270278..736e0d35 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -172,6 +172,7 @@ struct RoseResources { bool has_lit_delay = false; bool has_lit_check = false; // long literal support bool has_anchored = false; + bool has_floating = false; bool has_eod = false; }; @@ -352,6 +353,11 @@ bool needsCatchup(const RoseBuildImpl &build, static bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { + if (!resources.has_floating) { + DEBUG_PRINTF("no floating table\n"); + return false; + } + if (resources.has_outfixes || resources.has_suffixes || resources.has_leftfixes) { DEBUG_PRINTF("has engines\n"); @@ -429,6 +435,7 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check); DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); + DEBUG_PRINTF("has_floating=%d\n", bc.resources.has_floating); DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod); if (isPureFloating(bc.resources, build.cc)) { @@ -539,7 +546,10 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, // Get the mask of initial vertices due to root and anchored_root. rose_group RoseBuildImpl::getInitialGroups() const { - rose_group groups = getSuccGroups(root) | getSuccGroups(anchored_root); + rose_group groups = getSuccGroups(root) + | getSuccGroups(anchored_root) + | boundary_group_mask; + DEBUG_PRINTF("initial groups = %016llx\n", groups); return groups; } @@ -2227,6 +2237,7 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { auto it = bc.roleStateIndices.find(v); if (it != end(bc.roleStateIndices)) { lb_roles.push_back(it->second); + DEBUG_PRINTF("last byte %u\n", it->second); } } @@ -5521,6 +5532,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset = ROUNDUP_CL(currOffset); fmatcherOffset = currOffset; currOffset += verify_u32(fsize); + bc.resources.has_floating = true; } // Build EOD-anchored HWLM matcher. 
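In outline, the compile-side boundary-group scheme (names as in
allocateGroupForBoundary() and getInitialGroups() in this patch) is:

    /* Reserve one literal group for boundary (EOD) reports so that a
     * stream with such reports pending never looks dead to the new
     * group-based exhaustion check: the bit is folded into the initial
     * groups and treated as unsquashable, keeping tctxt.groups non-zero. */
    build.boundary_group_mask = 1ULL << group_always_on;
    groups = getSuccGroups(root) | getSuccGroups(anchored_root)
             | boundary_group_mask;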
diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 24df8427..63b5bd0f 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1236,11 +1236,15 @@ void buildRoseSquashMasks(RoseBuildImpl &tbi) { } } - rose_group unsquashable = 0; + rose_group unsquashable = tbi.boundary_group_mask; for (u32 lit_id : lit_ids) { const rose_literal_info &info = tbi.literal_info[lit_id]; - if (info.vertices.size() > 1 || !info.delayed_ids.empty()) { + if (!info.delayed_ids.empty() + || !all_of_in(info.vertices, + [&](RoseVertex v) { + return left == tbi.g[v].left; })) { + DEBUG_PRINTF("group %llu is unsquashable\n", info.group_mask); unsquashable |= info.group_mask; } } diff --git a/src/rose/rose_build_groups.cpp b/src/rose/rose_build_groups.cpp index 0a1c501f..a253ef04 100644 --- a/src/rose/rose_build_groups.cpp +++ b/src/rose/rose_build_groups.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,9 @@ #include "rose_build_groups.h" +#include "util/boundary_reports.h" +#include "util/compile_context.h" + #include #include @@ -71,24 +74,18 @@ bool superStrong(const rose_literal_id &lit) { static bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) { - /* returns true if it or any of its delay versions have root role */ - for (auto v : build.literal_info[id].vertices) { - if (build.isRootSuccessor(v)) { - NGHolder *h = build.g[v].left.graph.get(); - if (!h || proper_out_degree(h->startDs, *h)) { - return true; - } - } + auto eligble = [&](RoseVertex v) { + return build.isRootSuccessor(v) + && (!build.g[v].left || !isAnchored(build.g[v].left)); + }; + + if (any_of_in(build.literal_info[id].vertices, eligble)) { + return true; } for (u32 delayed_id : build.literal_info[id].delayed_ids) { - for (auto v : build.literal_info[delayed_id].vertices) { - if (build.isRootSuccessor(v)) { - NGHolder *h = build.g[v].left.graph.get(); - if (!h || proper_out_degree(h->startDs, *h)) { - return true; - } - } + if (any_of_in(build.literal_info[delayed_id].vertices, eligble)) { + return true; } } @@ -170,6 +167,64 @@ u32 next_available_group(u32 counter, u32 min_start_group) { return counter; } +static +void allocateGroupForBoundary(RoseBuildImpl &build, u32 group_always_on, + map &groupCount) { + /* Boundary reports at zero will always fired and forgotten, no need to + * worry about preventing the stream being marked as exhausted */ + if (build.boundary.report_at_eod.empty()) { + return; + } + + /* Group based stream exhaustion is only done at stream boundaries */ + if (!build.cc.streaming) { + return; + } + + DEBUG_PRINTF("allocating %u as boundary group id\n", group_always_on); + + build.boundary_group_mask = 1ULL << group_always_on; + groupCount[group_always_on]++; +} + +static +void allocateGroupForEvent(RoseBuildImpl &build, u32 group_always_on, + map &groupCount, u32 *counter) { + if (build.eod_event_literal_id == MO_INVALID_IDX) { + return; + } + + /* Group based stream exhaustion is only done at stream boundaries */ + if (!build.cc.streaming) { + return; + } + + rose_literal_info &info = build.literal_info[build.eod_event_literal_id]; + + if (info.vertices.empty()) { + return; + } + + bool new_group = !groupCount[group_always_on]; + for (RoseVertex v : info.vertices) { + if (build.g[v].left && !isAnchored(build.g[v].left)) { + 
new_group = false; + } + } + + u32 group; + if (!new_group) { + group = group_always_on; + } else { + group = *counter; + *counter += 1; + } + + DEBUG_PRINTF("allocating %u as eod event group id\n", *counter); + info.group_mask = 1ULL << group; + groupCount[group]++; +} + void assignGroupsToLiterals(RoseBuildImpl &build) { auto &literals = build.literals; auto &literal_info = build.literal_info; @@ -211,6 +266,9 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { counter++; } + allocateGroupForBoundary(build, group_always_on, groupCount); + allocateGroupForEvent(build, group_always_on, groupCount, &counter); + u32 min_start_group = counter; priority_queue> pq; @@ -453,6 +511,7 @@ rose_group getSquashableGroups(const RoseBuildImpl &build) { } DEBUG_PRINTF("squashable groups=0x%llx\n", squashable_groups); + assert(!(squashable_groups & build.boundary_group_mask)); return squashable_groups; } @@ -505,7 +564,7 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, lit_info.group_mask); if (build.literals.right.at(id).table == ROSE_EVENT) { - DEBUG_PRINTF("event literal, has no groups to squash\n"); + DEBUG_PRINTF("event literal\n"); return false; } @@ -628,7 +687,7 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, } void findGroupSquashers(RoseBuildImpl &build) { - rose_group forbidden_squash_group = 0; + rose_group forbidden_squash_group = build.boundary_group_mask; for (const auto &e : build.literals.right) { if (e.second.delay) { forbidden_squash_group |= build.literal_info[e.first].group_mask; diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 2cefb42a..bfdca80c 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -601,6 +601,8 @@ public: u32 max_rose_anchored_floating_overlap; + rose_group boundary_group_mask = 0; + QueueIndexFactory qif; ReportManager &rm; SomSlotManager &ssm; diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index b33192da..142bf138 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -1082,6 +1082,13 @@ bool isAnchored(const left_id &r) { if (r.graph()) { return isAnchored(*r.graph()); } + if (r.dfa()) { + return r.dfa()->start_anchored == DEAD_STATE; + } + if (r.haig()) { + return r.haig()->start_anchored == DEAD_STATE; + } + // All other types are explicitly anchored. 
return true; } diff --git a/src/rose/stream.c b/src/rose/stream.c index 9599612f..17139b25 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -512,6 +512,34 @@ void runEagerPrefixesStream(const struct RoseEngine *t, } } +static really_inline +int can_never_match(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch, size_t length, u64a offset) { + struct RoseContext *tctxt = &scratch->tctxt; + + if (tctxt->groups) { + DEBUG_PRINTF("still has active groups\n"); + return 0; + } + + if (offset + length <= t->anchoredDistance) { /* not < as may have eod */ + DEBUG_PRINTF("still in anchored region\n"); + return 0; + } + + if (t->lastByteHistoryIterOffset) { /* last byte history is hard */ + DEBUG_PRINTF("last byte history\n"); + return 0; + } + + if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { + DEBUG_PRINTF("active leaf\n"); + return 0; + } + + return 1; +} + void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset, scratch->core_info.buf_offset + (u64a)scratch->core_info.len); @@ -647,6 +675,14 @@ exit: if (!can_stop_matching(scratch)) { ensureStreamNeatAndTidy(t, state, scratch, length, offset); } + + if (!told_to_stop_matching(scratch) + && can_never_match(t, state, scratch, length, offset)) { + DEBUG_PRINTF("PATTERN SET IS EXHAUSTED\n"); + scratch->core_info.status = STATUS_EXHAUSTED; + return; + } + DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n", scratch->core_info.status); return; diff --git a/src/runtime.c b/src/runtime.c index a2ed1026..1ee3efa5 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -214,7 +214,7 @@ void pureLiteralBlockExec(const struct RoseEngine *rose, scratch->tctxt.groups = rose->initialGroups; hwlmExec(ftable, buffer, length, 0, roseCallback, scratch, - rose->initialGroups); + rose->initialGroups & rose->floating_group_mask); } static really_inline @@ -762,7 +762,7 @@ void pureLiteralStreamExec(struct hs_stream *stream_state, const size_t start = 0; hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch, - rose->initialGroups); + rose->initialGroups & rose->floating_group_mask); if (!told_to_stop_matching(scratch) && isAllExhausted(rose, scratch->core_info.exhaustionVector)) { diff --git a/src/scratch.h b/src/scratch.h index b59dc8d4..47f8afa8 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,8 +73,11 @@ struct catchup_pq { /** \brief Status flag: user requested termination. */ #define STATUS_TERMINATED (1U << 0) -/** \brief Status flag: all possible matches on this stream have - * been raised (i.e. all its exhaustion keys are on.) 
*/ +/** \brief Status flag: it has been determined that it is not possible for this + * stream to raise any more matches. + * + * This may be because all its exhaustion keys are on or for other reasons + * (anchored sections not matching). */ #define STATUS_EXHAUSTED (1U << 1) /** \brief Status flag: Rose requires rebuild as delay literal matched in diff --git a/src/util/container.h b/src/util/container.h index 24f01fd2..e8a16418 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -202,6 +202,17 @@ void erase_all(C *container, const D &donor) { } } + +template +bool any_of_in(const C &c, Pred p) { + return std::any_of(c.begin(), c.end(), std::move(p)); +} + +template +bool all_of_in(const C &c, Pred p) { + return std::all_of(c.begin(), c.end(), std::move(p)); +} + } // namespace ue2 #ifdef DUMP_SUPPORT From 1fad82273542b57719f84ab754ec373576483053 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 25 Jan 2017 11:29:45 +1100 Subject: [PATCH 052/326] violet: use bitset to speed up poisonFromSuccessor --- src/nfagraph/ng_violet.cpp | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 6ca6f187..1cf3b716 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -69,6 +69,7 @@ #include #include #include +#include #include #define STAGE_DEBUG_PRINTF DEBUG_PRINTF @@ -718,27 +719,39 @@ void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ, DEBUG_PRINTF("poisoning holder of size %zu, succ len %zu\n", num_vertices(h), succ.length()); - map > curr; + using EdgeSet = boost::dynamic_bitset<>; + + const size_t edge_count = num_edges(h); + EdgeSet bad_edges(edge_count); + + unordered_map curr; for (const auto &e : in_edges_range(h.accept, h)) { - curr[source(e, h)].insert(e); + auto &path_set = curr[source(e, h)]; + if (path_set.empty()) { + path_set.resize(edge_count); + } + path_set.set(h[e].index); } - map > next; + unordered_map next; for (auto it = succ.rbegin(); it != succ.rend(); ++it) { for (const auto &path : curr) { NFAVertex u = path.first; const auto &path_set = path.second; if (u == h.start && overhang_ok) { DEBUG_PRINTF("poisoning early %zu [overhang]\n", - path_set.size()); - insert(&bad, path_set); + path_set.count()); + bad_edges |= path_set; continue; } if (overlaps(h[u].char_reach, *it)) { for (const auto &e : in_edges_range(u, h)) { auto &new_path_set = next[source(e, h)]; - insert(&new_path_set, path_set); - new_path_set.insert(e); + if (new_path_set.empty()) { + new_path_set.resize(edge_count); + } + new_path_set |= path_set; + new_path_set.set(h[e].index); } } } @@ -750,8 +763,14 @@ void poisonFromSuccessor(const NGHolder &h, const ue2_literal &succ, assert(overhang_ok || !curr.empty()); for (const auto &path : curr) { - insert(&bad, path.second); - DEBUG_PRINTF("poisoning %zu vertices\n", path.second.size()); + bad_edges |= path.second; + DEBUG_PRINTF("poisoning %zu vertices\n", path.second.count()); + } + + for (const auto &e : edges_range(h)) { + if (bad_edges.test(h[e].index)) { + bad.insert(e); + } } } From 5706acf5c00556be2c2b586c7e01aa4f7142d42e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Feb 2017 13:08:20 +1100 Subject: [PATCH 053/326] role aliasing: bail if new graph is not implementable --- src/rose/rose_build_role_aliasing.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 60142156..3ad4566e 100644 --- 
a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -1125,6 +1125,8 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, shared_ptr a_h = a_left.graph; shared_ptr b_h = b_left.graph; assert(a_h && b_h); + assert(isImplementableNFA(*a_h, nullptr, build.cc)); + assert(isImplementableNFA(*b_h, nullptr, build.cc)); // If we only differ in reports, this is a very easy merge. Just use b's // report for both. @@ -1215,6 +1217,11 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, duplicateReport(*new_graph, b_left.leftfix_report, new_report); pruneAllOtherReports(*new_graph, new_report); + if (!isImplementableNFA(*new_graph, nullptr, build.cc)) { + DEBUG_PRINTF("new graph not implementable\n"); + return false; + } + rai.rev_leftfix[a_left_id].erase(a); rai.rev_leftfix[b_left_id].erase(b); pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); From ac858cd47c079f4dbfe6c39ef9622d72a9986ddd Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 25 Jan 2017 14:45:12 +1100 Subject: [PATCH 054/326] rose: build a separate delay rebuild matcher --- src/rose/rose_build_bytecode.cpp | 16 +++++++++++ src/rose/rose_build_dump.cpp | 14 +++++---- src/rose/rose_build_matchers.cpp | 49 ++++++++++++++++++++++++++++---- src/rose/rose_build_matchers.h | 8 ++++-- src/rose/rose_dump.cpp | 14 +++++++++ src/rose/rose_internal.h | 1 + src/rose/stream.c | 16 +++++++---- 7 files changed, 100 insertions(+), 18 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 736e0d35..321d1ca2 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5535,6 +5535,17 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.resources.has_floating = true; } + // Build delay rebuild HWLM matcher. + size_t drsize = 0; + auto drtable = buildDelayRebuildMatcher(*this, bc.longLitLengthThreshold, + final_to_frag_map, &drsize); + u32 drmatcherOffset = 0; + if (drtable) { + currOffset = ROUNDUP_CL(currOffset); + drmatcherOffset = currOffset; + currOffset += verify_u32(drsize); + } + // Build EOD-anchored HWLM matcher. 
size_t esize = 0; auto etable = buildEodAnchoredMatcher(*this, final_to_frag_map, &esize); @@ -5632,6 +5643,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { assert(fmatcherOffset); memcpy(ptr + fmatcherOffset, ftable.get(), fsize); } + if (drtable) { + assert(drmatcherOffset); + memcpy(ptr + drmatcherOffset, drtable.get(), drsize); + } if (etable) { assert(ematcherOffset); memcpy(ptr + ematcherOffset, etable.get(), esize); @@ -5724,6 +5739,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ematcherOffset = ematcherOffset; engine->sbmatcherOffset = sbmatcherOffset; engine->fmatcherOffset = fmatcherOffset; + engine->drmatcherOffset = drmatcherOffset; engine->longLitTableOffset = longLitTableOffset; engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 495d6f36..abd34629 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -507,23 +507,27 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { const auto final_to_frag_map = groupByFragment(build); - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED, + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED, false, longLitLengthThreshold); dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, longLitLengthThreshold); dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, + mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true, + longLitLengthThreshold); + dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", mp.lits); + + mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, false, build.ematcher_region_size); dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits); if (!build.cc.streaming) { - mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); auto mp2 = makeMatcherProto(build, final_to_frag_map, - ROSE_ANCHORED_SMALL_BLOCK, + ROSE_ANCHORED_SMALL_BLOCK, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits)); dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits); diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index e16a0ac7..9f770973 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -666,8 +666,8 @@ void trim_to_suffix(Container &c, size_t len) { MatcherProto makeMatcherProto(const RoseBuildImpl &build, const map &final_to_frag_map, - rose_literal_table table, size_t max_len, - u32 max_offset) { + rose_literal_table table, bool delay_rebuild, + size_t max_len, u32 max_offset) { MatcherProto mp; for (const auto &e : build.literals.right) { @@ -694,6 +694,13 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(lit).c_str(), lit.length()); + // When building the delay rebuild table, we only want to include + // literals that have delayed variants. 
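+ // (The rebuild scan in do_rebuild() only revisits the last
+ // delayRebuildLength bytes of history to re-enqueue delayed matches, so
+ // fragments with no delayed ids can contribute nothing there and would
+ // only bloat this matcher.)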
+ if (delay_rebuild && info.delayed_ids.empty()) { + DEBUG_PRINTF("not needed for delay rebuild\n"); + continue; + } + if (max_offset != ROSE_BOUND_INF) { u64a min_report = literalMinReportOffset(build, e.second, info); if (min_report > max_offset) { @@ -802,7 +809,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, *fsize = 0; *fgroups = 0; - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, longLitLengthThreshold); if (mp.lits.empty()) { DEBUG_PRINTF("empty floating matcher\n"); @@ -832,6 +839,36 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, return hwlm; } +aligned_unique_ptr buildDelayRebuildMatcher( + const RoseBuildImpl &build, size_t longLitLengthThreshold, + const map &final_to_frag_map, size_t *drsize) { + *drsize = 0; + + if (!build.cc.streaming) { + DEBUG_PRINTF("not streaming\n"); + return nullptr; + } + + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true, + longLitLengthThreshold); + if (mp.lits.empty()) { + DEBUG_PRINTF("empty delay rebuild matcher\n"); + return nullptr; + } + + auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); + if (!hwlm) { + throw CompileError("Unable to generate bytecode."); + } + + buildAccel(build, mp, *hwlm); + + *drsize = hwlmSize(hwlm.get()); + assert(*drsize); + DEBUG_PRINTF("built delay rebuild table size %zu bytes\n", *drsize); + return hwlm; +} + aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, const map &final_to_frag_map, size_t *sbsize) { @@ -849,7 +886,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (mp.lits.empty()) { DEBUG_PRINTF("no floating table\n"); @@ -861,7 +898,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, auto mp_anchored = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); + false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (mp_anchored.lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; @@ -898,7 +935,7 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build, *esize = 0; auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, - build.ematcher_region_size); + false, build.ematcher_region_size); if (mp.lits.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index a1817307..b06d460f 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -69,8 +69,8 @@ struct MatcherProto { */ MatcherProto makeMatcherProto(const RoseBuildImpl &build, const std::map &final_to_frag_map, - rose_literal_table table, size_t max_len, - u32 max_offset = ROSE_BOUND_INF); + rose_literal_table table, bool delay_rebuild, + size_t max_len, u32 max_offset = ROSE_BOUND_INF); aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, @@ -79,6 +79,10 @@ aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t *fsize, size_t *historyRequired); +aligned_unique_ptr buildDelayRebuildMatcher( + const RoseBuildImpl &build, size_t longLitLengthThreshold, + const std::map &final_to_frag_map, size_t *drsize); + aligned_unique_ptr 
buildSmallBlockMatcher(const RoseBuildImpl &build, const std::map &final_to_frag_map, diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 96f49688..d83f8f9e 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -108,6 +108,11 @@ const HWLM *getFloatingMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset); } +static +const HWLM *getDelayRebuildMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset); +} + static const HWLM *getEodMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset); @@ -1158,6 +1163,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) { const void *atable = getAnchoredMatcher(t); const HWLM *ftable = getFloatingMatcher(t); + const HWLM *drtable = getDelayRebuildMatcher(t); const HWLM *etable = getEodMatcher(t); const HWLM *sbtable = getSmallBlockMatcher(t); @@ -1212,6 +1218,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) { } else { fprintf(f, "\n"); } + fprintf(f, " - delay-rb matcher : %zu bytes\n", + drtable ? hwlmSize(drtable) : 0); fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n", etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", @@ -1274,6 +1282,11 @@ void roseDumpText(const RoseEngine *t, FILE *f) { hwlmPrintStats(ftable, f); } + if (drtable) { + fprintf(f, "\nDelay Rebuild literal matcher stats:\n\n"); + hwlmPrintStats(drtable, f); + } + if (etable) { fprintf(f, "\nEOD-anchored literal matcher stats:\n\n"); hwlmPrintStats(etable, f); @@ -1322,6 +1335,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, amatcherOffset); DUMP_U32(t, ematcherOffset); DUMP_U32(t, fmatcherOffset); + DUMP_U32(t, drmatcherOffset); DUMP_U32(t, sbmatcherOffset); DUMP_U32(t, longLitTableOffset); DUMP_U32(t, amatcherMinWidth); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index bf6e9a86..8e55a37d 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -326,6 +326,7 @@ struct RoseEngine { u32 amatcherOffset; // offset of the anchored literal matcher (bytes) u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes) u32 fmatcherOffset; // offset of the floating literal matcher (bytes) + u32 drmatcherOffset; // offset of the delayed rebuild table (bytes) u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes) u32 longLitTableOffset; // offset of the long literal table u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern diff --git a/src/rose/stream.c b/src/rose/stream.c index 17139b25..31224276 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -412,16 +412,22 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, } static really_inline -void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable, - struct hs_scratch *scratch) { +void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) { assert(!can_stop_matching(scratch)); + + if (!t->drmatcherOffset) { + DEBUG_PRINTF("no delayed rebuild table\n"); + return; + } + + const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset); size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength); const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len; DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len); scratch->core_info.status &= ~STATUS_DELAY_DIRTY; - hwlmExec(ftable, buf, len, 0, roseDelayRebuildCallback, scratch, + 
hwlmExec(hwlm, buf, len, 0, roseDelayRebuildCallback, scratch, scratch->tctxt.groups); assert(!can_stop_matching(scratch)); } @@ -637,13 +643,13 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { if (!flen) { if (rebuild) { /* rebuild floating delayed match stuff */ - do_rebuild(t, ftable, scratch); + do_rebuild(t, scratch); } goto flush_delay_and_exit; } if (rebuild) { /* rebuild floating delayed match stuff */ - do_rebuild(t, ftable, scratch); + do_rebuild(t, scratch); } if (flen + offset <= t->floatingMinDistance) { From 76f72b6ab44e9eae387035b95833e6bf88d3d74c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 30 Jan 2017 09:14:03 +1100 Subject: [PATCH 055/326] rose: use program offsets directly in lit tables --- src/rose/match.c | 26 +++++++-------------- src/rose/rose_build_anchored.cpp | 15 ++++++------ src/rose/rose_build_anchored.h | 5 ++-- src/rose/rose_build_bytecode.cpp | 40 ++++++++++++++++++-------------- src/rose/rose_build_impl.h | 9 ++++++- src/rose/rose_build_matchers.cpp | 24 +++++++++++-------- src/rose/rose_build_matchers.h | 26 ++++++++++----------- 7 files changed, 76 insertions(+), 69 deletions(-) diff --git a/src/rose/match.c b/src/rose/match.c index 9a702804..c7c73d25 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -85,19 +85,13 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); - const u32 *delayRebuildPrograms = - getByOffset(t, t->litDelayRebuildProgramOffset); - assert(id < t->literalCount); - const u32 program = delayRebuildPrograms[id]; - - if (program) { - const u64a som = 0; - const size_t match_len = end - start + 1; - const u8 flags = 0; - UNUSED hwlmcb_rv_t rv = roseRunProgram(t, scratch, program, som, - real_end, match_len, flags); - assert(rv != HWLM_TERMINATE_MATCHING); - } + assert(id < t->size); // id is a program offset + const u64a som = 0; + const size_t match_len = end - start + 1; + const u8 flags = 0; + UNUSED hwlmcb_rv_t rv = + roseRunProgram(t, scratch, id, som, real_end, match_len, flags); + assert(rv != HWLM_TERMINATE_MATCHING); /* we are just repopulating the delay queue, groups should be * already set from the original scan. */ @@ -245,12 +239,10 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, struct hs_scratch *scratch, u64a end, size_t match_len, u32 id) { DEBUG_PRINTF("id=%u\n", id); - const u32 *programs = getByOffset(t, t->litProgramOffset); - assert(id < t->literalCount); + assert(id < t->size); // id is an offset into bytecode const u64a som = 0; const u8 flags = 0; - return roseRunProgram_i(t, scratch, programs[id], som, end, match_len, - flags); + return roseRunProgram_i(t, scratch, id, som, end, match_len, flags); } static rose_inline diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index ea565eaa..77eef142 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -208,8 +208,8 @@ void remapAnchoredReports(RoseBuildImpl &build) { * raw_dfa with program offsets. */ static -void remapIdsToPrograms(raw_dfa &rdfa, const vector &litPrograms, - const map &final_to_frag_map) { +void remapIdsToPrograms(raw_dfa &rdfa, + const map &final_to_frag_map) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. 
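        // Each report attached to a DFA state here is a final literal ID; it
        // is replaced below with the offset of that literal's bytecode
        // program, so an anchored match can jump straight into the program at
        // runtime with no intermediate lookup table.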
if (ds.reports.empty()) { @@ -219,9 +219,8 @@ void remapIdsToPrograms(raw_dfa &rdfa, const vector &litPrograms, flat_set new_reports; for (auto final_id : ds.reports) { assert(contains(final_to_frag_map, final_id)); - auto frag_id = final_to_frag_map.at(final_id); - assert(frag_id < litPrograms.size()); - new_reports.insert(litPrograms.at(frag_id)); + auto &frag = final_to_frag_map.at(final_id); + new_reports.insert(frag.lit_program_offset); } ds.reports = move(new_reports); } @@ -849,8 +848,8 @@ vector buildAnchoredDfas(RoseBuildImpl &build) { aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, - const vector &litPrograms, - const map &final_to_frag_map, size_t *asize) { + const map &final_to_frag_map, + size_t *asize) { const CompileContext &cc = build.cc; if (dfas.empty()) { @@ -860,7 +859,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, } for (auto &rdfa : dfas) { - remapIdsToPrograms(rdfa, litPrograms, final_to_frag_map); + remapIdsToPrograms(rdfa, final_to_frag_map); } vector> nfas; diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index fa379ff6..3d411bd7 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -30,7 +30,7 @@ #define ROSE_BUILD_ANCHORED #include "ue2common.h" -#include "rose_build.h" +#include "rose_build_impl.h" #include "nfagraph/ng_holder.h" #include "util/alloc.h" @@ -59,8 +59,7 @@ std::vector buildAnchoredDfas(RoseBuildImpl &build); */ aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, std::vector &dfas, - const std::vector &litPrograms, - const std::map &final_to_frag_map, + const std::map &final_to_frag_map, size_t *asize); u32 anchoredStateSize(const anchored_matcher_info &atable); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 321d1ca2..92e9aa59 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -213,10 +213,6 @@ struct build_context : boost::noncopyable { * that have already been pushed into the engine_blob. */ ue2::unordered_map engineOffsets; - /** \brief Literal programs, indexed by final_id, after they have been - * written to the engine_blob. */ - vector litPrograms; - /** \brief List of long literals (ones with CHECK_LONG_LIT instructions) * that need hash table support. */ vector longLiterals; @@ -4578,6 +4574,10 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, static u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, const flat_set &final_ids) { + if (!build.cc.streaming) { + return 0; // We only do delayed rebuild in streaming mode. 
+ } + RoseProgram program; for (const auto &final_id : final_ids) { @@ -4649,9 +4649,9 @@ rose_literal_id getFragment(const rose_literal_id &lit) { return frag; } -map groupByFragment(const RoseBuildImpl &build) { +map groupByFragment(const RoseBuildImpl &build) { u32 frag_id = 0; - map final_to_frag; + map final_to_frag; map> frag_lits; for (const auto &m : build.final_id_to_literal) { @@ -4660,21 +4660,21 @@ map groupByFragment(const RoseBuildImpl &build) { assert(!lit_ids.empty()); if (lit_ids.size() > 1) { - final_to_frag.emplace(final_id, frag_id++); + final_to_frag.emplace(final_id, LitFragment(frag_id++)); continue; } const auto lit_id = *lit_ids.begin(); const auto &lit = build.literals.right.at(lit_id); if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { - final_to_frag.emplace(final_id, frag_id++); + final_to_frag.emplace(final_id, LitFragment(frag_id++)); continue; } // Combining fragments that squash their groups is unsafe. const auto &info = build.literal_info[lit_id]; if (info.squash_group) { - final_to_frag.emplace(final_id, frag_id++); + final_to_frag.emplace(final_id, LitFragment(frag_id++)); continue; } @@ -4689,7 +4689,7 @@ map groupByFragment(const RoseBuildImpl &build) { as_string_list(m.second).c_str()); for (const auto final_id : m.second) { assert(!contains(final_to_frag, final_id)); - final_to_frag.emplace(final_id, frag_id); + final_to_frag.emplace(final_id, LitFragment(frag_id)); } frag_id++; } @@ -4709,11 +4709,11 @@ map groupByFragment(const RoseBuildImpl &build) { static tuple buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, - const map &final_to_frag_map) { + map &final_to_frag_map) { // Build a reverse mapping from fragment -> final_id. map> frag_to_final_map; for (const auto &m : final_to_frag_map) { - frag_to_final_map[m.second].insert(m.first); + frag_to_final_map[m.second.fragment_id].insert(m.first); } const u32 num_fragments = verify_u32(frag_to_final_map.size()); @@ -4721,7 +4721,7 @@ buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, auto lit_edge_map = findEdgesByLiteral(build); - bc.litPrograms.resize(num_fragments); + vector litPrograms(num_fragments); vector delayRebuildPrograms(num_fragments); for (u32 frag_id = 0; frag_id != num_fragments; ++frag_id) { @@ -4729,14 +4729,20 @@ buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag_id, as_string_list(final_ids).c_str()); - bc.litPrograms[frag_id] = + litPrograms[frag_id] = writeLiteralProgram(build, bc, final_ids, lit_edge_map); delayRebuildPrograms[frag_id] = buildDelayRebuildProgram(build, bc, final_ids); } + // Update LitFragment entries. + for (auto &frag : final_to_frag_map | map_values) { + frag.lit_program_offset = litPrograms[frag.fragment_id]; + frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id]; + } + u32 litProgramsOffset = - bc.engine_blob.add(begin(bc.litPrograms), end(bc.litPrograms)); + bc.engine_blob.add(begin(litPrograms), end(litPrograms)); u32 delayRebuildProgramsOffset = bc.engine_blob.add( begin(delayRebuildPrograms), end(delayRebuildPrograms)); @@ -5513,8 +5519,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build anchored matcher. 
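    // The anchored DFAs already have their reports remapped to literal
    // program offsets (see remapIdsToPrograms in rose_build_anchored.cpp),
    // so the anchored matcher no longer needs a separate litPrograms table.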
size_t asize = 0; u32 amatcherOffset = 0; - auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms, - final_to_frag_map, &asize); + auto atable = + buildAnchoredMatcher(*this, anchored_dfas, final_to_frag_map, &asize); if (atable) { currOffset = ROUNDUP_CL(currOffset); amatcherOffset = currOffset; diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index bfdca80c..e615d42b 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -642,7 +642,14 @@ void normaliseLiteralMask(const ue2_literal &s, std::vector &msk, bool canImplementGraphs(const RoseBuildImpl &tbi); #endif -std::map groupByFragment(const RoseBuildImpl &build); +struct LitFragment { + explicit LitFragment(u32 fragment_id_in) : fragment_id(fragment_id_in) {} + u32 fragment_id; + u32 lit_program_offset = 0; + u32 delay_program_offset = 0; +}; + +std::map groupByFragment(const RoseBuildImpl &build); } // namespace ue2 diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 9f770973..c725b39d 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -637,12 +637,12 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, static map makeFragGroupMap(const RoseBuildImpl &build, - const map &final_to_frag_map) { + const map &final_to_frag_map) { map frag_to_group; for (const auto &m : final_to_frag_map) { u32 final_id = m.first; - u32 frag_id = m.second; + u32 frag_id = m.second.fragment_id; hwlm_group_t groups = 0; const auto &lits = build.final_id_to_literal.at(final_id); for (auto lit_id : lits) { @@ -665,7 +665,7 @@ void trim_to_suffix(Container &c, size_t len) { } MatcherProto makeMatcherProto(const RoseBuildImpl &build, - const map &final_to_frag_map, + const map &final_to_frag_map, rose_literal_table table, bool delay_rebuild, size_t max_len, u32 max_offset) { MatcherProto mp; @@ -758,9 +758,11 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, for (auto &lit : mp.lits) { u32 final_id = lit.id; assert(contains(final_to_frag_map, final_id)); - lit.id = final_to_frag_map.at(final_id); - assert(contains(frag_group_map, lit.id)); - lit.groups = frag_group_map.at(lit.id); + const auto &frag = final_to_frag_map.at(final_id); + lit.id = delay_rebuild ? 
frag.delay_program_offset + : frag.lit_program_offset; + assert(contains(frag_group_map, frag.fragment_id)); + lit.groups = frag_group_map.at(frag.fragment_id); } sort_and_unique(mp.lits); @@ -803,7 +805,7 @@ void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp, aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, - const map &final_to_frag_map, + const map &final_to_frag_map, rose_group *fgroups, size_t *fsize, size_t *historyRequired) { *fsize = 0; @@ -841,7 +843,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, aligned_unique_ptr buildDelayRebuildMatcher( const RoseBuildImpl &build, size_t longLitLengthThreshold, - const map &final_to_frag_map, size_t *drsize) { + const map &final_to_frag_map, size_t *drsize) { *drsize = 0; if (!build.cc.streaming) { @@ -871,7 +873,8 @@ aligned_unique_ptr buildDelayRebuildMatcher( aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, - const map &final_to_frag_map, size_t *sbsize) { + const map &final_to_frag_map, + size_t *sbsize) { *sbsize = 0; if (build.cc.streaming) { @@ -931,7 +934,8 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, - const map &final_to_frag_map, size_t *esize) { + const map &final_to_frag_map, + size_t *esize) { *esize = 0; auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index b06d460f..d7da113c 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -67,30 +67,30 @@ struct MatcherProto { * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can * only lead to a pattern match after max_offset may be excluded. 
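 *
 * If delay_rebuild is true, the literal IDs in the resulting proto are
 * delay-rebuild program offsets rather than literal program offsets.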
*/ -MatcherProto makeMatcherProto(const RoseBuildImpl &build, - const std::map &final_to_frag_map, - rose_literal_table table, bool delay_rebuild, - size_t max_len, u32 max_offset = ROSE_BOUND_INF); +MatcherProto +makeMatcherProto(const RoseBuildImpl &build, + const std::map &final_to_frag_map, + rose_literal_table table, bool delay_rebuild, size_t max_len, + u32 max_offset = ROSE_BOUND_INF); -aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, - size_t longLitLengthThreshold, - const std::map &final_to_frag_map, - rose_group *fgroups, - size_t *fsize, - size_t *historyRequired); +aligned_unique_ptr +buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, + const std::map &final_to_frag_map, + rose_group *fgroups, size_t *fsize, + size_t *historyRequired); aligned_unique_ptr buildDelayRebuildMatcher( const RoseBuildImpl &build, size_t longLitLengthThreshold, - const std::map &final_to_frag_map, size_t *drsize); + const std::map &final_to_frag_map, size_t *drsize); aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, - const std::map &final_to_frag_map, + const std::map &final_to_frag_map, size_t *sbsize); aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, - const std::map &final_to_frag_map, + const std::map &final_to_frag_map, size_t *esize); void findMoreLiteralMasks(RoseBuildImpl &build); From cfa63a70964d26cf7b52d255c91f26c66dd84a86 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 09:53:16 +1100 Subject: [PATCH 056/326] stream: simplify do_rebuild call site --- src/rose/stream.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/rose/stream.c b/src/rose/stream.c index 31224276..c68cd8ab 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -413,13 +413,9 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, static really_inline void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) { + assert(t->drmatcherOffset); assert(!can_stop_matching(scratch)); - if (!t->drmatcherOffset) { - DEBUG_PRINTF("no delayed rebuild table\n"); - return; - } - const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset); size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength); const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len; @@ -641,17 +637,14 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { rebuild, scratch->core_info.status, t->maxFloatingDelayedMatch, offset); - if (!flen) { - if (rebuild) { /* rebuild floating delayed match stuff */ - do_rebuild(t, scratch); - } - goto flush_delay_and_exit; - } - if (rebuild) { /* rebuild floating delayed match stuff */ do_rebuild(t, scratch); } + if (!flen) { + goto flush_delay_and_exit; + } + if (flen + offset <= t->floatingMinDistance) { DEBUG_PRINTF("skip FLOATING: before floating min\n"); goto flush_delay_and_exit; From 3ae2fb417e9270e7b666d51ee32f98f0e7b00eb0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 10:22:23 +1100 Subject: [PATCH 057/326] move final_to_frag_map into RoseBuildImpl (for dump code) --- src/rose/rose_build_bytecode.cpp | 2 +- src/rose/rose_build_dump.cpp | 21 ++++++++++----------- src/rose/rose_build_impl.h | 16 +++++++++------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 92e9aa59..51d8da97 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5414,7 +5414,7 @@ 
aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); allocateFinalLiteralId(*this); - auto final_to_frag_map = groupByFragment(*this); + final_to_frag_map = groupByFragment(*this); auto anchored_dfas = buildAnchoredDfas(*this); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index abd34629..2b19e197 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -505,28 +505,27 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { size_t longLitLengthThreshold = calcLongLitThreshold(build, historyRequired); - const auto final_to_frag_map = groupByFragment(build); - - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED, false, - longLitLengthThreshold); + auto mp = makeMatcherProto(build, build.final_to_frag_map, ROSE_ANCHORED, + false, longLitLengthThreshold); dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, + mp = makeMatcherProto(build, build.final_to_frag_map, ROSE_FLOATING, false, longLitLengthThreshold); dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true, + mp = makeMatcherProto(build, build.final_to_frag_map, ROSE_FLOATING, true, longLitLengthThreshold); dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, false, - build.ematcher_region_size); + mp = makeMatcherProto(build, build.final_to_frag_map, ROSE_EOD_ANCHORED, + false, build.ematcher_region_size); dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits); if (!build.cc.streaming) { - mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - auto mp2 = makeMatcherProto(build, final_to_frag_map, + mp = + makeMatcherProto(build, build.final_to_frag_map, ROSE_FLOATING, + false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); + auto mp2 = makeMatcherProto(build, build.final_to_frag_map, ROSE_ANCHORED_SMALL_BLOCK, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits)); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index e615d42b..e9ba5f55 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -436,6 +436,13 @@ private: std::set all_reports(const OutfixInfo &outfix); +struct LitFragment { + explicit LitFragment(u32 fragment_id_in) : fragment_id(fragment_id_in) {} + u32 fragment_id; + u32 lit_program_offset = 0; + u32 delay_program_offset = 0; +}; + // Concrete impl class class RoseBuildImpl : public RoseBuild { public: @@ -587,6 +594,8 @@ public: std::map > final_id_to_literal; /* final literal id to * literal id */ + std::map final_to_frag_map; + unordered_set transient; unordered_map rose_squash_masks; @@ -642,13 +651,6 @@ void normaliseLiteralMask(const ue2_literal &s, std::vector &msk, bool canImplementGraphs(const RoseBuildImpl &tbi); #endif -struct LitFragment { - explicit LitFragment(u32 fragment_id_in) : fragment_id(fragment_id_in) {} - u32 fragment_id; - u32 lit_program_offset = 0; - u32 delay_program_offset = 0; -}; - std::map groupByFragment(const RoseBuildImpl &build); } // namespace ue2 From c2cac5009a307043698029a9662dc58ea36f44ff Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 10:28:10 +1100 Subject: [PATCH 
058/326] tidy up args to builders --- src/rose/rose_build_anchored.cpp | 3 +- src/rose/rose_build_anchored.h | 1 - src/rose/rose_build_bytecode.cpp | 25 +++++++--------- src/rose/rose_build_dump.cpp | 22 ++++++-------- src/rose/rose_build_matchers.cpp | 49 ++++++++++++++------------------ src/rose/rose_build_matchers.h | 36 ++++++++++------------- 6 files changed, 57 insertions(+), 79 deletions(-) diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 77eef142..8ba80f8e 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -848,7 +848,6 @@ vector buildAnchoredDfas(RoseBuildImpl &build) { aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, - const map &final_to_frag_map, size_t *asize) { const CompileContext &cc = build.cc; @@ -859,7 +858,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, } for (auto &rdfa : dfas) { - remapIdsToPrograms(rdfa, final_to_frag_map); + remapIdsToPrograms(rdfa, build.final_to_frag_map); } vector> nfas; diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index 3d411bd7..618a46a3 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -59,7 +59,6 @@ std::vector buildAnchoredDfas(RoseBuildImpl &build); */ aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, std::vector &dfas, - const std::map &final_to_frag_map, size_t *asize); u32 anchoredStateSize(const anchored_matcher_info &atable); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 51d8da97..42142936 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4707,12 +4707,11 @@ map groupByFragment(const RoseBuildImpl &build) { * - total number of literal fragments */ static -tuple -buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, - map &final_to_frag_map) { +tuple buildLiteralPrograms(RoseBuildImpl &build, + build_context &bc) { // Build a reverse mapping from fragment -> final_id. map> frag_to_final_map; - for (const auto &m : final_to_frag_map) { + for (const auto &m : build.final_to_frag_map) { frag_to_final_map[m.second.fragment_id].insert(m.first); } @@ -4736,7 +4735,7 @@ buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, } // Update LitFragment entries. - for (auto &frag : final_to_frag_map | map_values) { + for (auto &frag : build.final_to_frag_map | map_values) { frag.lit_program_offset = litPrograms[frag.fragment_id]; frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id]; } @@ -5480,7 +5479,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 litDelayRebuildProgramOffset; u32 litProgramCount; tie(litProgramOffset, litDelayRebuildProgramOffset, litProgramCount) = - buildLiteralPrograms(*this, bc, final_to_frag_map); + buildLiteralPrograms(*this, bc); u32 delayProgramOffset = buildDelayPrograms(*this, bc); u32 anchoredProgramOffset = buildAnchoredPrograms(*this, bc); @@ -5519,8 +5518,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build anchored matcher. 
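    // From this point the builders read final_to_frag_map from the build
    // object itself (it was moved into RoseBuildImpl), so it is no longer
    // threaded through their argument lists.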
size_t asize = 0; u32 amatcherOffset = 0; - auto atable = - buildAnchoredMatcher(*this, anchored_dfas, final_to_frag_map, &asize); + auto atable = buildAnchoredMatcher(*this, anchored_dfas, &asize); if (atable) { currOffset = ROUNDUP_CL(currOffset); amatcherOffset = currOffset; @@ -5531,8 +5529,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { rose_group fgroups = 0; size_t fsize = 0; auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold, - final_to_frag_map, &fgroups, &fsize, - &historyRequired); + &fgroups, &fsize, &historyRequired); u32 fmatcherOffset = 0; if (ftable) { currOffset = ROUNDUP_CL(currOffset); @@ -5543,8 +5540,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build delay rebuild HWLM matcher. size_t drsize = 0; - auto drtable = buildDelayRebuildMatcher(*this, bc.longLitLengthThreshold, - final_to_frag_map, &drsize); + auto drtable = + buildDelayRebuildMatcher(*this, bc.longLitLengthThreshold, &drsize); u32 drmatcherOffset = 0; if (drtable) { currOffset = ROUNDUP_CL(currOffset); @@ -5554,7 +5551,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build EOD-anchored HWLM matcher. size_t esize = 0; - auto etable = buildEodAnchoredMatcher(*this, final_to_frag_map, &esize); + auto etable = buildEodAnchoredMatcher(*this, &esize); u32 ematcherOffset = 0; if (etable) { currOffset = ROUNDUP_CL(currOffset); @@ -5564,7 +5561,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build small-block HWLM matcher. size_t sbsize = 0; - auto sbtable = buildSmallBlockMatcher(*this, final_to_frag_map, &sbsize); + auto sbtable = buildSmallBlockMatcher(*this, &sbsize); u32 sbmatcherOffset = 0; if (sbtable) { currOffset = ROUNDUP_CL(currOffset); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 2b19e197..3df2d5f5 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -505,28 +505,24 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { size_t longLitLengthThreshold = calcLongLitThreshold(build, historyRequired); - auto mp = makeMatcherProto(build, build.final_to_frag_map, ROSE_ANCHORED, - false, longLitLengthThreshold); + auto mp = + makeMatcherProto(build, ROSE_ANCHORED, false, longLitLengthThreshold); dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, build.final_to_frag_map, ROSE_FLOATING, false, - longLitLengthThreshold); + mp = makeMatcherProto(build, ROSE_FLOATING, false, longLitLengthThreshold); dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, build.final_to_frag_map, ROSE_FLOATING, true, - longLitLengthThreshold); + mp = makeMatcherProto(build, ROSE_FLOATING, true, longLitLengthThreshold); dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, build.final_to_frag_map, ROSE_EOD_ANCHORED, - false, build.ematcher_region_size); + mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, false, + build.ematcher_region_size); dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits); if (!build.cc.streaming) { - mp = - makeMatcherProto(build, build.final_to_frag_map, ROSE_FLOATING, - false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - auto mp2 = makeMatcherProto(build, build.final_to_frag_map, - ROSE_ANCHORED_SMALL_BLOCK, false, + mp = makeMatcherProto(build, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); + auto mp2 = makeMatcherProto(build, 
ROSE_ANCHORED_SMALL_BLOCK, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits)); dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits); diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index c725b39d..998cd545 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -636,11 +636,10 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, } static -map makeFragGroupMap(const RoseBuildImpl &build, - const map &final_to_frag_map) { +map makeFragGroupMap(const RoseBuildImpl &build) { map frag_to_group; - for (const auto &m : final_to_frag_map) { + for (const auto &m : build.final_to_frag_map) { u32 final_id = m.first; u32 frag_id = m.second.fragment_id; hwlm_group_t groups = 0; @@ -665,7 +664,6 @@ void trim_to_suffix(Container &c, size_t len) { } MatcherProto makeMatcherProto(const RoseBuildImpl &build, - const map &final_to_frag_map, rose_literal_table table, bool delay_rebuild, size_t max_len, u32 max_offset) { MatcherProto mp; @@ -753,12 +751,12 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, cmp); } - auto frag_group_map = makeFragGroupMap(build, final_to_frag_map); + auto frag_group_map = makeFragGroupMap(build); for (auto &lit : mp.lits) { u32 final_id = lit.id; - assert(contains(final_to_frag_map, final_id)); - const auto &frag = final_to_frag_map.at(final_id); + assert(contains(build.final_to_frag_map, final_id)); + const auto &frag = build.final_to_frag_map.at(final_id); lit.id = delay_rebuild ? frag.delay_program_offset : frag.lit_program_offset; assert(contains(frag_group_map, frag.fragment_id)); @@ -805,14 +803,13 @@ void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp, aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, - const map &final_to_frag_map, rose_group *fgroups, size_t *fsize, size_t *historyRequired) { *fsize = 0; *fgroups = 0; - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, - longLitLengthThreshold); + auto mp = + makeMatcherProto(build, ROSE_FLOATING, false, longLitLengthThreshold); if (mp.lits.empty()) { DEBUG_PRINTF("empty floating matcher\n"); return nullptr; @@ -841,9 +838,9 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, return hwlm; } -aligned_unique_ptr buildDelayRebuildMatcher( - const RoseBuildImpl &build, size_t longLitLengthThreshold, - const map &final_to_frag_map, size_t *drsize) { +aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, + size_t longLitLengthThreshold, + size_t *drsize) { *drsize = 0; if (!build.cc.streaming) { @@ -851,8 +848,8 @@ aligned_unique_ptr buildDelayRebuildMatcher( return nullptr; } - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true, - longLitLengthThreshold); + auto mp = + makeMatcherProto(build, ROSE_FLOATING, true, longLitLengthThreshold); if (mp.lits.empty()) { DEBUG_PRINTF("empty delay rebuild matcher\n"); return nullptr; @@ -871,10 +868,8 @@ aligned_unique_ptr buildDelayRebuildMatcher( return hwlm; } -aligned_unique_ptr -buildSmallBlockMatcher(const RoseBuildImpl &build, - const map &final_to_frag_map, - size_t *sbsize) { +aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, + size_t *sbsize) { *sbsize = 0; if (build.cc.streaming) { @@ -889,7 +884,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, 
false, + auto mp = makeMatcherProto(build, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (mp.lits.empty()) { DEBUG_PRINTF("no floating table\n"); @@ -900,8 +895,8 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, } auto mp_anchored = - makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED_SMALL_BLOCK, - false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); + makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, false, + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (mp_anchored.lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; @@ -932,14 +927,12 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, return hwlm; } -aligned_unique_ptr -buildEodAnchoredMatcher(const RoseBuildImpl &build, - const map &final_to_frag_map, - size_t *esize) { +aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, + size_t *esize) { *esize = 0; - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, - false, build.ematcher_region_size); + auto mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, false, + build.ematcher_region_size); if (mp.lits.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index d7da113c..184c2633 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -67,31 +67,25 @@ struct MatcherProto { * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can * only lead to a pattern match after max_offset may be excluded. */ -MatcherProto -makeMatcherProto(const RoseBuildImpl &build, - const std::map &final_to_frag_map, - rose_literal_table table, bool delay_rebuild, size_t max_len, - u32 max_offset = ROSE_BOUND_INF); +MatcherProto makeMatcherProto(const RoseBuildImpl &build, + rose_literal_table table, bool delay_rebuild, + size_t max_len, u32 max_offset = ROSE_BOUND_INF); -aligned_unique_ptr -buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, - const std::map &final_to_frag_map, - rose_group *fgroups, size_t *fsize, - size_t *historyRequired); +aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + size_t longLitLengthThreshold, + rose_group *fgroups, + size_t *fsize, + size_t *historyRequired); -aligned_unique_ptr buildDelayRebuildMatcher( - const RoseBuildImpl &build, size_t longLitLengthThreshold, - const std::map &final_to_frag_map, size_t *drsize); +aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, + size_t longLitLengthThreshold, + size_t *drsize); -aligned_unique_ptr -buildSmallBlockMatcher(const RoseBuildImpl &build, - const std::map &final_to_frag_map, - size_t *sbsize); +aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, + size_t *sbsize); -aligned_unique_ptr -buildEodAnchoredMatcher(const RoseBuildImpl &build, - const std::map &final_to_frag_map, - size_t *esize); +aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, + size_t *esize); void findMoreLiteralMasks(RoseBuildImpl &build); From c6bf1919d01ca9094dfdea7bef6c9882a15a48e3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 10:57:09 +1100 Subject: [PATCH 059/326] rose: merge all dump code into rose_build_dump.cpp --- CMakeLists.txt | 2 - src/rose/rose_build_dump.cpp | 1376 ++++++++++++++++++++++++++++++- src/rose/rose_dump.cpp | 1481 ---------------------------------- src/rose/rose_dump.h | 50 -- 4 files changed, 1375 insertions(+), 1534 deletions(-) delete mode 100644 src/rose/rose_dump.cpp delete mode 
100644 src/rose/rose_dump.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f5d29642..27d3e02b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1048,8 +1048,6 @@ set(hs_dump_SRCS src/rose/rose_build_dump.h src/rose/rose_in_dump.cpp src/rose/rose_in_dump.h - src/rose/rose_dump.cpp - src/rose/rose_dump.h src/util/dump_charclass.cpp src/util/dump_charclass.h src/util/dump_util.cpp diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 3df2d5f5..e3497898 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -32,11 +32,16 @@ #include "rose_build_impl.h" #include "rose_build_matchers.h" -#include "rose/rose_dump.h" #include "rose_internal.h" +#include "rose_program.h" #include "ue2common.h" +#include "hs_compile.h" +#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_dump.h" #include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" +#include "nfa/nfa_build_util.h" +#include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_dump.h" #include "som/slot_manager_dump.h" @@ -44,9 +49,12 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" +#include "util/multibit.h" +#include "util/multibit_build.h" #include "util/ue2string.h" #include +#include #include #include #include @@ -81,6 +89,27 @@ string render_kind(const Graph &g) { namespace { +struct rose_off { + explicit rose_off(u32 j) : i(j) {} + string str(void) const; + u32 i; +}; + +ostream &operator<<(ostream &o, const rose_off &to) { + if (to.i == ROSE_BOUND_INF) { + o << "inf"; + } else { + o << to.i; + } + return o; +} + +string rose_off::str(void) const { + ostringstream out; + out << *this; + return out.str(); +} + class RoseGraphWriter { public: RoseGraphWriter(const RoseBuildImpl &b_in, const RoseEngine *t_in) : @@ -529,6 +558,1351 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { } } +static +const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) { + if (!offset) { + return nullptr; + } + + const char *lt = (const char *)t + offset; + return lt; +} + +static +const void *getAnchoredMatcher(const RoseEngine *t) { + return loadFromByteCodeOffset(t, t->amatcherOffset); +} + +static +const HWLM *getFloatingMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset); +} + +static +const HWLM *getDelayRebuildMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset); +} + +static +const HWLM *getEodMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset); +} + +static +const HWLM *getSmallBlockMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset); +} + +static +CharReach bitvectorToReach(const u8 *reach) { + CharReach cr; + + for (size_t i = 0; i < 256; i++) { + if (reach[i / 8] & (1U << (i % 8))) { + cr.set(i); + + } + } + return cr; +} + +static +void dumpLookaround(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_CHECK_LOOKAROUND *ri) { + assert(ri); + + const u8 *base = (const u8 *)t; + const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); + const u8 *reach_base = base + t->lookaroundReachOffset; + + const s8 *look = look_base + ri->index; + const s8 *look_end = look + ri->count; + const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN; + + os << " contents:" << endl; + + for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) { + os << " " << std::setw(4) << std::setfill(' ') << 
int{*look} + << ": "; + describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); + os << endl; + } +} + +static +vector sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) { + vector keys; + + if (num_bits == 0) { + return keys; + } + + vector bits(mmbit_size(num_bits), u8{0xff}); // All bits on. + vector state(MAX_SPARSE_ITER_STATES); + + const u8 *b = bits.data(); + mmbit_sparse_state *s = state.data(); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s); + while (i != MMB_INVALID) { + keys.push_back(i); + i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s); + } + + return keys; +} + +static +void dumpJumpTable(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) { + auto *it = + (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset); + auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table); + + for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) { + os << " " << std::setw(4) << std::setfill(' ') << key << " : +" + << *jumps << endl; + ++jumps; + } +} + +static +void dumpSomOperation(ofstream &os, const som_operation &op) { + os << " som (type=" << u32{op.type} << ", onmatch=" << op.onmatch; + switch (op.type) { + case SOM_EXTERNAL_CALLBACK_REV_NFA: + case SOM_INTERNAL_LOC_SET_REV_NFA: + case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET: + case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE: + os << ", revNfaIndex=" << op.aux.revNfaIndex; + break; + default: + os << ", somDistance=" << op.aux.somDistance; + break; + } + os << ")" << endl; +} + +static +string dumpStrMask(const u8 *mask, size_t len) { + ostringstream oss; + for (size_t i = 0; i < len; i++) { + oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]} + << " "; + } + return oss.str(); +} + +#define PROGRAM_CASE(name) \ + case ROSE_INSTR_##name: { \ + os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ + << ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \ + const auto *ri = (const struct ROSE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + + +static +void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { + const char *pc_base = pc; + for (;;) { + u8 code = *(const u8 *)pc; + assert(code <= LAST_ROSE_INSTRUCTION); + const size_t offset = pc - pc_base; + switch (code) { + PROGRAM_CASE(END) { return; } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(ANCHORED_DELAY) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + os << " done_jump " << offset + ri->done_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LIT_EARLY) { + os << " min_offset " << ri->min_offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ONLY_EOD) { + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_BOUNDS) { + os << " min_bound " << ri->min_bound << endl; + os << " max_bound " << ri->max_bound << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_NOT_HANDLED) { + os << " key " << ri->key << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) { + os << " offset " << int{ri->offset} << 
endl; + os << " reach_index " << ri->reach_index << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + const u8 *base = (const u8 *)t; + const u8 *reach_base = base + t->lookaroundReachOffset; + const u8 *reach = reach_base + + ri->reach_index * REACH_BITVECTOR_LEN; + os << " contents "; + describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); + os << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LOOKAROUND) { + os << " index " << ri->index << endl; + os << " count " << ri->count << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaround(os, t, ri); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MASK) { + os << " and_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->and_mask << std::dec << endl; + os << " cmp_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->cmp_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MASK_32) { + os << " and_mask " + << dumpStrMask(ri->and_mask, sizeof(ri->and_mask)) + << endl; + os << " cmp_mask " + << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask)) + << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_BYTE) { + os << " and_mask 0x" << std::hex << std::setw(2) + << std::setfill('0') << u32{ri->and_mask} << std::dec + << endl; + os << " cmp_mask 0x" << std::hex << std::setw(2) + << std::setfill('0') << u32{ri->cmp_mask} << std::dec + << endl; + os << " negation " << u32{ri->negation} << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_16x8) { + os << " nib_mask " + << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_32x8) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_16x16) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_SHUFTI_32x16) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask_hi " + << 
dumpStrMask(ri->bucket_select_mask_hi, + sizeof(ri->bucket_select_mask_hi)) + << endl; + os << " bucket_select_mask_lo " + << dumpStrMask(ri->bucket_select_mask_lo, + sizeof(ri->bucket_select_mask_lo)) + << endl; + os << " offset " << ri->offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_INFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + os << " report " << ri->report << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_PREFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + os << " report " << ri->report << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(PUSH_DELAYED) { + os << " delay " << u32{ri->delay} << endl; + os << " index " << ri->index << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(RECORD_ANCHORED) { + os << " id " << ri->id << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CATCH_UP) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CATCH_UP_MPV) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ADJUST) { + os << " distance " << ri->distance << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_LEFTFIX) { + os << " queue " << ri->queue << endl; + os << " lag " << ri->lag << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_FROM_REPORT) { + dumpSomOperation(os, ri->som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ZERO) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_INFIX) { + os << " queue " << ri->queue << endl; + os << " event " << ri->event << endl; + os << " cancel " << u32{ri->cancel} << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_SUFFIX) { + os << " queue " << ri->queue << endl; + os << " event " << ri->event << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE) { + os << " quash_som " << u32{ri->quash_som} << endl; + os << " dkey " << ri->dkey << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_SOM) { + os << " quash_som " << u32{ri->quash_som} << endl; + os << " dkey " << ri->dkey << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_CHAIN) { + os << " event " << ri->event << endl; + os << " top_squash_distance " << ri->top_squash_distance + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_INT) { + dumpSomOperation(os, ri->som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_AWARE) { + dumpSomOperation(os, ri->som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EXHAUST) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " ekey " << ri->ekey << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_EXHAUST) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " ekey " << ri->ekey << endl; + } + 
PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_AND_REPORT) { + os << " quash_som " << u32{ri->quash_som} << endl; + os << " dkey " << ri->dkey << endl; + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(FINAL_REPORT) { + os << " onmatch " << ri->onmatch << endl; + os << " offset_adjust " << ri->offset_adjust << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_EXHAUSTED) { + os << " ekey " << ri->ekey << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MIN_LENGTH) { + os << " end_adj " << ri->end_adj << endl; + os << " min_length " << ri->min_length << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_STATE) { + os << " index " << ri->index << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SQUASH_GROUPS) { + os << " groups 0x" << std::hex << ri->groups << std::dec + << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_STATE) { + os << " index " << ri->index << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_BEGIN) { + os << " iter_offset " << ri->iter_offset << endl; + os << " jump_table " << ri->jump_table << endl; + dumpJumpTable(os, t, ri); + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_NEXT) { + os << " iter_offset " << ri->iter_offset << endl; + os << " jump_table " << ri->jump_table << endl; + os << " state " << ri->state << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_ANY) { + os << " iter_offset " << ri->iter_offset << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(ENGINES_EOD) { + os << " iter_offset " << ri->iter_offset << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SUFFIXES_EOD) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MATCHER_EOD) {} + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT) { + os << " lit_offset " << ri->lit_offset << endl; + os << " lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { + os << " lit_offset " << ri->lit_offset << endl; + os << " 
lit_length " << ri->lit_length << endl; + const char *lit = (const char *)t + ri->lit_offset; + os << " literal: \"" + << escapeString(string(lit, ri->lit_length)) << "\"" << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + default: + os << " UNKNOWN (code " << int{code} << ")" << endl; + os << " " << endl; + return; + } + } +} + +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION + +static +void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const u32 *litPrograms = + (const u32 *)loadFromByteCodeOffset(t, t->litProgramOffset); + const u32 *delayRebuildPrograms = + (const u32 *)loadFromByteCodeOffset(t, t->litDelayRebuildProgramOffset); + + for (u32 i = 0; i < t->literalCount; i++) { + os << "Literal " << i << endl; + os << "---------------" << endl; + + if (litPrograms[i]) { + os << "Program @ " << litPrograms[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, litPrograms[i]); + dumpProgram(os, t, prog); + } else { + os << "" << endl; + } + + if (delayRebuildPrograms[i]) { + os << "Delay Rebuild Program @ " << delayRebuildPrograms[i] << ":" + << endl; + const char *prog = (const char *)loadFromByteCodeOffset( + t, delayRebuildPrograms[i]); + dumpProgram(os, t, prog); + } + + os << endl; + } + + os.close(); +} + +static +void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + const char *base = (const char *)t; + + if (t->eodProgramOffset) { + os << "EOD Program @ " << t->eodProgramOffset << ":" << endl; + dumpProgram(os, t, base + t->eodProgramOffset); + os << endl; + } else { + os << "" << endl; + } + + os.close(); +} + +static +void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const u32 *programs = + (const u32 *)loadFromByteCodeOffset(t, t->reportProgramOffset); + + for (u32 i = 0; i < t->reportProgramCount; i++) { + os << "Report " << i << endl; + os << "---------------" << endl; + + if (programs[i]) { + os << "Program @ " << programs[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, programs[i]); + dumpProgram(os, t, prog); + } else { + os << "" << endl; + } + } + + os.close(); +} + +static +void dumpRoseDelayPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const u32 *programs = + (const u32 *)loadFromByteCodeOffset(t, t->delayProgramOffset); + + for (u32 i = 0; i < t->delay_count; i++) { + os << "Delay entry " << i << endl; + os << "---------------" << endl; + + if (programs[i]) { + os << "Program @ " << programs[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, programs[i]); + dumpProgram(os, t, prog); + } else { + os << "" << endl; + } + } + + os.close(); +} + +static +void dumpRoseAnchoredPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const u32 *programs = + (const u32 *)loadFromByteCodeOffset(t, t->anchoredProgramOffset); + + for (u32 i = 0; i < t->anchored_count; i++) { + os << "Anchored entry " << i << endl; + os << "---------------" << endl; + + if (programs[i]) { + os << "Program @ " << programs[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, programs[i]); + dumpProgram(os, t, prog); + } else { + os << "" << endl; + } + } + + os.close(); +} + +static +void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) { + const u32 qindex = n->queueIndex; + + if (qindex < 
t->outfixBeginQueue) { + fout << "chained"; + return; + } + + if (qindex < t->outfixEndQueue) { + fout << "outfix"; + return; + } + + const NfaInfo *nfa_info = getNfaInfoByQueue(t, qindex); + const NFA *nfa = getNfaByInfo(t, nfa_info); + + if (nfa_info->eod) { + fout << "eod "; + } + + if (qindex < t->leftfixBeginQueue) { + fout << "suffix"; + return; + } + + const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex); + if (left->eager) { + fout << "eager "; + } + if (left->transient) { + fout << "transient " << (u32)left->transient << " "; + } + if (left->infix) { + fout << "infix"; + u32 maxQueueLen = left->maxQueueLen; + if (maxQueueLen != (u32)(-1)) { + fout << " maxqlen=" << maxQueueLen; + } + } else { + fout << "prefix"; + } + fout << " maxlag=" << left->maxLag; + if (left->stopTable) { + fout << " miracles"; + } + if (left->countingMiracleOffset) { + const RoseCountingMiracle *cm + = (const RoseCountingMiracle *)((const char *)t + + left->countingMiracleOffset); + fout << " counting_miracle:" << (int)cm->count + << (cm->shufti ? "s" : "v"); + } + if (nfaSupportsZombie(nfa)) { + fout << " zombie"; + } + if (left->eod_check) { + fout << " eod"; + } +} + +static +void dumpComponentInfo(const RoseEngine *t, const string &base) { + stringstream ss; + ss << base << "rose_components.txt"; + ofstream fout(ss.str().c_str()); + + fout << "Index Offset\tEngine \tStates S.State Bytes Notes\n"; + + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); + + fout << left << setw(6) << i << " "; + + fout << left << ((const char *)n - (const char *)t) << "\t"; /* offset */ + + fout << left << setw(16) << describe(*n) << "\t"; + + fout << left << setw(6) << n->nPositions << " "; + fout << left << setw(7) << n->streamStateSize << " "; + fout << left << setw(7) << n->length << " "; + + dumpNfaNotes(fout, t, n); + + fout << endl; + } +} + + +static +void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { + FILE *f = fopen((base +"rose_components.csv").c_str(), "w"); + + fprintf(f, "Index, Offset,Engine Type,States,Stream State,Bytecode Size," + "Kind,Notes\n"); + + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); + nfa_kind kind; + stringstream notes; + + if (i < t->outfixBeginQueue) { + notes << "chained;"; + } + + if (nfa_info->eod) { + notes << "eod;"; + } + + if (i < t->outfixEndQueue) { + kind = NFA_OUTFIX; + } else if (i < t->leftfixBeginQueue) { + kind = NFA_SUFFIX; + } else { + const LeftNfaInfo *left = getLeftInfoByQueue(t, i); + if (left->eager) { + notes << "eager;"; + } + if (left->transient) { + notes << "transient " << (u32)left->transient << ";"; + } + if (left->infix) { + kind = NFA_INFIX; + u32 maxQueueLen = left->maxQueueLen; + if (maxQueueLen != (u32)(-1)) { + notes << "maxqlen=" << maxQueueLen << ";"; + } + } else { + kind = NFA_PREFIX; + } + notes << "maxlag=" << left->maxLag << ";"; + if (left->stopTable) { + notes << "miracles;"; + } + if (left->countingMiracleOffset) { + auto cm = (const RoseCountingMiracle *) + ((const char *)t + left->countingMiracleOffset); + notes << "counting_miracle:" << (int)cm->count + << (cm->shufti ? 
"s" : "v") << ";"; + } + if (nfaSupportsZombie(n)) { + notes << " zombie;"; + } + if (left->eod_check) { + notes << "left_eod;"; + } + } + + fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, + (const char *)n - (const char *)t, describe(*n).c_str(), + n->nPositions, n->streamStateSize, n->length, + to_string(kind).c_str(), notes.str().c_str()); + } + fclose(f); +} + +static +void dumpExhaust(const RoseEngine *t, const string &base) { + stringstream sstxt; + sstxt << base << "rose_exhaust.txt"; + FILE *f = fopen(sstxt.str().c_str(), "w"); + + const NfaInfo *infos + = (const NfaInfo *)((const char *)t + t->nfaInfoOffset); + + u32 queue_count = t->activeArrayCount; + + for (u32 i = 0; i < queue_count; ++i) { + u32 ekey_offset = infos[i].ekeyListOffset; + + fprintf(f, "%u (%u):", i, ekey_offset); + + if (ekey_offset) { + const u32 *ekeys = (const u32 *)((const char *)t + ekey_offset); + while (1) { + u32 e = *ekeys; + ++ekeys; + if (e == ~0U) { + break; + } + fprintf(f, " %u", e); + } + } + + fprintf(f, "\n"); + } + + fclose(f); +} + +static +void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { + dumpExhaust(t, base); + + for (u32 i = 0; i < t->queueCount; i++) { + const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); + const NFA *n = getNfaByInfo(t, nfa_info); + + stringstream ssbase; + ssbase << base << "rose_nfa_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); + + if (dump_raw) { + stringstream ssraw; + ssraw << base << "rose_nfa_" << i << ".raw"; + FILE *f = fopen(ssraw.str().c_str(), "w"); + fwrite(n, 1, n->length, f); + fclose(f); + } + } +} + +static +void dumpRevComponentInfo(const RoseEngine *t, const string &base) { + stringstream ss; + ss << base << "som_rev_components.txt"; + ofstream fout(ss.str().c_str()); + + fout << "Index Offset\tEngine \tStates S.State Bytes\n"; + + const char *tp = (const char *)t; + const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); + + for (u32 i = 0; i < t->somRevCount; i++) { + u32 offset = rev_offsets[i]; + const NFA *n = (const NFA *)(tp + offset); + + fout << left << setw(6) << i << " "; + + fout << left << offset << "\t"; /* offset */ + + fout << left << setw(16) << describe(*n) << "\t"; + + fout << left << setw(6) << n->nPositions << " "; + fout << left << setw(7) << n->streamStateSize << " "; + fout << left << setw(7) << n->length; + fout << endl; + } +} + +static +void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { + const char *tp = (const char *)t; + const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); + + for (u32 i = 0; i < t->somRevCount; i++) { + const NFA *n = (const NFA *)(tp + rev_offsets[i]); + + stringstream ssbase; + ssbase << base << "som_rev_nfa_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); + + if (dump_raw) { + stringstream ssraw; + ssraw << base << "som_rev_nfa_" << i << ".raw"; + FILE *f = fopen(ssraw.str().c_str(), "w"); + fwrite(n, 1, n->length, f); + fclose(f); + } + } +} + +static +void dumpAnchored(const RoseEngine *t, const string &base) { + u32 i = 0; + const anchored_matcher_info *curr + = (const anchored_matcher_info *)getALiteralMatcher(t); + + while (curr) { + const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); + + stringstream ssbase; + ssbase << base << "anchored_" << i; + nfaGenerateDumpFiles(n, ssbase.str()); + + curr = curr->next_offset ? 
(const anchored_matcher_info *) + ((const char *)curr + curr->next_offset) : nullptr; + i++; + }; +} + +static +void dumpAnchoredStats(const void *atable, FILE *f) { + assert(atable); + + u32 i = 0; + const anchored_matcher_info *curr = (const anchored_matcher_info *)atable; + + while (curr) { + const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); + + fprintf(f, " NFA %u: %s, %u states (%u bytes)\n", i, + describe(*n).c_str(), n->nPositions, n->length); + + curr = curr->next_offset ? (const anchored_matcher_info *) + ((const char *)curr + curr->next_offset) : nullptr; + i++; + }; + +} + +static +void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table, + const RoseLongLitSubtable *ll_sub, FILE *f) { + if (!ll_sub->hashBits) { + fprintf(f, " \n"); + return; + } + + const char *base = (const char *)ll_table; + + u32 nbits = ll_sub->hashBits; + u32 num_entries = 1U << nbits; + const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset); + u32 hash_occ = + count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) { + return ent.str_offset != 0; + }); + float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100; + + fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n", + nbits, hash_occ, num_entries, hash_occ_percent); + + u32 bloom_bits = ll_sub->bloomBits; + u32 bloom_size = 1U << bloom_bits; + const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset; + u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0, + [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); }); + float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100; + + fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n", + bloom_bits, bloom_occ, bloom_size, bloom_occ_percent); +} + +static +void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { + if (!t->longLitTableOffset) { + return; + } + + fprintf(f, "\n"); + fprintf(f, "Long literal table (streaming):\n"); + + const auto *ll_table = + (const struct RoseLongLitTable *)loadFromByteCodeOffset( + t, t->longLitTableOffset); + + fprintf(f, " total size : %u bytes\n", ll_table->size); + fprintf(f, " longest len : %u\n", ll_table->maxLen); + fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes); + + fprintf(f, " caseful:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f); + + fprintf(f, " nocase:\n"); + dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f); +} + +static +void roseDumpText(const RoseEngine *t, FILE *f) { + if (!t) { + fprintf(f, "<< no rose >>\n"); + return; + } + + const void *atable = getAnchoredMatcher(t); + const HWLM *ftable = getFloatingMatcher(t); + const HWLM *drtable = getDelayRebuildMatcher(t); + const HWLM *etable = getEodMatcher(t); + const HWLM *sbtable = getSmallBlockMatcher(t); + + fprintf(f, "Rose:\n\n"); + + fprintf(f, "mode: : "); + switch(t->mode) { + case HS_MODE_BLOCK: + fprintf(f, "block"); + break; + case HS_MODE_STREAM: + fprintf(f, "streaming"); + break; + case HS_MODE_VECTORED: + fprintf(f, "vectored"); + break; + } + fprintf(f, "\n"); + + fprintf(f, "properties :"); + if (t->canExhaust) { + fprintf(f, " canExhaust"); + } + if (t->hasSom) { + fprintf(f, " hasSom"); + } + if (t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL) { + fprintf(f, " pureLiteral"); + } + if (t->runtimeImpl == ROSE_RUNTIME_SINGLE_OUTFIX) { + fprintf(f, " soleOutfix"); + } + fprintf(f, "\n"); + + fprintf(f, "dkey count : %u\n", t->dkeyCount); + fprintf(f, "som slot count : %u\n", t->somLocationCount); + fprintf(f, "som width : %u 
bytes\n", t->somHorizon); + fprintf(f, "rose count : %u\n", t->roseCount); + fprintf(f, "\n"); + + fprintf(f, "total engine size : %u bytes\n", t->size); + fprintf(f, " - anchored matcher : %u bytes over %u bytes\n", t->asize, + t->anchoredDistance); + fprintf(f, " - floating matcher : %zu bytes%s", + ftable ? hwlmSize(ftable) : 0, t->noFloatingRoots ? " (cond)":""); + if (t->floatingMinDistance) { + fprintf(f, " from %s bytes\n", + rose_off(t->floatingMinDistance).str().c_str()); + } + if (t->floatingDistance != ROSE_BOUND_INF && ftable) { + fprintf(f, " over %u bytes\n", t->floatingDistance); + } else { + fprintf(f, "\n"); + } + fprintf(f, " - delay-rb matcher : %zu bytes\n", + drtable ? hwlmSize(drtable) : 0); + fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n", + etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); + fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", + sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance); + fprintf(f, " - role state table : %zu bytes\n", + t->rolesWithStateCount * sizeof(u32)); + fprintf(f, " - nfa info table : %zu bytes\n", + t->queueCount * sizeof(NfaInfo)); + fprintf(f, " - lookaround table : %u bytes\n", + t->nfaInfoOffset - t->lookaroundTableOffset); + fprintf(f, " - lookaround reach : %u bytes\n", + t->lookaroundTableOffset - t->lookaroundReachOffset); + + fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end); + fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); + fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); + fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); + fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); + fprintf(f, " - active array : %u bytes\n", + mmbit_size(t->activeArrayCount)); + fprintf(f, " - active rose : %u bytes\n", + mmbit_size(t->activeLeftCount)); + fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize); + fprintf(f, " - nfa state : %u bytes\n", t->nfaStateSize); + fprintf(f, " - (trans. 
nfa state): %u bytes\n", t->tStateSize); + fprintf(f, " - one whole bytes : %u bytes\n", + t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable); + fprintf(f, " - groups : %u bytes\n", + t->stateOffsets.groups_size); + fprintf(f, "\n"); + + fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups); + fprintf(f, "floating groups : 0x%016llx\n", t->floating_group_mask); + fprintf(f, "handled key count : %u\n", t->handledKeyCount); + fprintf(f, "\n"); + + fprintf(f, "total literal count : %u\n", t->totalNumLiterals); + fprintf(f, " prog table size : %u\n", t->literalCount); + fprintf(f, " delayed literals : %u\n", t->delay_count); + + fprintf(f, "\n"); + fprintf(f, " minWidth : %u\n", t->minWidth); + fprintf(f, " minWidthExcludingBoundaries : %u\n", + t->minWidthExcludingBoundaries); + fprintf(f, " maxBiAnchoredWidth : %s\n", + rose_off(t->maxBiAnchoredWidth).str().c_str()); + fprintf(f, " minFloatLitMatchOffset : %s\n", + rose_off(t->floatingMinLiteralMatchOffset).str().c_str()); + fprintf(f, " delay_base_id : %u\n", t->delay_base_id); + fprintf(f, " maxFloatingDelayedMatch : %s\n", + rose_off(t->maxFloatingDelayedMatch).str().c_str()); + + if (atable) { + fprintf(f, "\nAnchored literal matcher stats:\n\n"); + dumpAnchoredStats(atable, f); + } + + if (ftable) { + fprintf(f, "\nFloating literal matcher stats:\n\n"); + hwlmPrintStats(ftable, f); + } + + if (drtable) { + fprintf(f, "\nDelay Rebuild literal matcher stats:\n\n"); + hwlmPrintStats(drtable, f); + } + + if (etable) { + fprintf(f, "\nEOD-anchored literal matcher stats:\n\n"); + hwlmPrintStats(etable, f); + } + + if (sbtable) { + fprintf(f, "\nSmall-block literal matcher stats:\n\n"); + hwlmPrintStats(sbtable, f); + } + + dumpLongLiteralTable(t, f); +} + +#define DUMP_U8(o, member) \ + fprintf(f, " %-32s: %hhu/%hhx\n", #member, o->member, o->member) +#define DUMP_U32(o, member) \ + fprintf(f, " %-32s: %u/%08x\n", #member, o->member, o->member) +#define DUMP_U64(o, member) \ + fprintf(f, " %-32s: %llu/%016llx\n", #member, o->member, o->member) + +static +void roseDumpStructRaw(const RoseEngine *t, FILE *f) { + fprintf(f, "struct RoseEngine {\n"); + DUMP_U8(t, noFloatingRoots); + DUMP_U8(t, requiresEodCheck); + DUMP_U8(t, hasOutfixesInSmallBlock); + DUMP_U8(t, runtimeImpl); + DUMP_U8(t, mpvTriggeredByLeaf); + DUMP_U8(t, canExhaust); + DUMP_U8(t, hasSom); + DUMP_U8(t, somHorizon); + DUMP_U8(t, needsCatchup); + DUMP_U32(t, mode); + DUMP_U32(t, historyRequired); + DUMP_U32(t, ekeyCount); + DUMP_U32(t, dkeyCount); + DUMP_U32(t, dkeyLogSize); + DUMP_U32(t, invDkeyOffset); + DUMP_U32(t, somLocationCount); + DUMP_U32(t, somLocationFatbitSize); + DUMP_U32(t, rolesWithStateCount); + DUMP_U32(t, stateSize); + DUMP_U32(t, anchorStateSize); + DUMP_U32(t, nfaStateSize); + DUMP_U32(t, tStateSize); + DUMP_U32(t, smallWriteOffset); + DUMP_U32(t, amatcherOffset); + DUMP_U32(t, ematcherOffset); + DUMP_U32(t, fmatcherOffset); + DUMP_U32(t, drmatcherOffset); + DUMP_U32(t, sbmatcherOffset); + DUMP_U32(t, longLitTableOffset); + DUMP_U32(t, amatcherMinWidth); + DUMP_U32(t, fmatcherMinWidth); + DUMP_U32(t, eodmatcherMinWidth); + DUMP_U32(t, amatcherMaxBiAnchoredWidth); + DUMP_U32(t, fmatcherMaxBiAnchoredWidth); + DUMP_U32(t, litProgramOffset); + DUMP_U32(t, litDelayRebuildProgramOffset); + DUMP_U32(t, reportProgramOffset); + DUMP_U32(t, reportProgramCount); + DUMP_U32(t, delayProgramOffset); + DUMP_U32(t, anchoredProgramOffset); + DUMP_U32(t, literalCount); + DUMP_U32(t, activeArrayCount); + DUMP_U32(t, activeLeftCount); + DUMP_U32(t, queueCount); 
+ DUMP_U32(t, activeQueueArraySize); + DUMP_U32(t, eagerIterOffset); + DUMP_U32(t, handledKeyCount); + DUMP_U32(t, handledKeyFatbitSize); + DUMP_U32(t, leftOffset); + DUMP_U32(t, roseCount); + DUMP_U32(t, lookaroundTableOffset); + DUMP_U32(t, lookaroundReachOffset); + DUMP_U32(t, eodProgramOffset); + DUMP_U32(t, lastByteHistoryIterOffset); + DUMP_U32(t, minWidth); + DUMP_U32(t, minWidthExcludingBoundaries); + DUMP_U32(t, maxBiAnchoredWidth); + DUMP_U32(t, anchoredDistance); + DUMP_U32(t, anchoredMinDistance); + DUMP_U32(t, floatingDistance); + DUMP_U32(t, floatingMinDistance); + DUMP_U32(t, smallBlockDistance); + DUMP_U32(t, floatingMinLiteralMatchOffset); + DUMP_U32(t, nfaInfoOffset); + DUMP_U64(t, initialGroups); + DUMP_U64(t, floating_group_mask); + DUMP_U32(t, size); + DUMP_U32(t, delay_count); + DUMP_U32(t, delay_fatbit_size); + DUMP_U32(t, delay_base_id); + DUMP_U32(t, anchored_count); + DUMP_U32(t, anchored_fatbit_size); + DUMP_U32(t, anchored_base_id); + DUMP_U32(t, maxFloatingDelayedMatch); + DUMP_U32(t, delayRebuildLength); + DUMP_U32(t, stateOffsets.history); + DUMP_U32(t, stateOffsets.exhausted); + DUMP_U32(t, stateOffsets.activeLeafArray); + DUMP_U32(t, stateOffsets.activeLeftArray); + DUMP_U32(t, stateOffsets.activeLeftArray_size); + DUMP_U32(t, stateOffsets.leftfixLagTable); + DUMP_U32(t, stateOffsets.anchorState); + DUMP_U32(t, stateOffsets.groups); + DUMP_U32(t, stateOffsets.groups_size); + DUMP_U32(t, stateOffsets.longLitState); + DUMP_U32(t, stateOffsets.somLocation); + DUMP_U32(t, stateOffsets.somValid); + DUMP_U32(t, stateOffsets.somWritable); + DUMP_U32(t, stateOffsets.end); + DUMP_U32(t, boundary.reportEodOffset); + DUMP_U32(t, boundary.reportZeroOffset); + DUMP_U32(t, boundary.reportZeroEodOffset); + DUMP_U32(t, totalNumLiterals); + DUMP_U32(t, asize); + DUMP_U32(t, outfixBeginQueue); + DUMP_U32(t, outfixEndQueue); + DUMP_U32(t, leftfixBeginQueue); + DUMP_U32(t, initMpvNfa); + DUMP_U32(t, rosePrefixCount); + DUMP_U32(t, activeLeftIterOffset); + DUMP_U32(t, ematcherRegionSize); + DUMP_U32(t, somRevCount); + DUMP_U32(t, somRevOffsetOffset); + DUMP_U32(t, longLitStreamState); + fprintf(f, "}\n"); + fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); +} + +static +void roseDumpComponents(const RoseEngine *t, bool dump_raw, + const string &base) { + dumpComponentInfo(t, base); + dumpComponentInfoCsv(t, base); + dumpNfas(t, dump_raw, base); + dumpAnchored(t, base); + dumpRevComponentInfo(t, base); + dumpRevNfas(t, dump_raw, base); + dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt"); + dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); + dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); + dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); + dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); +} + void dumpRose(const RoseBuild &build_base, const RoseEngine *t, const Grey &grey) { if (!grey.dumpFlags) { diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp deleted file mode 100644 index d83f8f9e..00000000 --- a/src/rose/rose_dump.cpp +++ /dev/null @@ -1,1481 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include "hwlm/hwlm_build.h" -#include "hwlm/hwlm_dump.h" -#include "rose_build.h" -#include "rose_dump.h" -#include "rose_common.h" -#include "rose_internal.h" -#include "rose_program.h" -#include "hs_compile.h" -#include "ue2common.h" -#include "nfa/nfa_build_util.h" -#include "nfa/nfa_dump_api.h" -#include "nfa/nfa_internal.h" -#include "nfa/nfa_kind.h" -#include "util/dump_charclass.h" -#include "util/multibit_build.h" -#include "util/multibit.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef DUMP_SUPPORT -#error No dump support! 
-#endif
-
-using namespace std;
-
-namespace ue2 {
-
-namespace /* anonymous */ {
-
-struct rose_off {
-    explicit rose_off(u32 j) : i(j) {}
-    string str(void) const;
-    u32 i;
-};
-
-ostream &operator<< (ostream &o, const rose_off &to) {
-    if (to.i == ROSE_BOUND_INF) {
-        o << "inf";
-    } else {
-        o << to.i;
-    }
-    return o;
-}
-
-string rose_off::str(void) const {
-    ostringstream out;
-    out << *this;
-    return out.str();
-}
-
-}
-
-static
-const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) {
-    if (!offset) {
-        return nullptr;
-    }
-
-    const char *lt = (const char *)t + offset;
-    return lt;
-}
-
-static
-const void *getAnchoredMatcher(const RoseEngine *t) {
-    return loadFromByteCodeOffset(t, t->amatcherOffset);
-}
-
-static
-const HWLM *getFloatingMatcher(const RoseEngine *t) {
-    return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset);
-}
-
-static
-const HWLM *getDelayRebuildMatcher(const RoseEngine *t) {
-    return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset);
-}
-
-static
-const HWLM *getEodMatcher(const RoseEngine *t) {
-    return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset);
-}
-
-static
-const HWLM *getSmallBlockMatcher(const RoseEngine *t) {
-    return (const HWLM *)loadFromByteCodeOffset(t, t->sbmatcherOffset);
-}
-
-static
-CharReach bitvectorToReach(const u8 *reach) {
-    CharReach cr;
-
-    for (size_t i = 0; i < 256; i++) {
-        if (reach[i / 8] & (1U << (i % 8))) {
-            cr.set(i);
-
-        }
-    }
-    return cr;
-}
-
-static
-void dumpLookaround(ofstream &os, const RoseEngine *t,
-                    const ROSE_STRUCT_CHECK_LOOKAROUND *ri) {
-    assert(ri);
-
-    const u8 *base = (const u8 *)t;
-    const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
-    const u8 *reach_base = base + t->lookaroundReachOffset;
-
-    const s8 *look = look_base + ri->index;
-    const s8 *look_end = look + ri->count;
-    const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN;
-
-    os << " contents:" << endl;
-
-    for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
-        os << " " << std::setw(4) << std::setfill(' ') << int{*look}
-           << ": ";
-        describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
-        os << endl;
-    }
-}
-
-static
-vector<u32> sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) {
-    vector<u32> keys;
-
-    if (num_bits == 0) {
-        return keys;
-    }
-
-    vector<u8> bits(mmbit_size(num_bits), u8{0xff}); // All bits on. 
- vector state(MAX_SPARSE_ITER_STATES); - - const u8 *b = bits.data(); - mmbit_sparse_state *s = state.data(); - - u32 idx = 0; - u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s); - while (i != MMB_INVALID) { - keys.push_back(i); - i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s); - } - - return keys; -} - -static -void dumpJumpTable(ofstream &os, const RoseEngine *t, - const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) { - auto *it = - (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset); - auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table); - - for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) { - os << " " << std::setw(4) << std::setfill(' ') << key << " : +" - << *jumps << endl; - ++jumps; - } -} - -static -void dumpSomOperation(ofstream &os, const som_operation &op) { - os << " som (type=" << u32{op.type} << ", onmatch=" << op.onmatch; - switch (op.type) { - case SOM_EXTERNAL_CALLBACK_REV_NFA: - case SOM_INTERNAL_LOC_SET_REV_NFA: - case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET: - case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE: - os << ", revNfaIndex=" << op.aux.revNfaIndex; - break; - default: - os << ", somDistance=" << op.aux.somDistance; - break; - } - os << ")" << endl; -} - -static -string dumpStrMask(const u8 *mask, size_t len) { - ostringstream oss; - for (size_t i = 0; i < len; i++) { - oss << std::hex << std::setw(2) << std::setfill('0') << u32{mask[i]} - << " "; - } - return oss.str(); -} - -#define PROGRAM_CASE(name) \ - case ROSE_INSTR_##name: { \ - os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ - << ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \ - const auto *ri = (const struct ROSE_STRUCT_##name *)pc; - -#define PROGRAM_NEXT_INSTRUCTION \ - pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ - break; \ - } - -static -void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { - const char *pc_base = pc; - for (;;) { - u8 code = *(const u8 *)pc; - assert(code <= LAST_ROSE_INSTRUCTION); - const size_t offset = pc - pc_base; - switch (code) { - PROGRAM_CASE(END) { return; } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(ANCHORED_DELAY) { - os << " groups 0x" << std::hex << ri->groups << std::dec - << endl; - os << " done_jump " << offset + ri->done_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LIT_EARLY) { - os << " min_offset " << ri->min_offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_GROUPS) { - os << " groups 0x" << std::hex << ri->groups << std::dec - << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_ONLY_EOD) { - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_BOUNDS) { - os << " min_bound " << ri->min_bound << endl; - os << " max_bound " << ri->max_bound << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_NOT_HANDLED) { - os << " key " << ri->key << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) { - os << " offset " << int{ri->offset} << endl; - os << " reach_index " << ri->reach_index << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - const u8 *base = (const u8 *)t; - const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + - ri->reach_index * REACH_BITVECTOR_LEN; - os << " contents "; - 
describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); - os << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LOOKAROUND) { - os << " index " << ri->index << endl; - os << " count " << ri->count << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - dumpLookaround(os, t, ri); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MASK) { - os << " and_mask 0x" << std::hex << std::setw(16) - << std::setfill('0') << ri->and_mask << std::dec << endl; - os << " cmp_mask 0x" << std::hex << std::setw(16) - << std::setfill('0') << ri->cmp_mask << std::dec << endl; - os << " neg_mask 0x" << std::hex << std::setw(16) - << std::setfill('0') << ri->neg_mask << std::dec << endl; - os << " offset " << ri->offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MASK_32) { - os << " and_mask " - << dumpStrMask(ri->and_mask, sizeof(ri->and_mask)) - << endl; - os << " cmp_mask " - << dumpStrMask(ri->cmp_mask, sizeof(ri->cmp_mask)) - << endl; - os << " neg_mask 0x" << std::hex << std::setw(8) - << std::setfill('0') << ri->neg_mask << std::dec << endl; - os << " offset " << ri->offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_BYTE) { - os << " and_mask 0x" << std::hex << std::setw(2) - << std::setfill('0') << u32{ri->and_mask} << std::dec - << endl; - os << " cmp_mask 0x" << std::hex << std::setw(2) - << std::setfill('0') << u32{ri->cmp_mask} << std::dec - << endl; - os << " negation " << u32{ri->negation} << endl; - os << " offset " << ri->offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_16x8) { - os << " nib_mask " - << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask)) - << endl; - os << " bucket_select_mask " - << dumpStrMask(ri->bucket_select_mask, - sizeof(ri->bucket_select_mask)) - << endl; - os << " offset " << ri->offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_32x8) { - os << " hi_mask " - << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) - << endl; - os << " lo_mask " - << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) - << endl; - os << " bucket_select_mask " - << dumpStrMask(ri->bucket_select_mask, - sizeof(ri->bucket_select_mask)) - << endl; - os << " offset " << ri->offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_16x16) { - os << " hi_mask " - << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) - << endl; - os << " lo_mask " - << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) - << endl; - os << " bucket_select_mask " - << dumpStrMask(ri->bucket_select_mask, - sizeof(ri->bucket_select_mask)) - << endl; - os << " offset " << ri->offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_SHUFTI_32x16) { - os << " hi_mask " - << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) - << endl; - os << " lo_mask " - << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) - << endl; - os << " bucket_select_mask_hi " - << dumpStrMask(ri->bucket_select_mask_hi, - sizeof(ri->bucket_select_mask_hi)) - << endl; - os << " bucket_select_mask_lo " - << dumpStrMask(ri->bucket_select_mask_lo, - sizeof(ri->bucket_select_mask_lo)) - << endl; - os << " offset " << ri->offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; 
- } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_INFIX) { - os << " queue " << ri->queue << endl; - os << " lag " << ri->lag << endl; - os << " report " << ri->report << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_PREFIX) { - os << " queue " << ri->queue << endl; - os << " lag " << ri->lag << endl; - os << " report " << ri->report << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(PUSH_DELAYED) { - os << " delay " << u32{ri->delay} << endl; - os << " index " << ri->index << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(RECORD_ANCHORED) { - os << " id " << ri->id << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CATCH_UP) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CATCH_UP_MPV) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ADJUST) { - os << " distance " << ri->distance << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_LEFTFIX) { - os << " queue " << ri->queue << endl; - os << " lag " << ri->lag << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_FROM_REPORT) { - dumpSomOperation(os, ri->som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ZERO) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_INFIX) { - os << " queue " << ri->queue << endl; - os << " event " << ri->event << endl; - os << " cancel " << u32{ri->cancel} << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_SUFFIX) { - os << " queue " << ri->queue << endl; - os << " event " << ri->event << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE) { - os << " quash_som " << u32{ri->quash_som} << endl; - os << " dkey " << ri->dkey << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE_SOM) { - os << " quash_som " << u32{ri->quash_som} << endl; - os << " dkey " << ri->dkey << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_CHAIN) { - os << " event " << ri->event << endl; - os << " top_squash_distance " << ri->top_squash_distance - << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_INT) { - dumpSomOperation(os, ri->som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_AWARE) { - dumpSomOperation(os, ri->som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT) { - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_EXHAUST) { - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " ekey " << ri->ekey << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM) { - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_EXHAUST) { - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " ekey " << ri->ekey << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(DEDUPE_AND_REPORT) { - os << " quash_som " << u32{ri->quash_som} << endl; - os << " dkey " << ri->dkey << endl; - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - 
PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(FINAL_REPORT) { - os << " onmatch " << ri->onmatch << endl; - os << " offset_adjust " << ri->offset_adjust << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_EXHAUSTED) { - os << " ekey " << ri->ekey << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MIN_LENGTH) { - os << " end_adj " << ri->end_adj << endl; - os << " min_length " << ri->min_length << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_STATE) { - os << " index " << ri->index << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_GROUPS) { - os << " groups 0x" << std::hex << ri->groups << std::dec - << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SQUASH_GROUPS) { - os << " groups 0x" << std::hex << ri->groups << std::dec - << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_STATE) { - os << " index " << ri->index << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_BEGIN) { - os << " iter_offset " << ri->iter_offset << endl; - os << " jump_table " << ri->jump_table << endl; - dumpJumpTable(os, t, ri); - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_NEXT) { - os << " iter_offset " << ri->iter_offset << endl; - os << " jump_table " << ri->jump_table << endl; - os << " state " << ri->state << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SPARSE_ITER_ANY) { - os << " iter_offset " << ri->iter_offset << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(ENGINES_EOD) { - os << " iter_offset " << ri->iter_offset << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SUFFIXES_EOD) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(MATCHER_EOD) {} - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LONG_LIT) { - os << " lit_offset " << ri->lit_offset << endl; - os << " lit_length " << ri->lit_length << endl; - const char *lit = (const char *)t + ri->lit_offset; - os << " literal: \"" - << escapeString(string(lit, ri->lit_length)) << "\"" << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) { - os << " lit_offset " << ri->lit_offset << endl; - os << " lit_length " << ri->lit_length << endl; - const char *lit = (const char *)t + ri->lit_offset; - os << " literal: \"" - << escapeString(string(lit, ri->lit_length)) << "\"" << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MED_LIT) { - os << " lit_offset " << ri->lit_offset << endl; - os << " lit_length " << ri->lit_length << endl; - const char *lit = (const char *)t + ri->lit_offset; - os << " literal: \"" - << escapeString(string(lit, ri->lit_length)) << "\"" << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_MED_LIT_NOCASE) { - os << " lit_offset " << ri->lit_offset << endl; - os << " lit_length " << ri->lit_length << endl; - const char *lit = (const char *)t + ri->lit_offset; - os << " literal: \"" - << escapeString(string(lit, ri->lit_length)) << "\"" << endl; - os << " fail_jump " << offset + ri->fail_jump << endl; - } - PROGRAM_NEXT_INSTRUCTION - - default: - os << " UNKNOWN (code " << int{code} 
<< ")" << endl; - os << " " << endl; - return; - } - } -} - -#undef PROGRAM_CASE -#undef PROGRAM_NEXT_INSTRUCTION - -static -void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { - ofstream os(filename); - - const u32 *litPrograms = - (const u32 *)loadFromByteCodeOffset(t, t->litProgramOffset); - const u32 *delayRebuildPrograms = - (const u32 *)loadFromByteCodeOffset(t, t->litDelayRebuildProgramOffset); - - for (u32 i = 0; i < t->literalCount; i++) { - os << "Literal " << i << endl; - os << "---------------" << endl; - - if (litPrograms[i]) { - os << "Program @ " << litPrograms[i] << ":" << endl; - const char *prog = - (const char *)loadFromByteCodeOffset(t, litPrograms[i]); - dumpProgram(os, t, prog); - } else { - os << "" << endl; - } - - if (delayRebuildPrograms[i]) { - os << "Delay Rebuild Program @ " << delayRebuildPrograms[i] << ":" - << endl; - const char *prog = (const char *)loadFromByteCodeOffset( - t, delayRebuildPrograms[i]); - dumpProgram(os, t, prog); - } - - os << endl; - } - - os.close(); -} - -static -void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { - ofstream os(filename); - const char *base = (const char *)t; - - if (t->eodProgramOffset) { - os << "EOD Program @ " << t->eodProgramOffset << ":" << endl; - dumpProgram(os, t, base + t->eodProgramOffset); - os << endl; - } else { - os << "" << endl; - } - - os.close(); -} - -static -void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { - ofstream os(filename); - - const u32 *programs = - (const u32 *)loadFromByteCodeOffset(t, t->reportProgramOffset); - - for (u32 i = 0; i < t->reportProgramCount; i++) { - os << "Report " << i << endl; - os << "---------------" << endl; - - if (programs[i]) { - os << "Program @ " << programs[i] << ":" << endl; - const char *prog = - (const char *)loadFromByteCodeOffset(t, programs[i]); - dumpProgram(os, t, prog); - } else { - os << "" << endl; - } - } - - os.close(); -} - -static -void dumpRoseDelayPrograms(const RoseEngine *t, const string &filename) { - ofstream os(filename); - - const u32 *programs = - (const u32 *)loadFromByteCodeOffset(t, t->delayProgramOffset); - - for (u32 i = 0; i < t->delay_count; i++) { - os << "Delay entry " << i << endl; - os << "---------------" << endl; - - if (programs[i]) { - os << "Program @ " << programs[i] << ":" << endl; - const char *prog = - (const char *)loadFromByteCodeOffset(t, programs[i]); - dumpProgram(os, t, prog); - } else { - os << "" << endl; - } - } - - os.close(); -} - -static -void dumpRoseAnchoredPrograms(const RoseEngine *t, const string &filename) { - ofstream os(filename); - - const u32 *programs = - (const u32 *)loadFromByteCodeOffset(t, t->anchoredProgramOffset); - - for (u32 i = 0; i < t->anchored_count; i++) { - os << "Anchored entry " << i << endl; - os << "---------------" << endl; - - if (programs[i]) { - os << "Program @ " << programs[i] << ":" << endl; - const char *prog = - (const char *)loadFromByteCodeOffset(t, programs[i]); - dumpProgram(os, t, prog); - } else { - os << "" << endl; - } - } - - os.close(); -} - -static -void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) { - const u32 qindex = n->queueIndex; - - if (qindex < t->outfixBeginQueue) { - fout << "chained"; - return; - } - - if (qindex < t->outfixEndQueue) { - fout << "outfix"; - return; - } - - const NfaInfo *nfa_info = getNfaInfoByQueue(t, qindex); - const NFA *nfa = getNfaByInfo(t, nfa_info); - - if (nfa_info->eod) { - fout << "eod "; - } - - if (qindex < t->leftfixBeginQueue) { - 
fout << "suffix"; - return; - } - - const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex); - if (left->eager) { - fout << "eager "; - } - if (left->transient) { - fout << "transient " << (u32)left->transient << " "; - } - if (left->infix) { - fout << "infix"; - u32 maxQueueLen = left->maxQueueLen; - if (maxQueueLen != (u32)(-1)) { - fout << " maxqlen=" << maxQueueLen; - } - } else { - fout << "prefix"; - } - fout << " maxlag=" << left->maxLag; - if (left->stopTable) { - fout << " miracles"; - } - if (left->countingMiracleOffset) { - const RoseCountingMiracle *cm - = (const RoseCountingMiracle *)((const char *)t - + left->countingMiracleOffset); - fout << " counting_miracle:" << (int)cm->count - << (cm->shufti ? "s" : "v"); - } - if (nfaSupportsZombie(nfa)) { - fout << " zombie"; - } - if (left->eod_check) { - fout << " eod"; - } -} - -static -void dumpComponentInfo(const RoseEngine *t, const string &base) { - stringstream ss; - ss << base << "rose_components.txt"; - ofstream fout(ss.str().c_str()); - - fout << "Index Offset\tEngine \tStates S.State Bytes Notes\n"; - - for (u32 i = 0; i < t->queueCount; i++) { - const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); - const NFA *n = getNfaByInfo(t, nfa_info); - - fout << left << setw(6) << i << " "; - - fout << left << ((const char *)n - (const char *)t) << "\t"; /* offset */ - - fout << left << setw(16) << describe(*n) << "\t"; - - fout << left << setw(6) << n->nPositions << " "; - fout << left << setw(7) << n->streamStateSize << " "; - fout << left << setw(7) << n->length << " "; - - dumpNfaNotes(fout, t, n); - - fout << endl; - } -} - - -static -void dumpComponentInfoCsv(const RoseEngine *t, const string &base) { - FILE *f = fopen((base +"rose_components.csv").c_str(), "w"); - - fprintf(f, "Index, Offset,Engine Type,States,Stream State,Bytecode Size," - "Kind,Notes\n"); - - for (u32 i = 0; i < t->queueCount; i++) { - const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); - const NFA *n = getNfaByInfo(t, nfa_info); - nfa_kind kind; - stringstream notes; - - if (i < t->outfixBeginQueue) { - notes << "chained;"; - } - - if (nfa_info->eod) { - notes << "eod;"; - } - - if (i < t->outfixEndQueue) { - kind = NFA_OUTFIX; - } else if (i < t->leftfixBeginQueue) { - kind = NFA_SUFFIX; - } else { - const LeftNfaInfo *left = getLeftInfoByQueue(t, i); - if (left->eager) { - notes << "eager;"; - } - if (left->transient) { - notes << "transient " << (u32)left->transient << ";"; - } - if (left->infix) { - kind = NFA_INFIX; - u32 maxQueueLen = left->maxQueueLen; - if (maxQueueLen != (u32)(-1)) { - notes << "maxqlen=" << maxQueueLen << ";"; - } - } else { - kind = NFA_PREFIX; - } - notes << "maxlag=" << left->maxLag << ";"; - if (left->stopTable) { - notes << "miracles;"; - } - if (left->countingMiracleOffset) { - auto cm = (const RoseCountingMiracle *) - ((const char *)t + left->countingMiracleOffset); - notes << "counting_miracle:" << (int)cm->count - << (cm->shufti ? 
"s" : "v") << ";"; - } - if (nfaSupportsZombie(n)) { - notes << " zombie;"; - } - if (left->eod_check) { - notes << "left_eod;"; - } - } - - fprintf(f, "%u,%zd,\"%s\",%u,%u,%u,%s,%s\n", i, - (const char *)n - (const char *)t, describe(*n).c_str(), - n->nPositions, n->streamStateSize, n->length, - to_string(kind).c_str(), notes.str().c_str()); - } - fclose(f); -} - - -static -void dumpExhaust(const RoseEngine *t, const string &base) { - stringstream sstxt; - sstxt << base << "rose_exhaust.txt"; - FILE *f = fopen(sstxt.str().c_str(), "w"); - - const NfaInfo *infos - = (const NfaInfo *)((const char *)t + t->nfaInfoOffset); - - u32 queue_count = t->activeArrayCount; - - for (u32 i = 0; i < queue_count; ++i) { - u32 ekey_offset = infos[i].ekeyListOffset; - - fprintf(f, "%u (%u):", i, ekey_offset); - - if (ekey_offset) { - const u32 *ekeys = (const u32 *)((const char *)t + ekey_offset); - while (1) { - u32 e = *ekeys; - ++ekeys; - if (e == ~0U) { - break; - } - fprintf(f, " %u", e); - } - } - - fprintf(f, "\n"); - } - - fclose(f); -} - -static -void dumpNfas(const RoseEngine *t, bool dump_raw, const string &base) { - dumpExhaust(t, base); - - for (u32 i = 0; i < t->queueCount; i++) { - const NfaInfo *nfa_info = getNfaInfoByQueue(t, i); - const NFA *n = getNfaByInfo(t, nfa_info); - - stringstream ssbase; - ssbase << base << "rose_nfa_" << i; - nfaGenerateDumpFiles(n, ssbase.str()); - - if (dump_raw) { - stringstream ssraw; - ssraw << base << "rose_nfa_" << i << ".raw"; - FILE *f = fopen(ssraw.str().c_str(), "w"); - fwrite(n, 1, n->length, f); - fclose(f); - } - } -} - -static -void dumpRevComponentInfo(const RoseEngine *t, const string &base) { - stringstream ss; - ss << base << "som_rev_components.txt"; - ofstream fout(ss.str().c_str()); - - fout << "Index Offset\tEngine \tStates S.State Bytes\n"; - - const char *tp = (const char *)t; - const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); - - for (u32 i = 0; i < t->somRevCount; i++) { - u32 offset = rev_offsets[i]; - const NFA *n = (const NFA *)(tp + offset); - - fout << left << setw(6) << i << " "; - - fout << left << offset << "\t"; /* offset */ - - fout << left << setw(16) << describe(*n) << "\t"; - - fout << left << setw(6) << n->nPositions << " "; - fout << left << setw(7) << n->streamStateSize << " "; - fout << left << setw(7) << n->length; - fout << endl; - } -} - -static -void dumpRevNfas(const RoseEngine *t, bool dump_raw, const string &base) { - const char *tp = (const char *)t; - const u32 *rev_offsets = (const u32 *)(tp + t->somRevOffsetOffset); - - for (u32 i = 0; i < t->somRevCount; i++) { - const NFA *n = (const NFA *)(tp + rev_offsets[i]); - - stringstream ssbase; - ssbase << base << "som_rev_nfa_" << i; - nfaGenerateDumpFiles(n, ssbase.str()); - - if (dump_raw) { - stringstream ssraw; - ssraw << base << "som_rev_nfa_" << i << ".raw"; - FILE *f = fopen(ssraw.str().c_str(), "w"); - fwrite(n, 1, n->length, f); - fclose(f); - } - } -} - -static -void dumpAnchored(const RoseEngine *t, const string &base) { - u32 i = 0; - const anchored_matcher_info *curr - = (const anchored_matcher_info *)getALiteralMatcher(t); - - while (curr) { - const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); - - stringstream ssbase; - ssbase << base << "anchored_" << i; - nfaGenerateDumpFiles(n, ssbase.str()); - - curr = curr->next_offset ? 
(const anchored_matcher_info *) - ((const char *)curr + curr->next_offset) : nullptr; - i++; - }; -} - -static -void dumpAnchoredStats(const void *atable, FILE *f) { - assert(atable); - - u32 i = 0; - const anchored_matcher_info *curr = (const anchored_matcher_info *)atable; - - while (curr) { - const NFA *n = (const NFA *)((const char *)curr + sizeof(*curr)); - - fprintf(f, " NFA %u: %s, %u states (%u bytes)\n", i, - describe(*n).c_str(), n->nPositions, n->length); - - curr = curr->next_offset ? (const anchored_matcher_info *) - ((const char *)curr + curr->next_offset) : nullptr; - i++; - }; - -} - -static -void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table, - const RoseLongLitSubtable *ll_sub, FILE *f) { - if (!ll_sub->hashBits) { - fprintf(f, " \n"); - return; - } - - const char *base = (const char *)ll_table; - - u32 nbits = ll_sub->hashBits; - u32 num_entries = 1U << nbits; - const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset); - u32 hash_occ = - count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) { - return ent.str_offset != 0; - }); - float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100; - - fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n", - nbits, hash_occ, num_entries, hash_occ_percent); - - u32 bloom_bits = ll_sub->bloomBits; - u32 bloom_size = 1U << bloom_bits; - const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset; - u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0, - [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); }); - float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100; - - fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n", - bloom_bits, bloom_occ, bloom_size, bloom_occ_percent); -} - -static -void dumpLongLiteralTable(const RoseEngine *t, FILE *f) { - if (!t->longLitTableOffset) { - return; - } - - fprintf(f, "\n"); - fprintf(f, "Long literal table (streaming):\n"); - - const auto *ll_table = - (const struct RoseLongLitTable *)loadFromByteCodeOffset( - t, t->longLitTableOffset); - - fprintf(f, " total size : %u bytes\n", ll_table->size); - fprintf(f, " longest len : %u\n", ll_table->maxLen); - fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes); - - fprintf(f, " caseful:\n"); - dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f); - - fprintf(f, " nocase:\n"); - dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f); -} - -// Externally accessible functions - -void roseDumpText(const RoseEngine *t, FILE *f) { - if (!t) { - fprintf(f, "<< no rose >>\n"); - return; - } - - const void *atable = getAnchoredMatcher(t); - const HWLM *ftable = getFloatingMatcher(t); - const HWLM *drtable = getDelayRebuildMatcher(t); - const HWLM *etable = getEodMatcher(t); - const HWLM *sbtable = getSmallBlockMatcher(t); - - fprintf(f, "Rose:\n\n"); - - fprintf(f, "mode: : "); - switch(t->mode) { - case HS_MODE_BLOCK: - fprintf(f, "block"); - break; - case HS_MODE_STREAM: - fprintf(f, "streaming"); - break; - case HS_MODE_VECTORED: - fprintf(f, "vectored"); - break; - } - fprintf(f, "\n"); - - fprintf(f, "properties :"); - if (t->canExhaust) { - fprintf(f, " canExhaust"); - } - if (t->hasSom) { - fprintf(f, " hasSom"); - } - if (t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL) { - fprintf(f, " pureLiteral"); - } - if (t->runtimeImpl == ROSE_RUNTIME_SINGLE_OUTFIX) { - fprintf(f, " soleOutfix"); - } - fprintf(f, "\n"); - - fprintf(f, "dkey count : %u\n", t->dkeyCount); - fprintf(f, "som slot count : %u\n", t->somLocationCount); 
- fprintf(f, "som width : %u bytes\n", t->somHorizon); - fprintf(f, "rose count : %u\n", t->roseCount); - fprintf(f, "\n"); - - fprintf(f, "total engine size : %u bytes\n", t->size); - fprintf(f, " - anchored matcher : %u bytes over %u bytes\n", t->asize, - t->anchoredDistance); - fprintf(f, " - floating matcher : %zu bytes%s", - ftable ? hwlmSize(ftable) : 0, t->noFloatingRoots ? " (cond)":""); - if (t->floatingMinDistance) { - fprintf(f, " from %s bytes\n", - rose_off(t->floatingMinDistance).str().c_str()); - } - if (t->floatingDistance != ROSE_BOUND_INF && ftable) { - fprintf(f, " over %u bytes\n", t->floatingDistance); - } else { - fprintf(f, "\n"); - } - fprintf(f, " - delay-rb matcher : %zu bytes\n", - drtable ? hwlmSize(drtable) : 0); - fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n", - etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); - fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", - sbtable ? hwlmSize(sbtable) : 0, t->smallBlockDistance); - fprintf(f, " - role state table : %zu bytes\n", - t->rolesWithStateCount * sizeof(u32)); - fprintf(f, " - nfa info table : %zu bytes\n", - t->queueCount * sizeof(NfaInfo)); - fprintf(f, " - lookaround table : %u bytes\n", - t->nfaInfoOffset - t->lookaroundTableOffset); - fprintf(f, " - lookaround reach : %u bytes\n", - t->lookaroundTableOffset - t->lookaroundReachOffset); - - fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end); - fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); - fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8); - fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize); - fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState); - fprintf(f, " - active array : %u bytes\n", - mmbit_size(t->activeArrayCount)); - fprintf(f, " - active rose : %u bytes\n", - mmbit_size(t->activeLeftCount)); - fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize); - fprintf(f, " - nfa state : %u bytes\n", t->nfaStateSize); - fprintf(f, " - (trans. 
nfa state): %u bytes\n", t->tStateSize); - fprintf(f, " - one whole bytes : %u bytes\n", - t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable); - fprintf(f, " - groups : %u bytes\n", - t->stateOffsets.groups_size); - fprintf(f, "\n"); - - fprintf(f, "initial groups : 0x%016llx\n", t->initialGroups); - fprintf(f, "floating groups : 0x%016llx\n", t->floating_group_mask); - fprintf(f, "handled key count : %u\n", t->handledKeyCount); - fprintf(f, "\n"); - - fprintf(f, "total literal count : %u\n", t->totalNumLiterals); - fprintf(f, " prog table size : %u\n", t->literalCount); - fprintf(f, " delayed literals : %u\n", t->delay_count); - - fprintf(f, "\n"); - fprintf(f, " minWidth : %u\n", t->minWidth); - fprintf(f, " minWidthExcludingBoundaries : %u\n", - t->minWidthExcludingBoundaries); - fprintf(f, " maxBiAnchoredWidth : %s\n", - rose_off(t->maxBiAnchoredWidth).str().c_str()); - fprintf(f, " minFloatLitMatchOffset : %s\n", - rose_off(t->floatingMinLiteralMatchOffset).str().c_str()); - fprintf(f, " delay_base_id : %u\n", t->delay_base_id); - fprintf(f, " maxFloatingDelayedMatch : %s\n", - rose_off(t->maxFloatingDelayedMatch).str().c_str()); - - if (atable) { - fprintf(f, "\nAnchored literal matcher stats:\n\n"); - dumpAnchoredStats(atable, f); - } - - if (ftable) { - fprintf(f, "\nFloating literal matcher stats:\n\n"); - hwlmPrintStats(ftable, f); - } - - if (drtable) { - fprintf(f, "\nDelay Rebuild literal matcher stats:\n\n"); - hwlmPrintStats(drtable, f); - } - - if (etable) { - fprintf(f, "\nEOD-anchored literal matcher stats:\n\n"); - hwlmPrintStats(etable, f); - } - - if (sbtable) { - fprintf(f, "\nSmall-block literal matcher stats:\n\n"); - hwlmPrintStats(sbtable, f); - } - - dumpLongLiteralTable(t, f); -} - -#define DUMP_U8(o, member) \ - fprintf(f, " %-32s: %hhu/%hhx\n", #member, o->member, o->member) -#define DUMP_U32(o, member) \ - fprintf(f, " %-32s: %u/%08x\n", #member, o->member, o->member) -#define DUMP_U64(o, member) \ - fprintf(f, " %-32s: %llu/%016llx\n", #member, o->member, o->member) - -void roseDumpStructRaw(const RoseEngine *t, FILE *f) { - fprintf(f, "struct RoseEngine {\n"); - DUMP_U8(t, noFloatingRoots); - DUMP_U8(t, requiresEodCheck); - DUMP_U8(t, hasOutfixesInSmallBlock); - DUMP_U8(t, runtimeImpl); - DUMP_U8(t, mpvTriggeredByLeaf); - DUMP_U8(t, canExhaust); - DUMP_U8(t, hasSom); - DUMP_U8(t, somHorizon); - DUMP_U8(t, needsCatchup); - DUMP_U32(t, mode); - DUMP_U32(t, historyRequired); - DUMP_U32(t, ekeyCount); - DUMP_U32(t, dkeyCount); - DUMP_U32(t, dkeyLogSize); - DUMP_U32(t, invDkeyOffset); - DUMP_U32(t, somLocationCount); - DUMP_U32(t, somLocationFatbitSize); - DUMP_U32(t, rolesWithStateCount); - DUMP_U32(t, stateSize); - DUMP_U32(t, anchorStateSize); - DUMP_U32(t, nfaStateSize); - DUMP_U32(t, tStateSize); - DUMP_U32(t, smallWriteOffset); - DUMP_U32(t, amatcherOffset); - DUMP_U32(t, ematcherOffset); - DUMP_U32(t, fmatcherOffset); - DUMP_U32(t, drmatcherOffset); - DUMP_U32(t, sbmatcherOffset); - DUMP_U32(t, longLitTableOffset); - DUMP_U32(t, amatcherMinWidth); - DUMP_U32(t, fmatcherMinWidth); - DUMP_U32(t, eodmatcherMinWidth); - DUMP_U32(t, amatcherMaxBiAnchoredWidth); - DUMP_U32(t, fmatcherMaxBiAnchoredWidth); - DUMP_U32(t, litProgramOffset); - DUMP_U32(t, litDelayRebuildProgramOffset); - DUMP_U32(t, reportProgramOffset); - DUMP_U32(t, reportProgramCount); - DUMP_U32(t, delayProgramOffset); - DUMP_U32(t, anchoredProgramOffset); - DUMP_U32(t, literalCount); - DUMP_U32(t, activeArrayCount); - DUMP_U32(t, activeLeftCount); - DUMP_U32(t, queueCount); - 
DUMP_U32(t, activeQueueArraySize); - DUMP_U32(t, eagerIterOffset); - DUMP_U32(t, handledKeyCount); - DUMP_U32(t, handledKeyFatbitSize); - DUMP_U32(t, leftOffset); - DUMP_U32(t, roseCount); - DUMP_U32(t, lookaroundTableOffset); - DUMP_U32(t, lookaroundReachOffset); - DUMP_U32(t, eodProgramOffset); - DUMP_U32(t, lastByteHistoryIterOffset); - DUMP_U32(t, minWidth); - DUMP_U32(t, minWidthExcludingBoundaries); - DUMP_U32(t, maxBiAnchoredWidth); - DUMP_U32(t, anchoredDistance); - DUMP_U32(t, anchoredMinDistance); - DUMP_U32(t, floatingDistance); - DUMP_U32(t, floatingMinDistance); - DUMP_U32(t, smallBlockDistance); - DUMP_U32(t, floatingMinLiteralMatchOffset); - DUMP_U32(t, nfaInfoOffset); - DUMP_U64(t, initialGroups); - DUMP_U64(t, floating_group_mask); - DUMP_U32(t, size); - DUMP_U32(t, delay_count); - DUMP_U32(t, delay_fatbit_size); - DUMP_U32(t, delay_base_id); - DUMP_U32(t, anchored_count); - DUMP_U32(t, anchored_fatbit_size); - DUMP_U32(t, anchored_base_id); - DUMP_U32(t, maxFloatingDelayedMatch); - DUMP_U32(t, delayRebuildLength); - DUMP_U32(t, stateOffsets.history); - DUMP_U32(t, stateOffsets.exhausted); - DUMP_U32(t, stateOffsets.activeLeafArray); - DUMP_U32(t, stateOffsets.activeLeftArray); - DUMP_U32(t, stateOffsets.activeLeftArray_size); - DUMP_U32(t, stateOffsets.leftfixLagTable); - DUMP_U32(t, stateOffsets.anchorState); - DUMP_U32(t, stateOffsets.groups); - DUMP_U32(t, stateOffsets.groups_size); - DUMP_U32(t, stateOffsets.longLitState); - DUMP_U32(t, stateOffsets.somLocation); - DUMP_U32(t, stateOffsets.somValid); - DUMP_U32(t, stateOffsets.somWritable); - DUMP_U32(t, stateOffsets.end); - DUMP_U32(t, boundary.reportEodOffset); - DUMP_U32(t, boundary.reportZeroOffset); - DUMP_U32(t, boundary.reportZeroEodOffset); - DUMP_U32(t, totalNumLiterals); - DUMP_U32(t, asize); - DUMP_U32(t, outfixBeginQueue); - DUMP_U32(t, outfixEndQueue); - DUMP_U32(t, leftfixBeginQueue); - DUMP_U32(t, initMpvNfa); - DUMP_U32(t, rosePrefixCount); - DUMP_U32(t, activeLeftIterOffset); - DUMP_U32(t, ematcherRegionSize); - DUMP_U32(t, somRevCount); - DUMP_U32(t, somRevOffsetOffset); - DUMP_U32(t, longLitStreamState); - fprintf(f, "}\n"); - fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); -} - -void roseDumpComponents(const RoseEngine *t, bool dump_raw, - const string &base) { - dumpComponentInfo(t, base); - dumpComponentInfoCsv(t, base); - dumpNfas(t, dump_raw, base); - dumpAnchored(t, base); - dumpRevComponentInfo(t, base); - dumpRevNfas(t, dump_raw, base); - dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt"); - dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); - dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); - dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); - dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); -} - -void roseDumpInternals(const RoseEngine *t, const string &base) { - if (!t) { - DEBUG_PRINTF("no rose\n"); - return; - } - - const void *atable = getAnchoredMatcher(t); - const HWLM *ftable = getFloatingMatcher(t); - const HWLM *etable = getEodMatcher(t); - - if (atable) { - FILE *f = fopen((base + "/anchored.raw").c_str(), "w"); - if (f) { - fwrite(atable, 1, t->asize, f); - fclose(f); - } - } - - if (ftable) { - FILE *f = fopen((base + "/floating.raw").c_str(), "w"); - if (f) { - fwrite(ftable, 1, hwlmSize(ftable), f); - fclose(f); - } - } - - if (etable) { - FILE *f = fopen((base + "/eod.raw").c_str(), "w"); - if (f) { - fwrite(etable, 1, hwlmSize(etable), f); - fclose(f); - } - } - - FILE *f = fopen((base + 
"/rose.raw").c_str(), "w"); - assert(f); - fwrite(t, 1, roseSize(t), f); - fclose(f); - - f = fopen((base + "/rose_struct.txt").c_str(), "w"); - roseDumpStructRaw(t, f); - fclose(f); - - roseDumpComponents(t, true, base); -} - -} // namespace ue2 diff --git a/src/rose/rose_dump.h b/src/rose/rose_dump.h deleted file mode 100644 index fe66302d..00000000 --- a/src/rose/rose_dump.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ROSE_DUMP_H -#define ROSE_DUMP_H - -#ifdef DUMP_SUPPORT - -#include -#include - -struct RoseEngine; - -namespace ue2 { - -void roseDumpText(const RoseEngine *t, FILE *f); -void roseDumpInternals(const RoseEngine *t, const std::string &base); -void roseDumpComponents(const RoseEngine *t, bool dump_raw, - const std::string &base); -void roseDumpStructRaw(const RoseEngine *t, FILE *f); - -} // namespace ue2 - -#endif -#endif From bd3357d3acfd1c75a1ec0768afda634a2348f79b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 11:10:30 +1100 Subject: [PATCH 060/326] rose: dump lit programs from frag map --- src/rose/rose_build_dump.cpp | 73 ++++++++++++------------------------ 1 file changed, 24 insertions(+), 49 deletions(-) diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index e3497898..608141ed 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1140,35 +1140,29 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { #undef PROGRAM_NEXT_INSTRUCTION static -void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { +void dumpRoseLitPrograms(const RoseBuildImpl &build, const RoseEngine *t, + const string &filename) { ofstream os(filename); - const u32 *litPrograms = - (const u32 *)loadFromByteCodeOffset(t, t->litProgramOffset); - const u32 *delayRebuildPrograms = - (const u32 *)loadFromByteCodeOffset(t, t->litDelayRebuildProgramOffset); + // Collect all programs referenced by a literal fragment. 
+ vector programs; + programs.reserve(build.final_to_frag_map.size()); - for (u32 i = 0; i < t->literalCount; i++) { - os << "Literal " << i << endl; - os << "---------------" << endl; - - if (litPrograms[i]) { - os << "Program @ " << litPrograms[i] << ":" << endl; - const char *prog = - (const char *)loadFromByteCodeOffset(t, litPrograms[i]); - dumpProgram(os, t, prog); - } else { - os << "" << endl; + for (const auto &m : build.final_to_frag_map) { + const auto &frag = m.second; + if (frag.lit_program_offset) { + programs.push_back(frag.lit_program_offset); } - - if (delayRebuildPrograms[i]) { - os << "Delay Rebuild Program @ " << delayRebuildPrograms[i] << ":" - << endl; - const char *prog = (const char *)loadFromByteCodeOffset( - t, delayRebuildPrograms[i]); - dumpProgram(os, t, prog); + if (frag.delay_program_offset) { + programs.push_back(frag.delay_program_offset); } + } + sort_and_unique(programs); + for (u32 prog_offset : programs) { + os << "Program @ " << prog_offset << ":" << endl; + const char *prog = (const char *)loadFromByteCodeOffset(t, prog_offset); + dumpProgram(os, t, prog); os << endl; } @@ -1215,30 +1209,6 @@ void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { os.close(); } -static -void dumpRoseDelayPrograms(const RoseEngine *t, const string &filename) { - ofstream os(filename); - - const u32 *programs = - (const u32 *)loadFromByteCodeOffset(t, t->delayProgramOffset); - - for (u32 i = 0; i < t->delay_count; i++) { - os << "Delay entry " << i << endl; - os << "---------------" << endl; - - if (programs[i]) { - os << "Program @ " << programs[i] << ":" << endl; - const char *prog = - (const char *)loadFromByteCodeOffset(t, programs[i]); - dumpProgram(os, t, prog); - } else { - os << "" << endl; - } - } - - os.close(); -} - static void dumpRoseAnchoredPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); @@ -1896,10 +1866,14 @@ void roseDumpComponents(const RoseEngine *t, bool dump_raw, dumpAnchored(t, base); dumpRevComponentInfo(t, base); dumpRevNfas(t, dump_raw, base); - dumpRoseLitPrograms(t, base + "/rose_lit_programs.txt"); +} + +static +void roseDumpPrograms(const RoseBuildImpl &build, const RoseEngine *t, + const string &base) { + dumpRoseLitPrograms(build, t, base + "/rose_lit_programs.txt"); dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); - dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); } @@ -1928,6 +1902,7 @@ void dumpRose(const RoseBuild &build_base, const RoseEngine *t, fclose(f); roseDumpComponents(t, false, grey.dumpPath); + roseDumpPrograms(build, t, grey.dumpPath); // Graph. dumpRoseGraph(build, t, "rose.dot"); From 9550058e7529e87c654b5d6e7d92e7203fcd5898 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 11:14:51 +1100 Subject: [PATCH 061/326] remove lit program tables from bytecode --- src/rose/rose_build_bytecode.cpp | 26 ++++---------------------- src/rose/rose_build_dump.cpp | 2 -- src/rose/rose_internal.h | 6 ------ 3 files changed, 4 insertions(+), 30 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 42142936..d8404db1 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4700,15 +4700,10 @@ map groupByFragment(const RoseBuildImpl &build) { /** * \brief Build the interpreter programs for each literal. 
* - * Returns the following as a tuple: - * - * - base of the literal program list - * - base of the delay rebuild program list - * - total number of literal fragments + * Returns the total number of literal fragments. */ static -tuple buildLiteralPrograms(RoseBuildImpl &build, - build_context &bc) { +u32 buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { // Build a reverse mapping from fragment -> final_id. map> frag_to_final_map; for (const auto &m : build.final_to_frag_map) { @@ -4740,13 +4735,7 @@ tuple buildLiteralPrograms(RoseBuildImpl &build, frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id]; } - u32 litProgramsOffset = - bc.engine_blob.add(begin(litPrograms), end(litPrograms)); - u32 delayRebuildProgramsOffset = bc.engine_blob.add( - begin(delayRebuildPrograms), end(delayRebuildPrograms)); - - return tuple{litProgramsOffset, delayRebuildProgramsOffset, - num_fragments}; + return num_fragments; } static @@ -5475,12 +5464,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); - u32 litProgramOffset; - u32 litDelayRebuildProgramOffset; - u32 litProgramCount; - tie(litProgramOffset, litDelayRebuildProgramOffset, litProgramCount) = - buildLiteralPrograms(*this, bc); - + u32 litProgramCount = buildLiteralPrograms(*this, bc); u32 delayProgramOffset = buildDelayPrograms(*this, bc); u32 anchoredProgramOffset = buildAnchoredPrograms(*this, bc); @@ -5676,8 +5660,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->needsCatchup = bc.needs_catchup ? 1 : 0; engine->literalCount = litProgramCount; - engine->litProgramOffset = litProgramOffset; - engine->litDelayRebuildProgramOffset = litDelayRebuildProgramOffset; engine->reportProgramOffset = reportProgramOffset; engine->reportProgramCount = reportProgramCount; engine->delayProgramOffset = delayProgramOffset; diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 608141ed..f5cc86f0 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1783,8 +1783,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, eodmatcherMinWidth); DUMP_U32(t, amatcherMaxBiAnchoredWidth); DUMP_U32(t, fmatcherMaxBiAnchoredWidth); - DUMP_U32(t, litProgramOffset); - DUMP_U32(t, litDelayRebuildProgramOffset); DUMP_U32(t, reportProgramOffset); DUMP_U32(t, reportProgramCount); DUMP_U32(t, delayProgramOffset); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 8e55a37d..c8f47041 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -344,12 +344,6 @@ struct RoseEngine { u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still * produce a match for a pattern involved * with the anchored table. */ - /** \brief Offset of u32 array of program offsets for literals. */ - u32 litProgramOffset; - - /** \brief Offset of u32 array of delay rebuild program offsets for - * literals. 
*/ - u32 litDelayRebuildProgramOffset; /** * \brief Offset of u32 array of program offsets for reports used by From a5b3bc814f80fcbb908a0f1a9a3e8d2dde50b715 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 11:31:17 +1100 Subject: [PATCH 062/326] rose: delete RoseEngine::literalCount --- src/rose/rose_build_bytecode.cpp | 9 ++------- src/rose/rose_build_dump.cpp | 2 -- src/rose/rose_internal.h | 8 -------- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index d8404db1..c89d2073 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4699,11 +4699,9 @@ map groupByFragment(const RoseBuildImpl &build) { /** * \brief Build the interpreter programs for each literal. - * - * Returns the total number of literal fragments. */ static -u32 buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { +void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { // Build a reverse mapping from fragment -> final_id. map> frag_to_final_map; for (const auto &m : build.final_to_frag_map) { @@ -4734,8 +4732,6 @@ u32 buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { frag.lit_program_offset = litPrograms[frag.fragment_id]; frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id]; } - - return num_fragments; } static @@ -5464,7 +5460,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); - u32 litProgramCount = buildLiteralPrograms(*this, bc); + buildLiteralPrograms(*this, bc); u32 delayProgramOffset = buildDelayPrograms(*this, bc); u32 anchoredProgramOffset = buildAnchoredPrograms(*this, bc); @@ -5659,7 +5655,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->needsCatchup = bc.needs_catchup ? 1 : 0; - engine->literalCount = litProgramCount; engine->reportProgramOffset = reportProgramOffset; engine->reportProgramCount = reportProgramCount; engine->delayProgramOffset = delayProgramOffset; diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index f5cc86f0..6ac80ef9 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1696,7 +1696,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { fprintf(f, "\n"); fprintf(f, "total literal count : %u\n", t->totalNumLiterals); - fprintf(f, " prog table size : %u\n", t->literalCount); fprintf(f, " delayed literals : %u\n", t->delay_count); fprintf(f, "\n"); @@ -1787,7 +1786,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, reportProgramCount); DUMP_U32(t, delayProgramOffset); DUMP_U32(t, anchoredProgramOffset); - DUMP_U32(t, literalCount); DUMP_U32(t, activeArrayCount); DUMP_U32(t, activeLeftCount); DUMP_U32(t, queueCount); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index c8f47041..9ef8b0a1 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -367,14 +367,6 @@ struct RoseEngine { */ u32 anchoredProgramOffset; - /** - * \brief Number of entries in the arrays pointed to by litProgramOffset, - * litDelayRebuildProgramOffset. - * - * Note: NOT the total number of literals. 
- */ - u32 literalCount; - u32 activeArrayCount; //number of nfas tracked in the active array u32 activeLeftCount; //number of nfas tracked in the active rose array u32 queueCount; /**< number of nfa queues */ From 6bf35cb637d99bae526d3309f44c6e69fb33ce4b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 12:25:11 +1100 Subject: [PATCH 063/326] rose: make groupByFragment local --- src/rose/rose_build_bytecode.cpp | 1 + src/rose/rose_build_impl.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c89d2073..0dbe20ea 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4649,6 +4649,7 @@ rose_literal_id getFragment(const rose_literal_id &lit) { return frag; } +static map groupByFragment(const RoseBuildImpl &build) { u32 frag_id = 0; map final_to_frag; diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index e9ba5f55..dc696ddd 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -651,8 +651,6 @@ void normaliseLiteralMask(const ue2_literal &s, std::vector &msk, bool canImplementGraphs(const RoseBuildImpl &tbi); #endif -std::map groupByFragment(const RoseBuildImpl &build); - } // namespace ue2 #endif /* ROSE_BUILD_IMPL_H_17E20A3C6935D6 */ From a0260c036264091e064fafb785c9902d38923d92 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 12:49:43 +1100 Subject: [PATCH 064/326] rose: do fragment group assignment earlier --- src/rose/rose_build_bytecode.cpp | 40 ++++++++++++++++++++++++-------- src/rose/rose_build_impl.h | 4 +++- src/rose/rose_build_matchers.cpp | 23 +----------------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 0dbe20ea..82e1945f 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4649,48 +4649,68 @@ rose_literal_id getFragment(const rose_literal_id &lit) { return frag; } +static +rose_group getGroups(const RoseBuildImpl &build, const set &lit_ids) { + rose_group groups = 0; + for (auto lit_id : lit_ids) { + auto &info = build.literal_info.at(lit_id); + groups |= info.group_mask; + } + return groups; +} + static map groupByFragment(const RoseBuildImpl &build) { u32 frag_id = 0; map final_to_frag; - map> frag_lits; + struct FragmentInfo { + vector final_ids; + rose_group groups = 0; + }; + + map frag_info; + for (const auto &m : build.final_id_to_literal) { u32 final_id = m.first; const auto &lit_ids = m.second; assert(!lit_ids.empty()); + auto groups = getGroups(build, lit_ids); + if (lit_ids.size() > 1) { - final_to_frag.emplace(final_id, LitFragment(frag_id++)); + final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); continue; } const auto lit_id = *lit_ids.begin(); const auto &lit = build.literals.right.at(lit_id); if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { - final_to_frag.emplace(final_id, LitFragment(frag_id++)); + final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); continue; } // Combining fragments that squash their groups is unsafe. 
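
The grouping loop that follows folds every literal sharing a fragment into one entry and ORs their group masks together, so a merged fragment fires whenever any of its literals' groups are switched on. A standalone sketch of that map-based accumulation (the ids, fragment keys and mask values here are invented for illustration):

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    typedef uint64_t rose_group; // 64-bit group bitmask, as in Rose

    struct FragInfo {
        std::vector<unsigned> final_ids; // literals folded into this fragment
        rose_group groups = 0;           // union of their group masks
    };

    int main(void) {
        // Invented literals: final id, fragment key, group mask. The first
        // two share a fragment and so must share its groups.
        const struct { unsigned id; const char *frag; rose_group groups; }
            lits[] = {{0, "oobar", 0x1}, {1, "oobar", 0x2}, {2, "quux", 0x4}};

        std::map<std::string, FragInfo> frag_info;
        for (const auto &lit : lits) {
            FragInfo &fi = frag_info[lit.frag]; // created on first use
            fi.final_ids.push_back(lit.id);
            fi.groups |= lit.groups;
        }

        for (const auto &m : frag_info) {
            std::printf("frag '%s': %zu ids, groups=0x%llx\n",
                        m.first.c_str(), m.second.final_ids.size(),
                        (unsigned long long)m.second.groups);
        }
        return 0;
    }
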
const auto &info = build.literal_info[lit_id]; if (info.squash_group) { - final_to_frag.emplace(final_id, LitFragment(frag_id++)); + final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); continue; } DEBUG_PRINTF("fragment candidate: final_id=%u %s\n", final_id, dumpString(lit.s).c_str()); - auto frag = getFragment(lit); - frag_lits[frag].push_back(final_id); + auto &fi = frag_info[getFragment(lit)]; + fi.final_ids.push_back(final_id); + fi.groups |= groups; } - for (const auto &m : frag_lits) { + for (const auto &m : frag_info) { + const auto &fi = m.second; DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), - as_string_list(m.second).c_str()); - for (const auto final_id : m.second) { + as_string_list(fi.final_ids).c_str()); + for (const auto final_id : fi.final_ids) { assert(!contains(final_to_frag, final_id)); - final_to_frag.emplace(final_id, LitFragment(frag_id)); + final_to_frag.emplace(final_id, LitFragment(frag_id, fi.groups)); } frag_id++; } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index dc696ddd..0c881f64 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -437,8 +437,10 @@ private: std::set all_reports(const OutfixInfo &outfix); struct LitFragment { - explicit LitFragment(u32 fragment_id_in) : fragment_id(fragment_id_in) {} + LitFragment(u32 fragment_id_in, rose_group groups_in) + : fragment_id(fragment_id_in), groups(groups_in) {} u32 fragment_id; + rose_group groups; u32 lit_program_offset = 0; u32 delay_program_offset = 0; }; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 998cd545..b92941d7 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -635,24 +635,6 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, return lit_min_offset; } -static -map makeFragGroupMap(const RoseBuildImpl &build) { - map frag_to_group; - - for (const auto &m : build.final_to_frag_map) { - u32 final_id = m.first; - u32 frag_id = m.second.fragment_id; - hwlm_group_t groups = 0; - const auto &lits = build.final_id_to_literal.at(final_id); - for (auto lit_id : lits) { - groups |= build.literal_info[lit_id].group_mask; - } - frag_to_group[frag_id] |= groups; - } - - return frag_to_group; -} - template void trim_to_suffix(Container &c, size_t len) { if (c.size() <= len) { @@ -751,16 +733,13 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, cmp); } - auto frag_group_map = makeFragGroupMap(build); - for (auto &lit : mp.lits) { u32 final_id = lit.id; assert(contains(build.final_to_frag_map, final_id)); const auto &frag = build.final_to_frag_map.at(final_id); lit.id = delay_rebuild ? 
frag.delay_program_offset : frag.lit_program_offset; - assert(contains(frag_group_map, frag.fragment_id)); - lit.groups = frag_group_map.at(frag.fragment_id); + lit.groups = frag.groups; } sort_and_unique(mp.lits); From a83b7cb3488fe424d7e04eb8a05df9f19b2342f7 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 13:11:04 +1100 Subject: [PATCH 065/326] move final_id_to_literal into build_context --- src/rose/rose_build_bytecode.cpp | 92 +++++++++++++++++--------------- src/rose/rose_build_impl.h | 3 -- 2 files changed, 50 insertions(+), 45 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 82e1945f..253d76ce 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -243,6 +243,9 @@ struct build_context : boost::noncopyable { /** \brief Global bitmap of groups that can be squashed. */ rose_group squashable_groups = 0; + + /** \brief Mapping from final ID to the set of literals it is used for. */ + map> final_id_to_literal; }; /** \brief subengine info including built engine and @@ -2500,10 +2503,11 @@ void fillInSomRevNfas(RoseEngine *engine, const SomSlotManager &ssm, static vector -getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { +getLiteralInfoByFinalId(const RoseBuildImpl &build, const build_context &bc, + u32 final_id) { vector out; - const auto &final_id_to_literal = build.final_id_to_literal; + const auto &final_id_to_literal = bc.final_id_to_literal; assert(contains(final_id_to_literal, final_id)); const auto &lits = final_id_to_literal.find(final_id)->second; @@ -4154,9 +4158,10 @@ void addPredBlocks(build_context &bc, map &pred_blocks, } static -void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id, +void makePushDelayedInstructions(const RoseBuildImpl &build, + const build_context &bc, u32 final_id, RoseProgram &program) { - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id); const auto &arb_lit_info = **lit_infos.begin(); if (arb_lit_info.delayed_ids.empty()) { return; @@ -4177,9 +4182,10 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id, } static -rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build, u32 final_id) { - assert(contains(build.final_id_to_literal, final_id)); - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); +rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build, + const build_context &bc, u32 final_id) { + assert(contains(bc.final_id_to_literal, final_id)); + const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id); rose_group groups = 0; for (const auto &li : lit_infos) { @@ -4189,9 +4195,10 @@ rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build, u32 final_id) { } static -void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, +void makeGroupCheckInstruction(const RoseBuildImpl &build, + const build_context &bc, u32 final_id, RoseProgram &program) { - rose_group groups = getFinalIdGroupsUnion(build, final_id); + rose_group groups = getFinalIdGroupsUnion(build, bc, final_id); if (!groups) { return; } @@ -4201,8 +4208,7 @@ void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, static void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, u32 final_id, RoseProgram &program) { - assert(contains(build.final_id_to_literal, final_id)); - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + const auto 
&lit_infos = getLiteralInfoByFinalId(build, bc, final_id); assert(!lit_infos.empty()); if (!lit_infos.front()->requires_benefits) { @@ -4211,8 +4217,8 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, vector look; - assert(build.final_id_to_literal.at(final_id).size() == 1); - u32 lit_id = *build.final_id_to_literal.at(final_id).begin(); + assert(bc.final_id_to_literal.at(final_id).size() == 1); + u32 lit_id = *bc.final_id_to_literal.at(final_id).begin(); const ue2_literal &s = build.literals.right.at(lit_id).s; DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id, final_id, dumpString(s).c_str()); @@ -4230,16 +4236,17 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, } static -void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id, +void makeGroupSquashInstruction(const RoseBuildImpl &build, + const build_context &bc, u32 final_id, RoseProgram &program) { - assert(contains(build.final_id_to_literal, final_id)); - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + assert(contains(bc.final_id_to_literal, final_id)); + const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id); if (!lit_infos.front()->squash_group) { return; } - rose_group groups = getFinalIdGroupsUnion(build, final_id); + rose_group groups = getFinalIdGroupsUnion(build, bc, final_id); if (!groups) { return; } @@ -4266,8 +4273,8 @@ static void makeRecordAnchoredInstruction(const RoseBuildImpl &build, build_context &bc, u32 final_id, RoseProgram &program) { - assert(contains(build.final_id_to_literal, final_id)); - const auto &lit_ids = build.final_id_to_literal.at(final_id); + assert(contains(bc.final_id_to_literal, final_id)); + const auto &lit_ids = bc.final_id_to_literal.at(final_id); // Must be anchored. assert(!lit_ids.empty()); @@ -4321,7 +4328,7 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, return; } - const auto &lit_ids = build.final_id_to_literal.at(final_id); + const auto &lit_ids = bc.final_id_to_literal.at(final_id); if (lit_ids.empty()) { return; } @@ -4367,7 +4374,7 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build, DEBUG_PRINTF("final_id %u, long lit threshold %zu\n", final_id, bc.longLitLengthThreshold); - const auto &lits = build.final_id_to_literal.at(final_id); + const auto &lits = bc.final_id_to_literal.at(final_id); if (lits.size() != 1) { // final_id sharing is only allowed for literals that are short enough // to not require any additional confirm work. @@ -4464,11 +4471,11 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, // delayed literals, as their groups may be switched off; ordinarily, we // can trust the HWLM matcher. if (hasDelayedLiteral(build, lit_edges)) { - makeGroupCheckInstruction(build, final_id, program); + makeGroupCheckInstruction(build, bc, final_id, program); } // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build, final_id, program); + makePushDelayedInstructions(build, bc, final_id, program); // Add pre-check for early literals in the floating table. makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, program); @@ -4521,7 +4528,7 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, RoseProgram root_block; // Literal may squash groups. 
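
Squashing means clearing the literal's groups once it has done its work; the instruction stores the negated mask so the runtime can clear the groups with a single AND. A minimal sketch of the check/squash pair, on invented mask values:

    #include <cstdint>
    #include <cstdio>

    typedef uint64_t rose_group;

    int main(void) {
        rose_group scratch_groups = 0xff; // groups currently switched on
        rose_group lit_groups = 0x06;     // groups owned by this literal

        // CHECK_GROUPS-style test: stop if none of the literal's groups
        // are still live.
        if (!(scratch_groups & lit_groups)) {
            return 0;
        }

        // SQUASH_GROUPS is encoded as ~lit_groups, so one AND clears them.
        scratch_groups &= ~lit_groups;

        std::printf("groups now 0x%llx\n",
                    (unsigned long long)scratch_groups); // prints 0xf9
        return 0;
    }
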
- makeGroupSquashInstruction(build, final_id, root_block); + makeGroupSquashInstruction(build, bc, final_id, root_block); // Literal may be anchored and need to be recorded. makeRecordAnchoredInstruction(build, bc, final_id, root_block); @@ -4581,7 +4588,7 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, RoseProgram program; for (const auto &final_id : final_ids) { - const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); + const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id); const auto &arb_lit_info = **lit_infos.begin(); if (arb_lit_info.delayed_ids.empty()) { continue; // No delayed IDs, no work to do. @@ -4590,7 +4597,7 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, RoseProgram prog; makeCheckLiteralInstruction(build, bc, final_id, prog); makeCheckLitMaskInstruction(build, bc, final_id, prog); - makePushDelayedInstructions(build, final_id, prog); + makePushDelayedInstructions(build, bc, final_id, prog); program.add_block(move(prog)); } @@ -4650,7 +4657,7 @@ rose_literal_id getFragment(const rose_literal_id &lit) { } static -rose_group getGroups(const RoseBuildImpl &build, const set &lit_ids) { +rose_group getGroups(const RoseBuildImpl &build, const flat_set &lit_ids) { rose_group groups = 0; for (auto lit_id : lit_ids) { auto &info = build.literal_info.at(lit_id); @@ -4660,7 +4667,8 @@ rose_group getGroups(const RoseBuildImpl &build, const set &lit_ids) { } static -map groupByFragment(const RoseBuildImpl &build) { +map groupByFragment(const RoseBuildImpl &build, + const build_context &bc) { u32 frag_id = 0; map final_to_frag; @@ -4671,7 +4679,7 @@ map groupByFragment(const RoseBuildImpl &build) { map frag_info; - for (const auto &m : build.final_id_to_literal) { + for (const auto &m : bc.final_id_to_literal) { u32 final_id = m.first; const auto &lit_ids = m.second; assert(!lit_ids.empty()); @@ -4762,7 +4770,7 @@ u32 buildDelayPrograms(RoseBuildImpl &build, build_context &bc) { vector programs; for (u32 final_id = build.delay_base_id; - final_id < build.final_id_to_literal.size(); final_id++) { + final_id < bc.final_id_to_literal.size(); final_id++) { u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map); programs.push_back(offset); } @@ -5143,11 +5151,11 @@ u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, } static -void allocateFinalIdToSet(RoseBuildImpl &build, const set &lits, - u32 *next_final_id) { +void allocateFinalIdToSet(RoseBuildImpl &build, build_context &bc, + const set &lits, u32 *next_final_id) { const auto &g = build.g; auto &literal_info = build.literal_info; - auto &final_id_to_literal = build.final_id_to_literal; + auto &final_id_to_literal = bc.final_id_to_literal; /* We can allocate the same final id to multiple literals of the same type * if they share the same vertex set and trigger the same delayed literal @@ -5262,13 +5270,13 @@ bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { /** \brief Allocate final literal IDs for all literals. 
*/ static -void allocateFinalLiteralId(RoseBuildImpl &build) { +void allocateFinalLiteralId(RoseBuildImpl &build, build_context &bc) { set anch; set norm; set delay; /* undelayed ids come first */ - assert(build.final_id_to_literal.empty()); + assert(bc.final_id_to_literal.empty()); u32 next_final_id = 0; for (u32 i = 0; i < build.literal_info.size(); i++) { assert(!build.hasFinalId(i)); @@ -5296,15 +5304,15 @@ void allocateFinalLiteralId(RoseBuildImpl &build) { } /* normal lits */ - allocateFinalIdToSet(build, norm, &next_final_id); + allocateFinalIdToSet(build, bc, norm, &next_final_id); /* next anchored stuff */ build.anchored_base_id = next_final_id; - allocateFinalIdToSet(build, anch, &next_final_id); + allocateFinalIdToSet(build, bc, anch, &next_final_id); /* delayed ids come last */ build.delay_base_id = next_final_id; - allocateFinalIdToSet(build, delay, &next_final_id); + allocateFinalIdToSet(build, bc, delay, &next_final_id); } static @@ -5418,12 +5426,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { historyRequired); DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); - allocateFinalLiteralId(*this); - final_to_frag_map = groupByFragment(*this); + build_context bc; + allocateFinalLiteralId(*this, bc); + final_to_frag_map = groupByFragment(*this, bc); auto anchored_dfas = buildAnchoredDfas(*this); - build_context bc; bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this, anchored_dfas); bc.longLitLengthThreshold = longLitLengthThreshold; @@ -5710,7 +5718,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->lastByteHistoryIterOffset = lastByteOffset; engine->delay_count = - verify_u32(final_id_to_literal.size() - delay_base_id); + verify_u32(bc.final_id_to_literal.size() - delay_base_id); engine->delay_fatbit_size = fatbit_size(engine->delay_count); engine->delay_base_id = delay_base_id; engine->anchored_base_id = anchored_base_id; diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 0c881f64..9eba29f0 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -593,9 +593,6 @@ public: * overlap calculation in history assignment. 
*/ std::map anchoredLitSuffix; - std::map > final_id_to_literal; /* final literal id to - * literal id */ - std::map final_to_frag_map; unordered_set transient; From 7bdb32720367f7d229ed7009f0cc61d1bffcd99a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 31 Jan 2017 14:28:56 +1100 Subject: [PATCH 066/326] rose: use final_ids less in program construction --- src/rose/rose_build_bytecode.cpp | 119 +++++++++++++------------------ 1 file changed, 49 insertions(+), 70 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 253d76ce..197ceb4a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2501,27 +2501,6 @@ void fillInSomRevNfas(RoseEngine *engine, const SomSlotManager &ssm, } } -static -vector -getLiteralInfoByFinalId(const RoseBuildImpl &build, const build_context &bc, - u32 final_id) { - vector out; - - const auto &final_id_to_literal = bc.final_id_to_literal; - assert(contains(final_id_to_literal, final_id)); - - const auto &lits = final_id_to_literal.find(final_id)->second; - assert(!lits.empty()); - - for (const auto &lit_id : lits) { - const rose_literal_info &li = build.literal_info[lit_id]; - assert(li.final_id == final_id); - out.push_back(&li); - } - - return out; -} - static void applyFinalSpecialisation(RoseProgram &program) { assert(!program.empty()); @@ -4159,10 +4138,10 @@ void addPredBlocks(build_context &bc, map &pred_blocks, static void makePushDelayedInstructions(const RoseBuildImpl &build, - const build_context &bc, u32 final_id, + const flat_set &lit_ids, RoseProgram &program) { - const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id); - const auto &arb_lit_info = **lit_infos.begin(); + assert(!lit_ids.empty()); + const auto &arb_lit_info = build.literal_info.at(*lit_ids.begin()); if (arb_lit_info.delayed_ids.empty()) { return; } @@ -4172,8 +4151,7 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, u32 child_id = build.literal_info[int_id].final_id; u32 delay_index = child_id - build.delay_base_id; - DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id, - child_literal.delay, child_id); + DEBUG_PRINTF("delay=%u child_id=%u\n", child_literal.delay, child_id); auto ri = make_unique( verify_u8(child_literal.delay), delay_index); @@ -4182,23 +4160,21 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, } static -rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build, - const build_context &bc, u32 final_id) { - assert(contains(bc.final_id_to_literal, final_id)); - const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id); - +rose_group getLitGroupsUnion(const RoseBuildImpl &build, + const flat_set &lit_ids) { rose_group groups = 0; - for (const auto &li : lit_infos) { - groups |= li->group_mask; + for (auto lit_id : lit_ids) { + const auto &info = build.literal_info.at(lit_id); + groups |= info.group_mask; } return groups; } static void makeGroupCheckInstruction(const RoseBuildImpl &build, - const build_context &bc, u32 final_id, + const flat_set &lit_ids, RoseProgram &program) { - rose_group groups = getFinalIdGroupsUnion(build, bc, final_id); + rose_group groups = getLitGroupsUnion(build, lit_ids); if (!groups) { return; } @@ -4207,21 +4183,20 @@ void makeGroupCheckInstruction(const RoseBuildImpl &build, static void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, - u32 final_id, RoseProgram &program) { - const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id); - 
assert(!lit_infos.empty()); - - if (!lit_infos.front()->requires_benefits) { + const flat_set &lit_ids, + RoseProgram &program) { + const auto &lit_info = build.literal_info.at(*lit_ids.begin()); + if (!lit_info.requires_benefits) { return; } vector look; - assert(bc.final_id_to_literal.at(final_id).size() == 1); - u32 lit_id = *bc.final_id_to_literal.at(final_id).begin(); + assert(lit_ids.size() == 1); + u32 lit_id = *lit_ids.begin(); const ue2_literal &s = build.literals.right.at(lit_id).s; - DEBUG_PRINTF("building mask for lit %u (final id %u) %s\n", lit_id, - final_id, dumpString(s).c_str()); + DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id, + dumpString(s).c_str()); assert(s.length() <= MAX_MASK2_WIDTH); s32 i = 0 - s.length(); for (const auto &e : s) { @@ -4237,21 +4212,21 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, static void makeGroupSquashInstruction(const RoseBuildImpl &build, - const build_context &bc, u32 final_id, + const flat_set &lit_ids, RoseProgram &program) { - assert(contains(bc.final_id_to_literal, final_id)); - const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id); - - if (!lit_infos.front()->squash_group) { + assert(!lit_ids.empty()); + const u32 lit_id = *lit_ids.begin(); + const auto &info = build.literal_info[lit_id]; + if (!info.squash_group) { return; } - rose_group groups = getFinalIdGroupsUnion(build, bc, final_id); + rose_group groups = getLitGroupsUnion(build, lit_ids); if (!groups) { return; } - DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups); + DEBUG_PRINTF("squashes 0x%llx\n", groups); program.add_before_end( make_unique(~groups)); // Note negated. } @@ -4312,7 +4287,7 @@ u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { static void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, - u32 final_id, + const flat_set &lit_ids, const vector &lit_edges, RoseProgram &program) { if (lit_edges.empty()) { @@ -4328,7 +4303,6 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, return; } - const auto &lit_ids = bc.final_id_to_literal.at(final_id); if (lit_ids.empty()) { return; } @@ -4345,8 +4319,8 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, min_offset = min(min_offset, lit_min_offset); } - DEBUG_PRINTF("final_id=%u has min_len=%zu, min_offset=%u, " - "global min is %u\n", final_id, min_len, min_offset, + DEBUG_PRINTF("has min_len=%zu, min_offset=%u, " + "global min is %u\n", min_len, min_offset, bc.floatingMinLiteralMatchOffset); // If we can't match before the min offset, we don't need the check. @@ -4367,14 +4341,14 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, static void makeCheckLiteralInstruction(const RoseBuildImpl &build, - const build_context &bc, u32 final_id, + const build_context &bc, + const flat_set &lits, RoseProgram &program) { assert(bc.longLitLengthThreshold > 0); - DEBUG_PRINTF("final_id %u, long lit threshold %zu\n", final_id, - bc.longLitLengthThreshold); + DEBUG_PRINTF("lits [%s], long lit threshold %zu\n", + as_string_list(lits).c_str(), bc.longLitLengthThreshold); - const auto &lits = bc.final_id_to_literal.at(final_id); if (lits.size() != 1) { // final_id sharing is only allowed for literals that are short enough // to not require any additional confirm work. 
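
The mask check built earlier in this patch places its lookaround entries relative to the end of the matched literal: the first byte of an N-byte literal sits at offset -N, the last at -1. A toy version of that offset walk (the real code builds CharReach sets and skips caseless positions):

    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    int main(void) {
        const std::string s = "abc"; // a literal that has just matched
        std::vector<std::pair<int, char>> look;

        int i = 0 - (int)s.length(); // first byte is at -(length)
        for (char c : s) {
            look.emplace_back(i, c); // (offset, required byte)
            i++;
        }

        for (const auto &e : look) {
            std::printf("offset %d must be '%c'\n", e.first, e.second);
        }
        return 0; // prints offsets -3, -2, -1
    }
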
@@ -4461,24 +4435,26 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, DEBUG_PRINTF("final_id %u\n", final_id); + const auto &lit_ids = bc.final_id_to_literal.at(final_id); + // Check long literal info. - makeCheckLiteralInstruction(build, bc, final_id, program); + makeCheckLiteralInstruction(build, bc, lit_ids, program); // Check lit mask. - makeCheckLitMaskInstruction(build, bc, final_id, program); + makeCheckLitMaskInstruction(build, bc, lit_ids, program); // Check literal groups. This is an optimisation that we only perform for // delayed literals, as their groups may be switched off; ordinarily, we // can trust the HWLM matcher. if (hasDelayedLiteral(build, lit_edges)) { - makeGroupCheckInstruction(build, bc, final_id, program); + makeGroupCheckInstruction(build, lit_ids, program); } // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build, bc, final_id, program); + makePushDelayedInstructions(build, lit_ids, program); // Add pre-check for early literals in the floating table. - makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, program); + makeCheckLitEarlyInstruction(build, bc, lit_ids, lit_edges, program); return program; } @@ -4525,10 +4501,11 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, } if (final_id != MO_INVALID_IDX) { + const auto &lit_ids = bc.final_id_to_literal.at(final_id); RoseProgram root_block; // Literal may squash groups. - makeGroupSquashInstruction(build, bc, final_id, root_block); + makeGroupSquashInstruction(build, lit_ids, root_block); // Literal may be anchored and need to be recorded. makeRecordAnchoredInstruction(build, bc, final_id, root_block); @@ -4588,16 +4565,18 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, RoseProgram program; for (const auto &final_id : final_ids) { - const auto &lit_infos = getLiteralInfoByFinalId(build, bc, final_id); - const auto &arb_lit_info = **lit_infos.begin(); + const auto &lit_ids = bc.final_id_to_literal.at(final_id); + assert(!lit_ids.empty()); + + const auto &arb_lit_info = build.literal_info.at(*lit_ids.begin()); if (arb_lit_info.delayed_ids.empty()) { continue; // No delayed IDs, no work to do. 
} RoseProgram prog; - makeCheckLiteralInstruction(build, bc, final_id, prog); - makeCheckLitMaskInstruction(build, bc, final_id, prog); - makePushDelayedInstructions(build, bc, final_id, prog); + makeCheckLiteralInstruction(build, bc, lit_ids, prog); + makeCheckLitMaskInstruction(build, bc, lit_ids, prog); + makePushDelayedInstructions(build, lit_ids, prog); program.add_block(move(prog)); } From ebe12797e66973e6ca66934478f12012046a98a1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Feb 2017 10:37:15 +1100 Subject: [PATCH 067/326] assert program offsets are non-zero --- src/rose/match.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rose/match.c b/src/rose/match.c index c7c73d25..488ee5d5 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -85,7 +85,7 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); - assert(id < t->size); // id is a program offset + assert(id && id < t->size); // id is a program offset const u64a som = 0; const size_t match_len = end - start + 1; const u8 flags = 0; @@ -236,10 +236,10 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { */ static really_inline hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - size_t match_len, u32 id) { + struct hs_scratch *scratch, u64a end, + size_t match_len, u32 id) { DEBUG_PRINTF("id=%u\n", id); - assert(id < t->size); // id is an offset into bytecode + assert(id && id < t->size); // id is an offset into bytecode const u64a som = 0; const u8 flags = 0; return roseRunProgram_i(t, scratch, id, som, end, match_len, flags); From a4af801dd12bb6ea6ac43b3abd859170eb8e13f1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Feb 2017 10:41:32 +1100 Subject: [PATCH 068/326] rose: define invalid value for program offset --- src/rose/program_runtime.h | 2 ++ src/rose/rose_build_impl.h | 5 +++-- src/rose/rose_common.h | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 8f4c528d..7172f6aa 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -41,6 +41,7 @@ #include "miracle.h" #include "report.h" #include "rose.h" +#include "rose_common.h" #include "rose_internal.h" #include "rose_program.h" #include "rose_types.h" @@ -1501,6 +1502,7 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, som, end, prog_flags); + assert(programOffset != ROSE_INVALID_PROG_OFFSET); assert(programOffset >= sizeof(struct RoseEngine)); assert(programOffset < t->size); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 9eba29f0..8748a08c 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -31,6 +31,7 @@ #include "rose_build.h" #include "rose_build_util.h" +#include "rose_common.h" #include "rose_graph.h" #include "nfa/mpvcompile.h" #include "nfa/goughcompile.h" @@ -441,8 +442,8 @@ struct LitFragment { : fragment_id(fragment_id_in), groups(groups_in) {} u32 fragment_id; rose_group groups; - u32 lit_program_offset = 0; - u32 delay_program_offset = 0; + u32 lit_program_offset = ROSE_INVALID_PROG_OFFSET; + u32 delay_program_offset = ROSE_INVALID_PROG_OFFSET; }; // Concrete impl class diff --git a/src/rose/rose_common.h b/src/rose/rose_common.h index 3249f0b8..c0250aa5 100644 --- a/src/rose/rose_common.h +++ b/src/rose/rose_common.h @@ -41,4 +41,7 
@@ /** \brief Length in bytes of a reach bitvector, used by the lookaround code. */ #define REACH_BITVECTOR_LEN 32 +/** \brief Value used to represent an invalid Rose program offset. */ +#define ROSE_INVALID_PROG_OFFSET 0 + #endif // ROSE_COMMON_H From 8ee8f5f236b0c4209ff123cfe07b293dff164b10 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Feb 2017 10:50:44 +1100 Subject: [PATCH 069/326] safety assertions for delayed rebuild --- src/rose/rose_build_matchers.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index b92941d7..cd88c980 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -650,6 +650,11 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, size_t max_len, u32 max_offset) { MatcherProto mp; + if (delay_rebuild) { + assert(table == ROSE_FLOATING); + assert(build.cc.streaming); + } + for (const auto &e : build.literals.right) { const u32 id = e.first; if (!build.hasFinalId(id)) { From c4bfe0449c10ff71cb94ac373ce9c70c10167161 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Feb 2017 14:47:05 +1100 Subject: [PATCH 070/326] lookaround: reduce compile time for trimLiterals --- src/rose/rose_build_lookaround.cpp | 61 ++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index 10bd59de..ae990f7f 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -460,13 +460,24 @@ void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v, } } -static -map findLiteralReach(const rose_literal_id &lit) { - map look; - u32 i = lit.delay + 1; - for (auto it = lit.s.rbegin(), ite = lit.s.rend(); it != ite; ++it) { - look[0 - i] |= *it; +namespace { +struct LookProto { + LookProto(s32 offset_in, CharReach reach_in) + : offset(offset_in), reach(move(reach_in)) {} + s32 offset; + CharReach reach; +}; +} + +static +vector findLiteralReach(const rose_literal_id &lit) { + vector look; + look.reserve(lit.s.length()); + + s32 i = 0 - lit.s.length() - lit.delay; + for (const auto &c : lit.s) { + look.emplace_back(i, c); i++; } @@ -478,22 +489,40 @@ map findLiteralReach(const RoseBuildImpl &build, const RoseVertex v) { bool first = true; map look; + for (u32 lit_id : build.g[v].literals) { const rose_literal_id &lit = build.literals.right.at(lit_id); auto lit_look = findLiteralReach(lit); if (first) { - look = move(lit_look); + for (auto &p : lit_look) { + look.emplace(p.offset, p.reach); + } first = false; - } else { - for (auto it = look.begin(); it != look.end();) { - auto l_it = lit_look.find(it->first); - if (l_it == lit_look.end()) { - it = look.erase(it); - } else { - it->second |= l_it->second; - ++it; - } + continue; + } + + // Erase elements from look with keys not in lit_look. Where a key is + // in both maps, union its reach with the lookaround. + auto jt = begin(lit_look); + for (auto it = begin(look); it != end(look);) { + if (jt == end(lit_look)) { + // No further lit_look entries, erase remaining elements from + // look. + look.erase(it, end(look)); + break; + } + if (it->first < jt->offset) { + // Offset is present in look but not in lit_look, erase. + it = look.erase(it); + } else if (it->first > jt->offset) { + // Offset is preset in lit_look but not in look, ignore. + ++jt; + } else { + // Offset is present in both, union its reach with look. 
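+                // (This two-iterator walk is a sorted-merge intersection:
+                // advance whichever side is behind, and on equal keys
+                // combine and advance both. It costs O(|look| + |lit_look|)
+                // rather than a lit_look.find() per key as in the old loop,
+                // which is where the compile-time saving comes from.)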
+ it->second |= jt->reach; + ++it; + ++jt; } } } From 7acd8dcac847ea4a80236d58c2890a6db214e66e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Feb 2017 10:26:11 +1100 Subject: [PATCH 071/326] dead code: remove splitOffAnchoredLeadingLiteral --- src/nfagraph/ng_literal_analysis.cpp | 8 +------- src/nfagraph/ng_literal_analysis.h | 5 +---- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index a5f3468b..88e052db 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -853,12 +853,6 @@ bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, return splitOffLeadingLiteral_i(g, false, lit_out, rhs); } -bool splitOffAnchoredLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, - NGHolder *rhs) { - return splitOffLeadingLiteral_i(g, true, lit_out, rhs); -} - - bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) { if (in_degree(g.acceptEod, g) != 1) { return false; diff --git a/src/nfagraph/ng_literal_analysis.h b/src/nfagraph/ng_literal_analysis.h index 6fd9c525..6bb87556 100644 --- a/src/nfagraph/ng_literal_analysis.h +++ b/src/nfagraph/ng_literal_analysis.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,9 +87,6 @@ u64a sanitizeAndCompressAndScore(std::set &s); bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, NGHolder *rhs); -bool splitOffAnchoredLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, - NGHolder *rhs); - bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out); /** \brief Returns true if the given literal is the only thing in the graph, From d48a11cbbddac1eb8ec41cad04a6f102964921b4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Feb 2017 10:33:20 +1100 Subject: [PATCH 072/326] ng_literal_analysis: refactor splitOffLeadingLiteral --- src/nfagraph/ng_literal_analysis.cpp | 73 ++++++++-------------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index 88e052db..a6664b07 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -734,55 +734,30 @@ vector scoreEdges(const NGHolder &g, const flat_set &known_bad) { return scores; } -static -bool splitOffLeadingLiteral_i(const NGHolder &g, bool anch, - ue2_literal *lit_out, - NGHolder *rhs) { - NFAVertex u; - NFAVertex v; +bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, + NGHolder *rhs) { + DEBUG_PRINTF("looking for leading floating literal\n"); + set s_succ; + insert(&s_succ, adjacent_vertices(g.start, g)); - if (!anch) { - DEBUG_PRINTF("looking for leading floating literal\n"); - set s_succ; - insert(&s_succ, adjacent_vertices(g.start, g)); + set sds_succ; + insert(&sds_succ, adjacent_vertices(g.startDs, g)); - set sds_succ; - insert(&sds_succ, adjacent_vertices(g.startDs, g)); - - bool floating = is_subset_of(s_succ, sds_succ); - if (!floating) { - DEBUG_PRINTF("not floating\n"); - return false; - } - - 
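
The floating test above reduces to a sorted-subset check: the leading literal is floating only if every successor of start is also a successor of startDs, i.e. start contributes nothing that startDs does not. An equivalent check in isolation, on invented vertex ids:

    #include <algorithm>
    #include <cstdio>
    #include <set>

    int main(void) {
        const std::set<int> s_succ = {2, 3};      // successors of start
        const std::set<int> sds_succ = {1, 2, 3}; // successors of startDs

        // is_subset_of(s_succ, sds_succ) in ue2 terms: std::includes needs
        // both ranges sorted, which std::set guarantees.
        bool floating = std::includes(sds_succ.begin(), sds_succ.end(),
                                      s_succ.begin(), s_succ.end());
        std::printf("floating: %s\n", floating ? "yes" : "no");
        return 0;
    }
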
sds_succ.erase(g.startDs); - if (sds_succ.size() != 1) { - DEBUG_PRINTF("branchy root\n"); - return false; - } - - u = g.startDs; - v = *sds_succ.begin(); - } else { - DEBUG_PRINTF("looking for leading anchored literal\n"); - - if (proper_out_degree(g.startDs, g)) { - DEBUG_PRINTF("not anchored\n"); - return false; - } - - set s_succ; - insert(&s_succ, adjacent_vertices(g.start, g)); - s_succ.erase(g.startDs); - if (s_succ.size() != 1) { - DEBUG_PRINTF("branchy root\n"); - return false; - } - - u = g.start; - v = *s_succ.begin(); + bool floating = is_subset_of(s_succ, sds_succ); + if (!floating) { + DEBUG_PRINTF("not floating\n"); + return false; } + sds_succ.erase(g.startDs); + if (sds_succ.size() != 1) { + DEBUG_PRINTF("branchy root\n"); + return false; + } + + NFAVertex u = g.startDs; + NFAVertex v = *sds_succ.begin(); + while (true) { DEBUG_PRINTF("validating vertex %zu\n", g[v].index); @@ -838,8 +813,7 @@ bool splitOffLeadingLiteral_i(const NGHolder &g, bool anch, assert(u != g.startDs); ue2::unordered_map rhs_map; - vector pivots; - insert(&pivots, pivots.end(), adjacent_vertices(u, g)); + vector pivots = make_vector_from(adjacent_vertices(u, g)); splitRHS(g, pivots, rhs, &rhs_map); DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(), @@ -848,11 +822,6 @@ bool splitOffLeadingLiteral_i(const NGHolder &g, bool anch, return true; } -bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out, - NGHolder *rhs) { - return splitOffLeadingLiteral_i(g, false, lit_out, rhs); -} - bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) { if (in_degree(g.acceptEod, g) != 1) { return false; From d43e9d838f38f561072984c2365cce788939b751 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Feb 2017 10:35:40 +1100 Subject: [PATCH 073/326] rose: delete dead code for cloneVertex --- src/rose/rose_build_impl.h | 2 -- src/rose/rose_build_misc.cpp | 11 ----------- 2 files changed, 13 deletions(-) diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 8748a08c..dd0752f6 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -509,8 +509,6 @@ public: // Is the Rose anchored? bool hasNoFloatingRoots() const; - RoseVertex cloneVertex(RoseVertex v); - u32 calcHistoryRequired() const; rose_group getInitialGroups() const; diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 142bf138..549cc4f1 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -866,17 +866,6 @@ bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) { return false; } -// Note: only clones the vertex, you'll have to wire up your own edges. -RoseVertex RoseBuildImpl::cloneVertex(RoseVertex v) { - RoseVertex v2 = add_vertex(g[v], g); - - for (const auto &lit_id : g[v2].literals) { - literal_info[lit_id].vertices.insert(v2); - } - - return v2; -} - #ifndef NDEBUG bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) { const RoseGraph &g = build.g; From 1875d55cf1d047da0a74b2df8311225ace604dcd Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 12 Jan 2017 12:35:54 +1100 Subject: [PATCH 074/326] parser: add initial parser for control verbs This more reliably handles control verbs like (*UTF8) that can only happen at the start of the pattern, and allows them in any ordering. 
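
In sketch form, the new behaviour is: consume any run of leading verbs in any order, set the matching mode flags, and hand the rest of the pattern to the main parser. A simplified stand-in for the generated Ragel machine (ParseMode here is a local stand-in, and the unsupported/unknown-verb errors the real machine raises are omitted):

    #include <cstring>

    struct ParseMode { // stand-in for ue2's ParseMode flags
        bool utf8 = false;
        bool ucp = false;
    };

    static const char *read_verbs_sketch(const char *p, ParseMode &mode) {
        for (;;) {
            if (!strncmp(p, "(*UTF8)", 7)) { mode.utf8 = true; p += 7; }
            else if (!strncmp(p, "(*UTF)", 6)) { mode.utf8 = true; p += 6; }
            else if (!strncmp(p, "(*UCP)", 6)) { mode.ucp = true; p += 6; }
            else break; // anything else: the verbs are done
        }
        return p;
    }

    int main(void) {
        ParseMode mode;
        const char *rest = read_verbs_sketch("(*UCP)(*UTF8)foo.*bar", mode);
        // Both flags are now set, regardless of verb order, and rest
        // points at the body of the pattern.
        return (mode.utf8 && mode.ucp && !strcmp(rest, "foo.*bar")) ? 0 : 1;
    }
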
--- CMakeLists.txt | 9 +++ src/parser/Parser.h | 4 +- src/parser/Parser.rl | 43 ++++++------ src/parser/control_verbs.h | 46 ++++++++++++ src/parser/control_verbs.rl | 121 ++++++++++++++++++++++++++++++++ unit/hyperscan/bad_patterns.txt | 9 ++- 6 files changed, 208 insertions(+), 24 deletions(-) create mode 100644 src/parser/control_verbs.h create mode 100644 src/parser/control_verbs.rl diff --git a/CMakeLists.txt b/CMakeLists.txt index 27d3e02b..4ec1f9e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -459,6 +459,13 @@ set_source_files_properties( ragelmaker(src/parser/Parser.rl) +set_source_files_properties( + ${CMAKE_BINARY_DIR}/src/parser/control_verbs.cpp + PROPERTIES + COMPILE_FLAGS "${RAGEL_C_FLAGS}") + +ragelmaker(src/parser/control_verbs.rl) + SET(hs_HEADERS src/hs.h src/hs_common.h @@ -891,6 +898,8 @@ SET (hs_SRCS src/parser/buildstate.h src/parser/check_refs.cpp src/parser/check_refs.h + src/parser/control_verbs.cpp + src/parser/control_verbs.h src/parser/parse_error.cpp src/parser/parse_error.h src/parser/parser_util.cpp diff --git a/src/parser/Parser.h b/src/parser/Parser.h index 45c3ac7a..a034a18f 100644 --- a/src/parser/Parser.h +++ b/src/parser/Parser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -69,7 +69,7 @@ struct ParseMode { * * This call will throw a ParseError on failure. */ -std::unique_ptr parse(const char *const ptr, ParseMode &mode); +std::unique_ptr parse(const char *ptr, ParseMode &mode); } // namespace ue2 diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 53130ddf..dfa0beda 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ /* Parser.cpp is a built source, may not be in same dir as parser files */ #include "parser/check_refs.h" +#include "parser/control_verbs.h" #include "parser/ComponentAlternation.h" #include "parser/ComponentAssertion.h" #include "parser/ComponentAtomicGroup.h" @@ -549,27 +550,23 @@ unichar readUtf8CodePoint4c(const u8 *ts) { ############################################################# readVerb := |* 'UTF8)' => { - if (ts != ptr + 2) { - throw LocatedParseError("(*UTF8) must be at start of " - "expression, encountered"); - } - mode.utf8 = true; - globalMode.utf8 = true; /* once you unicode, you can't stop */ - ucp_start_p = te; /* (*UCP) can appear after us */ - fret; + throw LocatedParseError("(*UTF8) must be at start of " + "expression, encountered"); + }; + 'UTF)' => { + throw LocatedParseError("(*UTF) must be at start of " + "expression, encountered"); }; 'UCP)' => { - if (ts != ucp_start_p + 2) { - throw LocatedParseError("(*UCP) must be at start of " - "expression, encountered"); - } - mode.ucp = true; - globalMode.ucp = true; /* once you unicode, you can't stop */ - fret; + throw LocatedParseError("(*UCP) must be at start of " + "expression, encountered"); }; 'UTF16)' => { throw LocatedParseError("(*UTF16) not supported"); }; + 'UTF32)' => { + throw LocatedParseError("(*UTF32) not supported"); + }; any => { throw LocatedParseError("Unknown control verb"); }; @@ -1834,10 +1831,18 @@ unichar readUtf8CodePoint4c(const u8 
*ts) { %% write data nofinal; /** \brief Main parser call, returns root Component or nullptr. */ -unique_ptr parse(const char *const c_ptr, ParseMode &globalMode) { - const u8 * const ptr = (const u8 * const)c_ptr; +unique_ptr parse(const char *c_ptr, ParseMode &globalMode) { + assert(c_ptr); + + const u8 *ptr = (const u8 *const)c_ptr; const u8 *p = ptr; const u8 *pe = ptr + strlen(c_ptr); + + // First, read the control verbs, set any global mode flags and move the + // ptr forward. + p = (const u8 *)read_control_verbs((const char *)p, (const char *)pe, + globalMode); + const u8 *eof = pe; int cs; UNUSED int act; @@ -1891,8 +1896,6 @@ unique_ptr parse(const char *const c_ptr, ParseMode &globalMode) { // Location at which the current character class began. const u8 *currentClsBegin = p; - const u8 *ucp_start_p = p; /* for (*UCP) verb */ - // We throw exceptions on various parsing failures beyond this point: we // use a try/catch block here to clean up our allocated memory before we // re-throw the exception to the caller. diff --git a/src/parser/control_verbs.h b/src/parser/control_verbs.h new file mode 100644 index 00000000..9cf5b116 --- /dev/null +++ b/src/parser/control_verbs.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Parser for control verbs that can occur at the beginning of a pattern. 
+ */ + +#ifndef CONTROL_VERBS_H +#define CONTROL_VERBS_H + +namespace ue2 { + +struct ParseMode; + +const char *read_control_verbs(const char *ptr, const char *end, + ParseMode &mode); + +} // namespace ue2 + +#endif // CONTROL_VERBS_H diff --git a/src/parser/control_verbs.rl b/src/parser/control_verbs.rl new file mode 100644 index 00000000..7eb9b86c --- /dev/null +++ b/src/parser/control_verbs.rl @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Parser for control verbs that can occur at the beginning of a pattern. + */ + +#include "parser/control_verbs.h" + +#include "parser/Parser.h" +#include "parser/parse_error.h" + +#include +#include + +using namespace std; + +namespace ue2 { + +const char *read_control_verbs(const char *ptr, const char *end, + ParseMode &mode) { + const char *p = ptr; + const char *pe = end; + const char *eof = pe; + const char *ts, *te; + int cs; + UNUSED int act; + + %%{ + machine ControlVerbs; + + # Verbs that we recognise but do not support. + unhandledVerbs = '(*' ( + 'LIMIT_MATCH=' [0-9]+ | + 'LIMIT_RECURSION=' [0-9]+ | + 'NO_AUTO_POSSESS' | + 'NO_START_OPT' | + 'UTF16' | + 'UTF32' | + 'CR' | + 'LF' | + 'CRLF' | + 'ANYCRLF' | + 'ANY' | + 'BSR_ANYCRLF' | + 'BSR_UNICODE' + ) . ')'; + + main := |* + '(*UTF8)' | '(*UTF)' => { + mode.utf8 = true; + }; + + '(*UCP)' => { + mode.ucp = true; + }; + + unhandledVerbs => { + ostringstream str; + str << "Unsupported control verb " << string(ts, te - ts); + throw LocatedParseError(str.str()); + }; + + '(*' [^)]+ ')' => { + ostringstream str; + str << "Unknown control verb " << string(ts, te - ts); + throw LocatedParseError(str.str()); + }; + + # Anything else means we're done. 
+ any => { + fhold; + fbreak; + }; + *|; + + write data; + write init; + }%% + + try { + %% write exec; + } catch (LocatedParseError &error) { + if (ts >= ptr && ts <= pe) { + error.locate(ts - ptr); + } else { + error.locate(0); + } + throw; + } + + return p; +} + +} // namespace ue2 diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 37307bc9..52287ec0 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -90,8 +90,8 @@ 91:/a\owibble/ #Value in \o{...} sequence is non-octal or missing braces at index 1. 92:/a\o{wibble/ #Value in \o{...} sequence is non-octal or missing braces at index 1. 93:/a\o{777}/ #Value in \o{...} sequence is too large at index 1. -94:/(*UTF16)foo/ #(*UTF16) not supported at index 2. -95:/(*BSR_UNICODE)abc/ #Unknown control verb at index 2. +94:/(*UTF16)foo/ #Unsupported control verb (*UTF16) at index 0. +95:/(*BSR_UNICODE)abc/ #Unsupported control verb (*BSR_UNICODE) at index 0. 96:/a+(*SKIP)b/ #Unknown control verb at index 4. 97:/foo(*/ #Invalid repeat at index 4. 98:/[:\]:]/ #POSIX named classes are only supported inside a class at index 0. @@ -130,3 +130,8 @@ 133:/[a[.\].]]/ #Unsupported POSIX collating element at index 2. 134:/[a[=\]=]]/ #Unsupported POSIX collating element at index 2. 135:/[^\D\d]/8W #Pattern can never match. +136:/(*LIMIT_MATCH=1000)foobar/ #Unsupported control verb (*LIMIT_MATCH=1000) at index 0. +137:/(*UTF32)foobar/ #Unsupported control verb (*UTF32) at index 0. +138:/(*UNKNOWNVERB)foobar/ #Unknown control verb (*UNKNOWNVERB) at index 0. +139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5. +140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6. From 4def0c8a52766e72f2345b06f974129dbfacbecc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Feb 2017 13:56:30 +1100 Subject: [PATCH 075/326] parser: switch to using char* pointers --- src/parser/Parser.rl | 61 +++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index dfa0beda..9cddace4 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -116,7 +116,7 @@ unsigned parseAsDecimal(unsigned oct) { static constexpr u32 MAX_NUMBER = INT_MAX; static -void pushDec(u32 *acc, u8 raw_digit) { +void pushDec(u32 *acc, char raw_digit) { assert(raw_digit >= '0' && raw_digit <= '9'); u32 digit_val = raw_digit - '0'; @@ -130,7 +130,7 @@ void pushDec(u32 *acc, u8 raw_digit) { } static -void pushOct(u32 *acc, u8 raw_digit) { +void pushOct(u32 *acc, char raw_digit) { assert(raw_digit >= '0' && raw_digit <= '7'); u32 digit_val = raw_digit - '0'; @@ -169,8 +169,7 @@ ComponentSequence *enterSequence(ComponentSequence *parent, } static -void addLiteral(ComponentSequence *currentSeq, unsigned char c, - const ParseMode &mode) { +void addLiteral(ComponentSequence *currentSeq, char c, const ParseMode &mode) { if (mode.utf8 && mode.caseless) { /* leverage ComponentClass to generate the vertices */ auto cc = getComponentClass(mode); @@ -197,7 +196,7 @@ void addEscaped(ComponentSequence *currentSeq, unichar accum, if (accum > 255) { throw LocatedParseError(err_msg); } - addLiteral(currentSeq, (unsigned char)accum, mode); + addLiteral(currentSeq, (char)accum, mode); } } @@ -217,7 +216,7 @@ void addEscapedHex(ComponentSequence *currentSeq, unichar accum, #define SLASH_C_ERROR "\\c must be followed by an ASCII character" static -u8 decodeCtrl(u8 raw) { +u8 decodeCtrl(char raw) { if 
(raw & 0x80) { throw LocatedParseError(SLASH_C_ERROR); } @@ -225,10 +224,10 @@ u8 decodeCtrl(u8 raw) { } static -unichar readUtf8CodePoint2c(const u8 *ts) { +unichar readUtf8CodePoint2c(const char *s) { + auto *ts = (const u8 *)s; assert(ts[0] >= 0xc0 && ts[0] < 0xe0); assert(ts[1] >= 0x80 && ts[1] < 0xc0); - unichar val = ts[0] & 0x1f; val <<= 6; val |= ts[1] & 0x3f; @@ -238,7 +237,8 @@ unichar readUtf8CodePoint2c(const u8 *ts) { } static -unichar readUtf8CodePoint3c(const u8 *ts) { +unichar readUtf8CodePoint3c(const char *s) { + auto *ts = (const u8 *)s; assert(ts[0] >= 0xe0 && ts[0] < 0xf0); assert(ts[1] >= 0x80 && ts[1] < 0xc0); assert(ts[2] >= 0x80 && ts[2] < 0xc0); @@ -253,7 +253,8 @@ unichar readUtf8CodePoint3c(const u8 *ts) { } static -unichar readUtf8CodePoint4c(const u8 *ts) { +unichar readUtf8CodePoint4c(const char *s) { + auto *ts = (const u8 *)s; assert(ts[0] >= 0xf0 && ts[0] < 0xf8); assert(ts[1] >= 0x80 && ts[1] < 0xc0); assert(ts[2] >= 0x80 && ts[2] < 0xc0); @@ -273,12 +274,10 @@ unichar readUtf8CodePoint4c(const u8 *ts) { %%{ machine regex; - alphtype unsigned char; - action throwUnsupportedEscape { ostringstream str; - str << "'\\" << (char)*(ts + 1) << "' at index " - << ts - ptr << " not supported in a character class."; + str << "'\\" << *(ts + 1) << "' at index " << ts - ptr + << " not supported in a character class."; throw ParseError(str.str()); } action unsupportedProperty { @@ -974,7 +973,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { }; '\\o{' [0-7]+ '}' => { - string oct((const char *)ts + 3, te - ts - 4); + string oct(ts + 3, te - ts - 4); long int val = strtol(oct.c_str(), nullptr, 8); if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); @@ -999,7 +998,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { }; # Unicode Hex '\\x{' xdigit+ '}' => { - string hex((const char *)ts + 3, te - ts - 4); + string hex(ts + 3, te - ts - 4); long int val = strtol(hex.c_str(), nullptr, 16); if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); @@ -1089,7 +1088,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { # Literal character (any - ']') => { - currentCls->add(*ts); + currentCls->add((u8)*ts); }; ']' => { @@ -1443,7 +1442,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { // Otherwise, we interpret the first three digits as an // octal escape, and the remaining characters stand for // themselves as literals. 
- const u8 *s = ts; + const char *s = ts; unsigned int accum = 0; unsigned int oct_digits = 0; assert(*s == '\\'); // token starts at backslash @@ -1488,7 +1487,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { throw LocatedParseError("Invalid reference after \\g"); }; '\\o{' [0-7]+ '}' => { - string oct((const char *)ts + 3, te - ts - 4); + string oct(ts + 3, te - ts - 4); long int val = strtol(oct.c_str(), nullptr, 8); if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); @@ -1505,7 +1504,7 @@ unichar readUtf8CodePoint4c(const u8 *ts) { }; # Unicode Hex '\\x{' xdigit+ '}' => { - string hex((const char *)ts + 3, te - ts - 4); + string hex(ts + 3, te - ts - 4); long int val = strtol(hex.c_str(), nullptr, 16); if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); @@ -1529,8 +1528,8 @@ unichar readUtf8CodePoint4c(const u8 *ts) { # A bunch of unsupported (for now) escapes escapedUnsupported => { ostringstream str; - str << "'\\" << (char)*(ts + 1) << "' at index " - << ts - ptr << " not supported."; + str << "'\\" << *(ts + 1) << "' at index " << ts - ptr + << " not supported."; throw ParseError(str.str()); }; @@ -1831,24 +1830,22 @@ unichar readUtf8CodePoint4c(const u8 *ts) { %% write data nofinal; /** \brief Main parser call, returns root Component or nullptr. */ -unique_ptr parse(const char *c_ptr, ParseMode &globalMode) { - assert(c_ptr); +unique_ptr parse(const char *ptr, ParseMode &globalMode) { + assert(ptr); - const u8 *ptr = (const u8 *const)c_ptr; - const u8 *p = ptr; - const u8 *pe = ptr + strlen(c_ptr); + const char *p = ptr; + const char *pe = ptr + strlen(ptr); // First, read the control verbs, set any global mode flags and move the // ptr forward. - p = (const u8 *)read_control_verbs((const char *)p, (const char *)pe, - globalMode); + p = read_control_verbs(p, pe, globalMode); - const u8 *eof = pe; + const char *eof = pe; int cs; UNUSED int act; int top; vector stack; - const u8 *ts, *te; + const char *ts, *te; unichar accumulator = 0; unichar octAccumulator = 0; /* required as we are also accumulating for * back ref when looking for octals */ @@ -1894,7 +1891,7 @@ unique_ptr parse(const char *c_ptr, ParseMode &globalMode) { bool inCharClassEarly = false; // Location at which the current character class began. 
- const u8 *currentClsBegin = p; + const char *currentClsBegin = p; // We throw exceptions on various parsing failures beyond this point: we // use a try/catch block here to clean up our allocated memory before we From bfc8be56755b745f78459953530c04b60ffc2ee1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Feb 2017 15:01:30 +1100 Subject: [PATCH 076/326] parser: use stoul(), not strtol() --- src/parser/Parser.rl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 9cddace4..913eaa0e 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -974,7 +974,7 @@ unichar readUtf8CodePoint4c(const char *s) { '\\o{' [0-7]+ '}' => { string oct(ts + 3, te - ts - 4); - long int val = strtol(oct.c_str(), nullptr, 8); + unsigned long val = stoul(oct, nullptr, 8); if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); } @@ -999,7 +999,7 @@ unichar readUtf8CodePoint4c(const char *s) { # Unicode Hex '\\x{' xdigit+ '}' => { string hex(ts + 3, te - ts - 4); - long int val = strtol(hex.c_str(), nullptr, 16); + unsigned long val = stoul(hex, nullptr, 16); if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); } @@ -1488,7 +1488,7 @@ unichar readUtf8CodePoint4c(const char *s) { }; '\\o{' [0-7]+ '}' => { string oct(ts + 3, te - ts - 4); - long int val = strtol(oct.c_str(), nullptr, 8); + unsigned long val = stoul(oct, nullptr, 8); if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); } @@ -1505,7 +1505,7 @@ unichar readUtf8CodePoint4c(const char *s) { # Unicode Hex '\\x{' xdigit+ '}' => { string hex(ts + 3, te - ts - 4); - long int val = strtol(hex.c_str(), nullptr, 16); + unsigned long val = stoul(hex, nullptr, 16); if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); } From bef6889844aac86fa3b6cdb6cea9217b225d9e36 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Feb 2017 15:49:26 +1100 Subject: [PATCH 077/326] parser: use control_verb parser inline --- src/parser/Parser.rl | 16 ++++++++-------- src/parser/control_verbs.h | 4 +++- src/parser/control_verbs.rl | 4 ++-- src/parser/parse_error.cpp | 6 +++++- src/parser/parse_error.h | 16 +++++++++------- unit/hyperscan/bad_patterns.txt | 2 +- 6 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 913eaa0e..ea8e88a9 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -560,13 +560,13 @@ unichar readUtf8CodePoint4c(const char *s) { throw LocatedParseError("(*UCP) must be at start of " "expression, encountered"); }; - 'UTF16)' => { - throw LocatedParseError("(*UTF16) not supported"); - }; - 'UTF32)' => { - throw LocatedParseError("(*UTF32) not supported"); - }; - any => { + # Use the control verb mini-parser to report an error for this + # unsupported/unknown verb. + [^)]+ ')' => { + ParseMode temp_mode; + assert(ts - 2 >= ptr); // parser needs the '(*' at the start too. + read_control_verbs(ts - 2, te, (ts - 2 - ptr), temp_mode); + assert(0); // Should have thrown a parse error. throw LocatedParseError("Unknown control verb"); }; *|; @@ -1838,7 +1838,7 @@ unique_ptr parse(const char *ptr, ParseMode &globalMode) { // First, read the control verbs, set any global mode flags and move the // ptr forward. 
- p = read_control_verbs(p, pe, globalMode); + p = read_control_verbs(p, pe, 0, globalMode); const char *eof = pe; int cs; diff --git a/src/parser/control_verbs.h b/src/parser/control_verbs.h index 9cf5b116..58934ec2 100644 --- a/src/parser/control_verbs.h +++ b/src/parser/control_verbs.h @@ -34,11 +34,13 @@ #ifndef CONTROL_VERBS_H #define CONTROL_VERBS_H +#include "ue2common.h" + namespace ue2 { struct ParseMode; -const char *read_control_verbs(const char *ptr, const char *end, +const char *read_control_verbs(const char *ptr, const char *end, size_t start, ParseMode &mode); } // namespace ue2 diff --git a/src/parser/control_verbs.rl b/src/parser/control_verbs.rl index 7eb9b86c..1d3e33a9 100644 --- a/src/parser/control_verbs.rl +++ b/src/parser/control_verbs.rl @@ -43,7 +43,7 @@ using namespace std; namespace ue2 { -const char *read_control_verbs(const char *ptr, const char *end, +const char *read_control_verbs(const char *ptr, const char *end, size_t start, ParseMode &mode) { const char *p = ptr; const char *pe = end; @@ -108,7 +108,7 @@ const char *read_control_verbs(const char *ptr, const char *end, %% write exec; } catch (LocatedParseError &error) { if (ts >= ptr && ts <= pe) { - error.locate(ts - ptr); + error.locate(ts - ptr + start); } else { error.locate(0); } diff --git a/src/parser/parse_error.cpp b/src/parser/parse_error.cpp index 6245adb9..e7f60b26 100644 --- a/src/parser/parse_error.cpp +++ b/src/parser/parse_error.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,9 +44,13 @@ ParseError::~ParseError() {} LocatedParseError::~LocatedParseError() {} void LocatedParseError::locate(size_t offset) { + if (finalized) { + return; + } std::ostringstream str; str << reason << " at index " << offset << "."; reason = str.str(); + finalized = true; } } diff --git a/src/parser/parse_error.h b/src/parser/parse_error.h index e727991d..4556ed5e 100644 --- a/src/parser/parse_error.h +++ b/src/parser/parse_error.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,8 +30,8 @@ * \brief Parse/Compile exceptions. */ -#ifndef PARSE_ERROR_H_A02047D1AA16C9 -#define PARSE_ERROR_H_A02047D1AA16C9 +#ifndef PARSE_ERROR_H +#define PARSE_ERROR_H #include "util/compile_error.h" @@ -44,22 +44,24 @@ class ParseError : public CompileError { public: // Note: 'why' should describe why the error occurred and end with a // full stop, but no line break. - explicit ParseError(const std::string &why) : CompileError(why) {} + explicit ParseError(std::string why) : CompileError(std::move(why)) {} ~ParseError() override; }; class LocatedParseError : public ParseError { public: - explicit LocatedParseError(const std::string &why) : ParseError(".") { - reason = why; // don't use ParseError ctor + explicit LocatedParseError(std::string why) : ParseError(".") { + reason = std::move(why); // don't use ParseError ctor } ~LocatedParseError() override; void locate(size_t offset); +private: + bool finalized = false; //!< true when locate() has been called. 
}; } // namespace ue2 -#endif /* PARSE_ERROR_H_A02047D1AA16C9 */ +#endif /* PARSE_ERROR_H */ diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 52287ec0..d4de452a 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -92,7 +92,7 @@ 93:/a\o{777}/ #Value in \o{...} sequence is too large at index 1. 94:/(*UTF16)foo/ #Unsupported control verb (*UTF16) at index 0. 95:/(*BSR_UNICODE)abc/ #Unsupported control verb (*BSR_UNICODE) at index 0. -96:/a+(*SKIP)b/ #Unknown control verb at index 4. +96:/a+(*SKIP)b/ #Unknown control verb (*SKIP) at index 2. 97:/foo(*/ #Invalid repeat at index 4. 98:/[:\]:]/ #POSIX named classes are only supported inside a class at index 0. 99:/[[:[:]/ #Invalid POSIX named class at index 1. From 821a1b81e6ce75e3547853d1d70e5418eb04a967 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 3 Feb 2017 09:40:00 +1100 Subject: [PATCH 078/326] rose: only dump delay rebuild table if streaming --- src/rose/rose_build_dump.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 6ac80ef9..9cab4087 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -541,8 +541,12 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { mp = makeMatcherProto(build, ROSE_FLOATING, false, longLitLengthThreshold); dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, ROSE_FLOATING, true, longLitLengthThreshold); - dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", mp.lits); + if (build.cc.streaming) { + mp = makeMatcherProto(build, ROSE_FLOATING, true, + longLitLengthThreshold); + dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", + mp.lits); + } mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, false, build.ematcher_region_size); From 084596bb5e24929380e7c3e645ed6ecb890e7a4d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 3 Feb 2017 11:17:47 +1100 Subject: [PATCH 079/326] parser: check for std::out_of_range from stoul --- src/parser/Parser.rl | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index ea8e88a9..6f4fd80a 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -974,7 +974,12 @@ unichar readUtf8CodePoint4c(const char *s) { '\\o{' [0-7]+ '}' => { string oct(ts + 3, te - ts - 4); - unsigned long val = stoul(oct, nullptr, 8); + unsigned long val; + try { + val = stoul(oct, nullptr, 8); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); } @@ -999,7 +1004,12 @@ unichar readUtf8CodePoint4c(const char *s) { # Unicode Hex '\\x{' xdigit+ '}' => { string hex(ts + 3, te - ts - 4); - unsigned long val = stoul(hex, nullptr, 16); + unsigned long val; + try { + val = stoul(hex, nullptr, 16); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); } @@ -1488,7 +1498,12 @@ unichar readUtf8CodePoint4c(const char *s) { }; '\\o{' [0-7]+ '}' => { string oct(ts + 3, te - ts - 4); - unsigned long val = stoul(oct, nullptr, 8); + unsigned long val; + try { + val = stoul(oct, nullptr, 8); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if ((!mode.utf8 && val > 255) || val > 
MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); } @@ -1505,7 +1520,12 @@ unichar readUtf8CodePoint4c(const char *s) { # Unicode Hex '\\x{' xdigit+ '}' => { string hex(ts + 3, te - ts - 4); - unsigned long val = stoul(hex, nullptr, 16); + unsigned long val; + try { + val = stoul(hex, nullptr, 16); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); } From 1245156f44110f214c207ee2350be07faa8add29 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 3 Feb 2017 11:39:06 +1100 Subject: [PATCH 080/326] parser: handle "control verbs" without close paren --- src/parser/Parser.rl | 3 +++ unit/hyperscan/bad_patterns.txt | 1 + 2 files changed, 4 insertions(+) diff --git a/src/parser/Parser.rl b/src/parser/Parser.rl index 6f4fd80a..52b3340c 100644 --- a/src/parser/Parser.rl +++ b/src/parser/Parser.rl @@ -569,6 +569,9 @@ unichar readUtf8CodePoint4c(const char *s) { assert(0); // Should have thrown a parse error. throw LocatedParseError("Unknown control verb"); }; + any => { + throw LocatedParseError("Unknown control verb"); + }; *|; ############################################################# diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index d4de452a..d970761a 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -135,3 +135,4 @@ 138:/(*UNKNOWNVERB)foobar/ #Unknown control verb (*UNKNOWNVERB) at index 0. 139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5. 140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6. +141:/(*@&/ #Unknown control verb at index 2. From 9363ae74860c638fe65a42a5bf415d8b210b4fd2 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 10 Feb 2017 11:29:42 +1100 Subject: [PATCH 081/326] Clear upper half of AVX register before libm call Clearing the upper half of the AVX register is required before calling SSE code to avoid AVX-to-SSE transition penalties. --- CMakeLists.txt | 1 + src/fdr/fdr_compile.cpp | 3 +- src/util/math.h | 73 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 src/util/math.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ec1f9e6..8329c0ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -991,6 +991,7 @@ SET (hs_SRCS src/util/fatbit_build.h src/util/graph.h src/util/hash.h + src/util/math.h src/util/multibit_build.cpp src/util/multibit_build.h src/util/order_check.h diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 953b2dab..f99fcb65 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -43,6 +43,7 @@ #include "util/alloc.h" #include "util/compare.h" #include "util/dump_mask.h" +#include "util/math.h" #include "util/target_info.h" #include "util/ue2string.h" #include "util/verify_types.h" @@ -195,7 +196,7 @@ aligned_unique_ptr FDRCompiler::setupFDR() { static double getScoreUtil(u32 len, u32 count) { return len == 0 ? 
numeric_limits::max() - : pow(count, 1.05) * pow(len, -3.0); + : our_pow(count, 1.05) * our_pow(len, -3.0); } /** diff --git a/src/util/math.h b/src/util/math.h new file mode 100644 index 00000000..80ad4927 --- /dev/null +++ b/src/util/math.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTIL_MATH_H_ +#define UTIL_MATH_H_ + +#include + +#ifdef __cplusplus +# if defined(HAVE_CXX_X86INTRIN_H) +# define USE_X86INTRIN_H +# endif +#else // C +# if defined(HAVE_C_X86INTRIN_H) +# define USE_X86INTRIN_H +# endif +#endif + +#ifdef __cplusplus +# if defined(HAVE_CXX_INTRIN_H) +# define USE_INTRIN_H +# endif +#else // C +# if defined(HAVE_C_INTRIN_H) +# define USE_INTRIN_H +# endif +#endif + +#if defined(USE_X86INTRIN_H) +#include +#elif defined(USE_INTRIN_H) +#include +#endif + +static really_inline +double our_pow(double x, double y) { +#if defined(__AVX__) + /* + * Clear the upper half of AVX registers before calling into the math lib. + * On some versions of glibc this can save thousands of AVX-to-SSE + * transitions. 
+ */ + _mm256_zeroupper(); +#endif + return pow(x, y); +} + +#endif // UTIL_MATH_H_ From 1be1293491e95e0bb60429e7531167b569fc6312 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 10 Feb 2017 13:04:22 +1100 Subject: [PATCH 082/326] fdr: add grey box control for flood detection --- src/fdr/fdr_compile.cpp | 13 +++++++------ src/fdr/fdr_compile_internal.h | 3 ++- src/fdr/flood_compile.cpp | 13 +++++++++++-- src/fdr/teddy_compile.cpp | 15 ++++++++++----- src/fdr/teddy_compile.h | 3 ++- src/grey.cpp | 2 ++ src/grey.h | 1 + 7 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index f99fcb65..015fa51e 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -74,6 +74,7 @@ namespace { class FDRCompiler : boost::noncopyable { private: const FDREngineDescription ŋ + const Grey &grey; vector tab; vector lits; map > bucketToLits; @@ -90,9 +91,9 @@ private: public: FDRCompiler(vector lits_in, const FDREngineDescription &eng_in, - bool make_small_in) - : eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(move(lits_in)), - make_small(make_small_in) {} + bool make_small_in, const Grey &grey_in) + : eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()), + lits(move(lits_in)), make_small(make_small_in) {} aligned_unique_ptr build(); }; @@ -146,7 +147,7 @@ void FDRCompiler::createInitialState(FDR *fdr) { aligned_unique_ptr FDRCompiler::setupFDR() { size_t tabSize = eng.getTabSizeBytes(); - auto floodControlTmp = setupFDRFloodControl(lits, eng); + auto floodControlTmp = setupFDRFloodControl(lits, eng, grey); auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); assert(ISALIGNED_16(tabSize)); @@ -543,7 +544,7 @@ aligned_unique_ptr fdrBuildTableInternal(const vector &lits, DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? 
"avx2" : "no-avx2"); if (grey.fdrAllowTeddy) { - auto fdr = teddyBuildTableHinted(lits, make_small, hint, target); + auto fdr = teddyBuildTableHinted(lits, make_small, hint, target, grey); if (fdr) { DEBUG_PRINTF("build with teddy succeeded\n"); return fdr; @@ -566,7 +567,7 @@ aligned_unique_ptr fdrBuildTableInternal(const vector &lits, des->stride = 1; } - FDRCompiler fc(lits, *des, make_small); + FDRCompiler fc(lits, *des, make_small, grey); return fc.build(); } diff --git a/src/fdr/fdr_compile_internal.h b/src/fdr/fdr_compile_internal.h index 0fd59902..73de4d42 100644 --- a/src/fdr/fdr_compile_internal.h +++ b/src/fdr/fdr_compile_internal.h @@ -55,6 +55,7 @@ typedef u32 PositionInBucket; // zero is 'we are matching right now!", class EngineDescription; class FDREngineDescription; struct hwlmStreamingControl; +struct Grey; std::pair, size_t> setupFullConfs( const std::vector &lits, const EngineDescription &eng, @@ -67,7 +68,7 @@ std::pair, size_t> setupFullConfs( // right state yet to allow blindly advancing std::pair, size_t> setupFDRFloodControl(const std::vector &lits, - const EngineDescription &eng); + const EngineDescription &eng, const Grey &grey); std::pair, size_t> fdrBuildTableStreaming(const std::vector &lits, diff --git a/src/fdr/flood_compile.cpp b/src/fdr/flood_compile.cpp index 62693c30..b6d23c9d 100644 --- a/src/fdr/flood_compile.cpp +++ b/src/fdr/flood_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "fdr_confirm.h" #include "fdr_compile_internal.h" #include "fdr_engine_description.h" +#include "grey.h" #include "ue2common.h" #include "util/alloc.h" #include "util/bitutils.h" @@ -92,7 +93,7 @@ void addFlood(vector &tmpFlood, u8 c, const hwlmLiteral &lit, pair, size_t> setupFDRFloodControl(const vector &lits, - const EngineDescription &eng) { + const EngineDescription &eng, const Grey &grey) { vector tmpFlood(N_CHARS); u32 default_suffix = eng.getDefaultFloodSuffixLength(); @@ -187,6 +188,14 @@ setupFDRFloodControl(const vector &lits, } #endif + // If flood detection has been switched off in the grey box, we comply by + // setting idCount too high for all floods. 
+ if (!grey.fdrAllowFlood) { + for (auto &fl : tmpFlood) { + fl.idCount = FDR_FLOOD_MAX_IDS; + } + } + map flood2chars; for (u32 i = 0; i < N_CHARS; i++) { FDRFlood fl = tmpFlood[i]; diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 66466e6c..09155280 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -31,6 +31,7 @@ #include "fdr_compile_internal.h" #include "fdr_confirm.h" #include "fdr_engine_description.h" +#include "grey.h" #include "ue2common.h" #include "util/alloc.h" #include "util/compare.h" @@ -66,13 +67,16 @@ namespace { class TeddyCompiler : boost::noncopyable { const TeddyEngineDescription ŋ + const Grey &grey; const vector &lits; bool make_small; public: TeddyCompiler(const vector &lits_in, - const TeddyEngineDescription &eng_in, bool make_small_in) - : eng(eng_in), lits(lits_in), make_small(make_small_in) {} + const TeddyEngineDescription &eng_in, bool make_small_in, + const Grey &grey_in) + : eng(eng_in), grey(grey_in), lits(lits_in), make_small(make_small_in) { + } aligned_unique_ptr build(); bool pack(map > &bucketToLits); @@ -307,7 +311,7 @@ aligned_unique_ptr TeddyCompiler::build() { size_t maskLen = eng.numMasks * 16 * 2 * maskWidth; - auto floodControlTmp = setupFDRFloodControl(lits, eng); + auto floodControlTmp = setupFDRFloodControl(lits, eng, grey); auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); size_t size = ROUNDUP_N(sizeof(Teddy) + @@ -417,7 +421,8 @@ aligned_unique_ptr TeddyCompiler::build() { aligned_unique_ptr teddyBuildTableHinted(const vector &lits, bool make_small, u32 hint, - const target_t &target) { + const target_t &target, + const Grey &grey) { unique_ptr des; if (hint == HINT_INVALID) { des = chooseTeddyEngine(target, lits); @@ -427,7 +432,7 @@ aligned_unique_ptr teddyBuildTableHinted(const vector &lits, if (!des) { return nullptr; } - TeddyCompiler tc(lits, *des, make_small); + TeddyCompiler tc(lits, *des, make_small, grey); return tc.build(); } diff --git a/src/fdr/teddy_compile.h b/src/fdr/teddy_compile.h index bdd15865..07eb18f6 100644 --- a/src/fdr/teddy_compile.h +++ b/src/fdr/teddy_compile.h @@ -43,11 +43,12 @@ struct target_t; namespace ue2 { +struct Grey; struct hwlmLiteral; ue2::aligned_unique_ptr teddyBuildTableHinted(const std::vector &lits, bool make_small, - u32 hint, const target_t &target); + u32 hint, const target_t &target, const Grey &grey); } // namespace ue2 diff --git a/src/grey.cpp b/src/grey.cpp index 8881666e..cd19e863 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -63,6 +63,7 @@ Grey::Grey(void) : allowDecoratedLiteral(true), allowNoodle(true), fdrAllowTeddy(true), + fdrAllowFlood(true), violetAvoidSuffixes(true), violetAvoidWeakInfixes(true), violetDoubleCut(true), @@ -226,6 +227,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowDecoratedLiteral); G_UPDATE(allowNoodle); G_UPDATE(fdrAllowTeddy); + G_UPDATE(fdrAllowFlood); G_UPDATE(violetAvoidSuffixes); G_UPDATE(violetAvoidWeakInfixes); G_UPDATE(violetDoubleCut); diff --git a/src/grey.h b/src/grey.h index 17d82527..dcbc2e7d 100644 --- a/src/grey.h +++ b/src/grey.h @@ -64,6 +64,7 @@ struct Grey { bool allowNoodle; bool fdrAllowTeddy; + bool fdrAllowFlood; u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */ bool violetAvoidWeakInfixes; From fddcdbb12992e803772cf2bb425cfbf821ecd424 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 14 Feb 2017 09:56:43 +1100 Subject: [PATCH 083/326] determinisation: use unordered_map to hold state set -> id mapping --- CMakeLists.txt | 1 + 
src/nfagraph/ng_mcclellan.cpp | 6 ++- src/util/hash_dynamic_bitset.h | 95 ++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 src/util/hash_dynamic_bitset.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8329c0ba..85d97b9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -991,6 +991,7 @@ SET (hs_SRCS src/util/fatbit_build.h src/util/graph.h src/util/hash.h + src/util/hash_dynamic_bitset.h src/util/math.h src/util/multibit_build.cpp src/util/multibit_build.h diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 375086a4..e061084e 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,8 @@ #include "util/bitfield.h" #include "util/determinise.h" #include "util/graph_range.h" +#include "util/hash.h" +#include "util/hash_dynamic_bitset.h" #include "util/make_unique.h" #include "util/report_manager.h" #include "util/ue2_containers.h" @@ -467,7 +469,7 @@ public: struct Big_Traits { using StateSet = dynamic_bitset<>; - using StateMap = map; + using StateMap = unordered_map; static StateSet init_states(u32 num) { return StateSet(num); diff --git a/src/util/hash_dynamic_bitset.h b/src/util/hash_dynamic_bitset.h new file mode 100644 index 00000000..315aed34 --- /dev/null +++ b/src/util/hash_dynamic_bitset.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Hashing utility functions. + */ + +#ifndef UTIL_HASH_DYNAMIC_BITSET_H +#define UTIL_HASH_DYNAMIC_BITSET_H + +#include +#include + +#include + +namespace ue2 { + +/** + * \brief An output iterator which calculates the combined hash of all elements + * written to it. 
+ * + * The location to output the hash is provided to the constructor and should + * already be zero initialised. + */ +struct hash_output_it { + using value_type = void; + using difference_type = ptrdiff_t; + using pointer = void *; + using reference = void; + using iterator_category = std::output_iterator_tag; + + hash_output_it(size_t *hash_out = nullptr) : out(hash_out) {} + hash_output_it &operator++() { + return *this; + } + hash_output_it &operator++(int) { + return *this; + } + + struct deref_proxy { + deref_proxy(size_t *hash_out) : out(hash_out) {} + + template + void operator=(const T &val) const { + boost::hash_combine(*out, val); + } + + private: + size_t *out; /* output location of the owning iterator */ + }; + + deref_proxy operator*() { return {out}; } + +private: + size_t *out; /* location to output the hashes to */ +}; + +/* Function object for hashing a dynamic bitset */ +struct hash_dynamic_bitset { + size_t operator()(const boost::dynamic_bitset<> &bs) const { + size_t rv = 0; + to_block_range(bs, hash_output_it(&rv)); + return rv; + } +}; + +} // namespace ue2 + +#endif From 893674d3c777ca04b74d05763b48238e9d1925e4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 13 Feb 2017 15:35:38 +1100 Subject: [PATCH 084/326] rose_build_anchored: tbi->build --- src/rose/rose_build_anchored.cpp | 34 ++++++++++++++++---------------- src/rose/rose_build_anchored.h | 4 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 8ba80f8e..258eee9c 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -469,7 +469,7 @@ bool check_dupe(const raw_dfa &rdfa, } static -bool check_dupe_simple(const RoseBuildImpl &tbi, u32 min_bound, u32 max_bound, +bool check_dupe_simple(const RoseBuildImpl &build, u32 min_bound, u32 max_bound, const ue2_literal &lit, ReportID *remap) { if (!remap) { DEBUG_PRINTF("no remap\n"); @@ -477,8 +477,8 @@ bool check_dupe_simple(const RoseBuildImpl &tbi, u32 min_bound, u32 max_bound, } simple_anchored_info sai(min_bound, max_bound, lit); - if (contains(tbi.anchored_simple, sai)) { - *remap = *tbi.anchored_simple.at(sai).begin(); + if (contains(build.anchored_simple, sai)) { + *remap = *build.anchored_simple.at(sai).begin(); return true; } @@ -642,7 +642,7 @@ bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound, } static -int finalise_out(RoseBuildImpl &tbi, const NGHolder &h, +int finalise_out(RoseBuildImpl &build, const NGHolder &h, const Automaton_Holder &autom, unique_ptr out_dfa, ReportID *remap) { u32 min_bound = ~0U; @@ -651,12 +651,12 @@ int finalise_out(RoseBuildImpl &tbi, const NGHolder &h, u32 simple_report = MO_INVALID_IDX; if (isSimple(h, &min_bound, &max_bound, &lit, &simple_report)) { assert(simple_report != MO_INVALID_IDX); - if (check_dupe_simple(tbi, min_bound, max_bound, lit, remap)) { + if (check_dupe_simple(build, min_bound, max_bound, lit, remap)) { DEBUG_PRINTF("found duplicate remapping to %u\n", *remap); return ANCHORED_REMAP; } DEBUG_PRINTF("add with report %u\n", simple_report); - tbi.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)] + build.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)] .insert(simple_report); return ANCHORED_SUCCESS; } @@ -666,15 +666,15 @@ int finalise_out(RoseBuildImpl &tbi, const NGHolder &h, out_dfa->alpha_size = autom.alphasize; out_dfa->alpha_remap = autom.alpha; auto hash = hash_dfa_no_reports(*out_dfa); - if (check_dupe(*out_dfa, 
tbi.anchored_nfas[hash], remap)) { + if (check_dupe(*out_dfa, build.anchored_nfas[hash], remap)) { return ANCHORED_REMAP; } - tbi.anchored_nfas[hash].push_back(move(out_dfa)); + build.anchored_nfas[hash].push_back(move(out_dfa)); return ANCHORED_SUCCESS; } static -int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) { +int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) { DEBUG_PRINTF("autom bad!\n"); return ANCHORED_FAIL; @@ -684,7 +684,7 @@ int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) { unique_ptr out_dfa = ue2::make_unique(NFA_OUTFIX_RAW); if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) { - return finalise_out(tbi, h, autom, move(out_dfa), remap); + return finalise_out(build, h, autom, move(out_dfa), remap); } DEBUG_PRINTF("determinise failed\n"); @@ -702,7 +702,7 @@ void setReports(NGHolder &h, const map> &reportMap, } } -int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper, +int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, const map> &reportMap) { NGHolder h; ue2::unordered_map orig_to_copy; @@ -713,10 +713,10 @@ int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper, clearReports(h); setReports(h, reportMap, orig_to_copy); - return addAutomaton(tbi, h, nullptr); + return addAutomaton(build, h, nullptr); } -int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored, +int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, u32 exit_id, ReportID *remap) { NGHolder h; cloneHolder(h, anchored); @@ -727,18 +727,18 @@ int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored, h[v].reports.insert(exit_id); } - return addAutomaton(tbi, h, remap); + return addAutomaton(build, h, remap); } static -void buildSimpleDfas(const RoseBuildImpl &tbi, +void buildSimpleDfas(const RoseBuildImpl &build, vector> *anchored_dfas) { /* we should have determinised all of these before so there should be no * chance of failure. */ - for (const auto &simple : tbi.anchored_simple) { + for (const auto &simple : build.anchored_simple) { set exit_ids; for (auto lit_id : simple.second) { - exit_ids.insert(tbi.literal_info[lit_id].final_id); + exit_ids.insert(build.literal_info[lit_id].final_id); } NGHolder h; populate_holder(simple.first, exit_ids, &h); diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index 618a46a3..ad89df65 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -67,10 +67,10 @@ u32 anchoredStateSize(const anchored_matcher_info &atable); #define ANCHORED_SUCCESS 1 #define ANCHORED_REMAP 2 -int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper, +int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, const std::map> &reportMap); -int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored, +int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, u32 exit_id, ReportID *remap); } // namespace ue2 From 1eae677d73cc83928daef934d65344fc0ff1a12b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 13 Feb 2017 15:38:05 +1100 Subject: [PATCH 085/326] rose_build_impl: fix header guard --- src/rose/rose_build_impl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index dd0752f6..addd703c 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -26,8 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
 */
 
-#ifndef ROSE_BUILD_IMPL_H_17E20A3C6935D6
-#define ROSE_BUILD_IMPL_H_17E20A3C6935D6
+#ifndef ROSE_BUILD_IMPL_H
+#define ROSE_BUILD_IMPL_H
 
 #include "rose_build.h"
 #include "rose_build_util.h"
@@ -651,4 +651,4 @@ bool canImplementGraphs(const RoseBuildImpl &tbi);
 
 } // namespace ue2
 
-#endif /* ROSE_BUILD_IMPL_H_17E20A3C6935D6 */
+#endif /* ROSE_BUILD_IMPL_H */

From 8b25d834157a4ae4ef6da86610dfca6bba498fa5 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 13 Feb 2017 15:54:16 +1100
Subject: [PATCH 086/326] rose: write fragment ids into literal_info

---
 src/rose/rose_build_bytecode.cpp | 9 +++++++++
 src/rose/rose_build_impl.h | 3 ++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 197ceb4a..a50ebb8e 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -5409,6 +5409,15 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     allocateFinalLiteralId(*this, bc);
     final_to_frag_map = groupByFragment(*this, bc);
 
+    // Write the fragment IDs into the literal_info structures.
+    for (auto &info : literal_info) {
+        if (info.final_id == MO_INVALID_IDX) {
+            continue;
+        }
+        assert(contains(final_to_frag_map, info.final_id));
+        info.fragment_id = final_to_frag_map.at(info.final_id).fragment_id;
+    }
+
     auto anchored_dfas = buildAnchoredDfas(*this);
 
     bc.floatingMinLiteralMatchOffset =
diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h
index addd703c..bf588553 100644
--- a/src/rose/rose_build_impl.h
+++ b/src/rose/rose_build_impl.h
@@ -264,7 +264,8 @@ struct rose_literal_info {
     ue2::flat_set<RoseVertex> vertices;
     rose_group group_mask = 0;
     u32 undelayed_id = MO_INVALID_IDX;
-    u32 final_id = MO_INVALID_IDX; /* id reported by fdr */
+    u32 final_id = MO_INVALID_IDX; // TODO: remove
+    u32 fragment_id = MO_INVALID_IDX; //!< ID corresponding to literal prog.
     bool squash_group = false;
     bool requires_benefits = false;
 };

From 79512bd5c3ea0448fb52a9651e254a7e4687a1f2 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 13 Feb 2017 16:41:08 +1100
Subject: [PATCH 087/326] rose: use fragment ids earlier for anchored dfas

---
 src/rose/rose_build_anchored.cpp | 40 ++++++++++++++++----------------
 src/rose/rose_build_bytecode.cpp | 33 +++++++++++++++-----------
 src/rose/rose_build_dump.cpp | 2 +-
 src/rose/rose_build_impl.h | 3 ++-
 src/rose/rose_build_matchers.cpp | 3 ++-
 5 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp
index 258eee9c..7c8c9023 100644
--- a/src/rose/rose_build_anchored.cpp
+++ b/src/rose/rose_build_anchored.cpp
@@ -183,7 +183,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) {
         flat_set<ReportID> new_reports;
         for (auto id : ds.reports) {
             assert(id < build.literal_info.size());
-            new_reports.insert(build.literal_info.at(id).final_id);
+            new_reports.insert(build.literal_info.at(id).fragment_id);
         }
         ds.reports = move(new_reports);
     }
@@ -191,7 +191,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) {
 
 /**
  * \brief Replaces the report ids currently in the dfas (rose graph literal
- * ids) with the final id for each literal.
+ * ids) with the fragment id for each literal.
  */
 static
 void remapAnchoredReports(RoseBuildImpl &build) {
@@ -208,8 +208,7 @@
  * raw_dfa with program offsets.
*/ static -void remapIdsToPrograms(raw_dfa &rdfa, - const map &final_to_frag_map) { +void remapIdsToPrograms(const RoseBuildImpl &build, raw_dfa &rdfa) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. if (ds.reports.empty()) { @@ -217,9 +216,8 @@ void remapIdsToPrograms(raw_dfa &rdfa, } flat_set new_reports; - for (auto final_id : ds.reports) { - assert(contains(final_to_frag_map, final_id)); - auto &frag = final_to_frag_map.at(final_id); + for (auto fragment_id : ds.reports) { + auto &frag = build.fragments.at(fragment_id); new_reports.insert(frag.lit_program_offset); } ds.reports = move(new_reports); @@ -227,16 +225,18 @@ void remapIdsToPrograms(raw_dfa &rdfa, } static -void populate_holder(const simple_anchored_info &sai, const set &exit_ids, - NGHolder *h_in) { +unique_ptr populate_holder(const simple_anchored_info &sai, + const flat_set &exit_ids) { DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound, dumpString(sai.literal).c_str()); - NGHolder &h = *h_in; - set ends = addDotsToGraph(h, h.start, sai.min_bound, - sai.max_bound, CharReach::dot()); + auto h_ptr = make_unique(); + NGHolder &h = *h_ptr; + auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound, + CharReach::dot()); NFAVertex v = addToGraph(h, ends, sai.literal); add_edge(v, h.accept, h); h[v].reports.insert(exit_ids.begin(), exit_ids.end()); + return h_ptr; } u32 anchoredStateSize(const anchored_matcher_info &atable) { @@ -735,15 +735,15 @@ void buildSimpleDfas(const RoseBuildImpl &build, vector> *anchored_dfas) { /* we should have determinised all of these before so there should be no * chance of failure. */ - for (const auto &simple : build.anchored_simple) { - set exit_ids; + flat_set exit_ids; + for (const auto &simple : build.anchored_simple) { + exit_ids.clear(); for (auto lit_id : simple.second) { - exit_ids.insert(build.literal_info[lit_id].final_id); + exit_ids.insert(build.literal_info[lit_id].fragment_id); } - NGHolder h; - populate_holder(simple.first, exit_ids, &h); - Automaton_Holder autom(h); - unique_ptr rdfa = ue2::make_unique(NFA_OUTFIX_RAW); + auto h = populate_holder(simple.first, exit_ids); + Automaton_Holder autom(*h); + auto rdfa = ue2::make_unique(NFA_OUTFIX_RAW); UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES); assert(!rv); rdfa->start_anchored = INIT_STATE; @@ -858,7 +858,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, } for (auto &rdfa : dfas) { - remapIdsToPrograms(rdfa, build.final_to_frag_map); + remapIdsToPrograms(build, rdfa); } vector> nfas; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a50ebb8e..03bba972 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4646,10 +4646,8 @@ rose_group getGroups(const RoseBuildImpl &build, const flat_set &lit_ids) { } static -map groupByFragment(const RoseBuildImpl &build, - const build_context &bc) { +void groupByFragment(RoseBuildImpl &build, const build_context &bc) { u32 frag_id = 0; - map final_to_frag; struct FragmentInfo { vector final_ids; @@ -4658,6 +4656,9 @@ map groupByFragment(const RoseBuildImpl &build, map frag_info; + auto &final_to_frag = build.final_to_frag_map; + auto &fragments = build.fragments; + for (const auto &m : bc.final_id_to_literal) { u32 final_id = m.first; const auto &lit_ids = m.second; @@ -4666,21 +4667,27 @@ map groupByFragment(const RoseBuildImpl &build, auto groups = getGroups(build, lit_ids); if (lit_ids.size() > 1) { - 
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); + final_to_frag.emplace(final_id, frag_id); + fragments.emplace_back(frag_id, groups); + frag_id++; continue; } const auto lit_id = *lit_ids.begin(); const auto &lit = build.literals.right.at(lit_id); if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { - final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); + final_to_frag.emplace(final_id, frag_id); + fragments.emplace_back(frag_id, groups); + frag_id++; continue; } // Combining fragments that squash their groups is unsafe. const auto &info = build.literal_info[lit_id]; if (info.squash_group) { - final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); + final_to_frag.emplace(final_id, frag_id); + fragments.emplace_back(frag_id, groups); + frag_id++; continue; } @@ -4695,14 +4702,13 @@ map groupByFragment(const RoseBuildImpl &build, const auto &fi = m.second; DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), as_string_list(fi.final_ids).c_str()); + fragments.emplace_back(frag_id, fi.groups); for (const auto final_id : fi.final_ids) { assert(!contains(final_to_frag, final_id)); - final_to_frag.emplace(final_id, LitFragment(frag_id, fi.groups)); + final_to_frag.emplace(final_id, frag_id); } frag_id++; } - - return final_to_frag; } /** @@ -4713,7 +4719,7 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { // Build a reverse mapping from fragment -> final_id. map> frag_to_final_map; for (const auto &m : build.final_to_frag_map) { - frag_to_final_map[m.second.fragment_id].insert(m.first); + frag_to_final_map[m.second].insert(m.first); } const u32 num_fragments = verify_u32(frag_to_final_map.size()); @@ -4736,7 +4742,8 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { } // Update LitFragment entries. - for (auto &frag : build.final_to_frag_map | map_values) { + for (const auto &fragment_id : build.final_to_frag_map | map_values) { + auto &frag = build.fragments.at(fragment_id); frag.lit_program_offset = litPrograms[frag.fragment_id]; frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id]; } @@ -5407,7 +5414,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { build_context bc; allocateFinalLiteralId(*this, bc); - final_to_frag_map = groupByFragment(*this, bc); + groupByFragment(*this, bc); // Write the fragment IDs into the literal_info structures. for (auto &info : literal_info) { @@ -5415,7 +5422,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { continue; } assert(contains(final_to_frag_map, info.final_id)); - info.fragment_id = final_to_frag_map.at(info.final_id).fragment_id; + info.fragment_id = final_to_frag_map.at(info.final_id); } auto anchored_dfas = buildAnchoredDfas(*this); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 9cab4087..92a3935b 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1153,7 +1153,7 @@ void dumpRoseLitPrograms(const RoseBuildImpl &build, const RoseEngine *t, programs.reserve(build.final_to_frag_map.size()); for (const auto &m : build.final_to_frag_map) { - const auto &frag = m.second; + const auto &frag = build.fragments.at(m.second); if (frag.lit_program_offset) { programs.push_back(frag.lit_program_offset); } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index bf588553..321f54d8 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -593,7 +593,8 @@ public: * overlap calculation in history assignment. 
*/ std::map anchoredLitSuffix; - std::map final_to_frag_map; + std::map final_to_frag_map; + std::vector fragments; unordered_set transient; unordered_map rose_squash_masks; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index cd88c980..3ecec9d8 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -741,7 +741,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, for (auto &lit : mp.lits) { u32 final_id = lit.id; assert(contains(build.final_to_frag_map, final_id)); - const auto &frag = build.final_to_frag_map.at(final_id); + const auto &frag = + build.fragments.at(build.final_to_frag_map.at(final_id)); lit.id = delay_rebuild ? frag.delay_program_offset : frag.lit_program_offset; lit.groups = frag.groups; From ea8d0bcb1c76fef8fde1c9c7f2ebfd2f4729d205 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 14 Feb 2017 09:14:58 +1100 Subject: [PATCH 088/326] rose: build fragments directly --- src/rose/rose_build_bytecode.cpp | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 03bba972..da0195e9 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4722,31 +4722,18 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { frag_to_final_map[m.second].insert(m.first); } - const u32 num_fragments = verify_u32(frag_to_final_map.size()); - DEBUG_PRINTF("%u fragments\n", num_fragments); - + DEBUG_PRINTF("%zu fragments\n", build.fragments.size()); auto lit_edge_map = findEdgesByLiteral(build); - vector litPrograms(num_fragments); - vector delayRebuildPrograms(num_fragments); - - for (u32 frag_id = 0; frag_id != num_fragments; ++frag_id) { - const auto &final_ids = frag_to_final_map[frag_id]; - DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag_id, + for (auto &frag : build.fragments) { + const auto &final_ids = frag_to_final_map[frag.fragment_id]; + DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag.fragment_id, as_string_list(final_ids).c_str()); - - litPrograms[frag_id] = + frag.lit_program_offset = writeLiteralProgram(build, bc, final_ids, lit_edge_map); - delayRebuildPrograms[frag_id] = + frag.delay_program_offset = buildDelayRebuildProgram(build, bc, final_ids); } - - // Update LitFragment entries. - for (const auto &fragment_id : build.final_to_frag_map | map_values) { - auto &frag = build.fragments.at(fragment_id); - frag.lit_program_offset = litPrograms[frag.fragment_id]; - frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id]; - } } static From c426d2dc7dd11ca2fdcd3e2537577f8cad07131a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 14 Feb 2017 10:19:25 +1100 Subject: [PATCH 089/326] rose: reduce anchored program dep on final_id We only need to build anchored programs for cases where a RECORD_ANCHORED instruction has been generated, and we can key those directly rather than using final_id. 
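A condensed sketch of the keying scheme this patch introduces, distilled from the diff below. The helper name getAnchId is invented for illustration; the real change inlines this logic in makeRecordAnchoredInstruction, and later patches in the series re-key the map by Rose literal ID:

    std::map<u32, u32> anchored_programs; // final_id -> dense anch_id

    u32 getAnchId(u32 final_id) {
        // Assign the next free index the first time this final_id
        // actually needs a RECORD_ANCHORED instruction.
        auto it = anchored_programs.find(final_id);
        if (it == anchored_programs.end()) {
            u32 anch_id = verify_u32(anchored_programs.size());
            it = anchored_programs.emplace(final_id, anch_id).first;
        }
        return it->second;
    }

Only literals that reach this point get a slot, so anchored_count becomes the number of anchored programs actually emitted rather than the width of a contiguous final_id range.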
--- src/rose/program_runtime.h | 11 ++++------- src/rose/rose_build_bytecode.cpp | 23 ++++++++++++++++++----- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 7172f6aa..d67c307f 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -103,7 +103,7 @@ void rosePushDelayedMatch(const struct RoseEngine *t, static rose_inline void recordAnchoredLiteralMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 literal_id, + struct hs_scratch *scratch, u32 anch_id, u64a end) { assert(end); @@ -113,7 +113,7 @@ void recordAnchoredLiteralMatch(const struct RoseEngine *t, struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch); - DEBUG_PRINTF("record %u @ %llu\n", literal_id, end); + DEBUG_PRINTF("record %u (of %u) @ %llu\n", anch_id, t->anchored_count, end); if (!bf64_set(&scratch->al_log_sum, end - 1)) { // first time, clear row @@ -121,11 +121,8 @@ void recordAnchoredLiteralMatch(const struct RoseEngine *t, fatbit_clear(anchoredLiteralRows[end - 1]); } - u32 rel_idx = literal_id - t->anchored_base_id; - DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx, - t->anchored_count); - assert(rel_idx < t->anchored_count); - fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); + assert(anch_id < t->anchored_count); + fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, anch_id); } static rose_inline diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index da0195e9..c050e683 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -246,6 +246,9 @@ struct build_context : boost::noncopyable { /** \brief Mapping from final ID to the set of literals it is used for. */ map> final_id_to_literal; + + /** \brief Mapping from final ID to anchored program index. 
*/ + map anchored_programs; }; /** \brief subengine info including built engine and @@ -4269,7 +4272,14 @@ void makeRecordAnchoredInstruction(const RoseBuildImpl &build, return; } - program.add_before_end(make_unique(final_id)); + auto it = bc.anchored_programs.find(final_id); + if (it == bc.anchored_programs.end()) { + u32 anch_id = verify_u32(bc.anchored_programs.size()); + it = bc.anchored_programs.emplace(final_id, anch_id).first; + DEBUG_PRINTF("added anch_id=%u for final_id %u\n", anch_id, final_id); + } + u32 anch_id = it->second; + program.add_before_end(make_unique(anch_id)); } static @@ -4757,11 +4767,14 @@ u32 buildAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { auto lit_edge_map = findEdgesByLiteral(build); vector programs; + programs.resize(bc.anchored_programs.size(), ROSE_INVALID_PROG_OFFSET); - for (u32 final_id = build.anchored_base_id; - final_id < build.delay_base_id; final_id++) { + for (const auto &m : bc.anchored_programs) { + u32 final_id = m.first; + u32 anch_id = m.second; u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map); - programs.push_back(offset); + DEBUG_PRINTF("final_id %u -> anch prog at %u\n", final_id, offset); + programs[anch_id] = offset; } DEBUG_PRINTF("%zu anchored programs\n", programs.size()); @@ -5704,7 +5717,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->delay_fatbit_size = fatbit_size(engine->delay_count); engine->delay_base_id = delay_base_id; engine->anchored_base_id = anchored_base_id; - engine->anchored_count = delay_base_id - anchored_base_id; + engine->anchored_count = bc.anchored_programs.size(); engine->anchored_fatbit_size = fatbit_size(engine->anchored_count); engine->rosePrefixCount = rosePrefixCount; From dc8220648cf983bd911dc61a43f870110dadf2e7 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 14 Feb 2017 10:23:53 +1100 Subject: [PATCH 090/326] rose: remove now-unused anchored_base_id --- src/rose/rose_build_bytecode.cpp | 2 -- src/rose/rose_build_dump.cpp | 1 - src/rose/rose_build_impl.h | 2 -- src/rose/rose_build_misc.cpp | 1 - src/rose/rose_internal.h | 2 -- 5 files changed, 8 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c050e683..b22f9149 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5293,7 +5293,6 @@ void allocateFinalLiteralId(RoseBuildImpl &build, build_context &bc) { allocateFinalIdToSet(build, bc, norm, &next_final_id); /* next anchored stuff */ - build.anchored_base_id = next_final_id; allocateFinalIdToSet(build, bc, anch, &next_final_id); /* delayed ids come last */ @@ -5716,7 +5715,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { verify_u32(bc.final_id_to_literal.size() - delay_base_id); engine->delay_fatbit_size = fatbit_size(engine->delay_count); engine->delay_base_id = delay_base_id; - engine->anchored_base_id = anchored_base_id; engine->anchored_count = bc.anchored_programs.size(); engine->anchored_fatbit_size = fatbit_size(engine->anchored_count); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 92a3935b..45fa7ece 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1821,7 +1821,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, delay_base_id); DUMP_U32(t, anchored_count); DUMP_U32(t, anchored_fatbit_size); - DUMP_U32(t, anchored_base_id); DUMP_U32(t, maxFloatingDelayedMatch); DUMP_U32(t, delayRebuildLength); DUMP_U32(t, stateOffsets.history); diff 
--git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 321f54d8..6da80c98 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -578,8 +578,6 @@ public: std::map > group_to_literal; u32 group_end; - u32 anchored_base_id; - u32 ematcher_region_size; /**< number of bytes the eod table runs over */ /** \brief Mapping from leftfix to queue ID (used in dump code). */ diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 549cc4f1..3eca20c5 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -78,7 +78,6 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, delay_base_id(MO_INVALID_IDX), hasSom(false), group_end(0), - anchored_base_id(MO_INVALID_IDX), ematcher_region_size(0), eod_event_literal_id(MO_INVALID_IDX), max_rose_anchored_floating_overlap(0), diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 9ef8b0a1..ff3dd726 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -421,8 +421,6 @@ struct RoseEngine { * delayed literal ids are contiguous */ u32 anchored_count; /* number of anchored literal ids */ u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes) - u32 anchored_base_id; /* literal id of the first literal in the A table. - * anchored literal ids are contiguous */ u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can * usefully be reported */ u32 delayRebuildLength; /* length of the history region which needs to be From 6a945e27fbc3db75177162998eb81303b0f3157c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 14 Feb 2017 11:11:10 +1100 Subject: [PATCH 091/326] rose: reduce delay program dep on final_id --- src/rose/rose_build_bytecode.cpp | 43 +++++++++++++++++++------------- src/rose/rose_build_dump.cpp | 2 -- src/rose/rose_build_impl.h | 1 - src/rose/rose_build_misc.cpp | 1 - src/rose/rose_internal.h | 2 -- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b22f9149..a673d07b 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -249,6 +249,9 @@ struct build_context : boost::noncopyable { /** \brief Mapping from final ID to anchored program index. */ map anchored_programs; + + /** \brief Mapping from final ID to delayed program index. 
*/ + map delay_programs; }; /** \brief subengine info including built engine and @@ -4141,6 +4144,7 @@ void addPredBlocks(build_context &bc, map &pred_blocks, static void makePushDelayedInstructions(const RoseBuildImpl &build, + build_context &bc, const flat_set &lit_ids, RoseProgram &program) { assert(!lit_ids.empty()); @@ -4152,9 +4156,16 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, for (const auto &int_id : arb_lit_info.delayed_ids) { const auto &child_literal = build.literals.right.at(int_id); u32 child_id = build.literal_info[int_id].final_id; - u32 delay_index = child_id - build.delay_base_id; - DEBUG_PRINTF("delay=%u child_id=%u\n", child_literal.delay, child_id); + auto it = bc.delay_programs.find(child_id); + if (it == bc.delay_programs.end()) { + u32 delay_index = verify_u32(bc.delay_programs.size()); + it = bc.delay_programs.emplace(child_id, delay_index).first; + } + u32 delay_index = it->second; + + DEBUG_PRINTF("delay=%u, child_id=%u, delay_index=%u\n", + child_literal.delay, child_id, delay_index); auto ri = make_unique( verify_u8(child_literal.delay), delay_index); @@ -4461,7 +4472,7 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, } // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build, lit_ids, program); + makePushDelayedInstructions(build, bc, lit_ids, program); // Add pre-check for early literals in the floating table. makeCheckLitEarlyInstruction(build, bc, lit_ids, lit_edges, program); @@ -4586,7 +4597,7 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, RoseProgram prog; makeCheckLiteralInstruction(build, bc, lit_ids, prog); makeCheckLitMaskInstruction(build, bc, lit_ids, prog); - makePushDelayedInstructions(build, lit_ids, prog); + makePushDelayedInstructions(build, bc, lit_ids, prog); program.add_block(move(prog)); } @@ -4750,15 +4761,17 @@ static u32 buildDelayPrograms(RoseBuildImpl &build, build_context &bc) { auto lit_edge_map = findEdgesByLiteral(build); - vector programs; + vector programs(bc.delay_programs.size(), ROSE_INVALID_PROG_OFFSET); + DEBUG_PRINTF("%zu delay programs\n", programs.size()); - for (u32 final_id = build.delay_base_id; - final_id < bc.final_id_to_literal.size(); final_id++) { + for (const auto &m : bc.delay_programs) { + u32 final_id = m.first; + u32 delay_id = m.second; u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map); - programs.push_back(offset); + DEBUG_PRINTF("delay_id=%u, offset=%u\n", delay_id, offset); + programs[delay_id] = offset; } - DEBUG_PRINTF("%zu delay programs\n", programs.size()); return bc.engine_blob.add(begin(programs), end(programs)); } @@ -4766,8 +4779,8 @@ static u32 buildAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { auto lit_edge_map = findEdgesByLiteral(build); - vector programs; - programs.resize(bc.anchored_programs.size(), ROSE_INVALID_PROG_OFFSET); + vector programs(bc.anchored_programs.size(), ROSE_INVALID_PROG_OFFSET); + DEBUG_PRINTF("%zu anchored programs\n", programs.size()); for (const auto &m : bc.anchored_programs) { u32 final_id = m.first; @@ -4777,7 +4790,6 @@ u32 buildAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { programs[anch_id] = offset; } - DEBUG_PRINTF("%zu anchored programs\n", programs.size()); return bc.engine_blob.add(begin(programs), end(programs)); } @@ -5296,7 +5308,6 @@ void allocateFinalLiteralId(RoseBuildImpl &build, build_context &bc) { allocateFinalIdToSet(build, bc, anch, &next_final_id); /* delayed ids come last */ 
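For context on the allocation order above and on the base-id arithmetic this series removes: allocateFinalIdToSet hands out final IDs in three contiguous blocks, roughly

    [0, anchored_base_id)               normal literals
    [anchored_base_id, delay_base_id)   anchored literals
    [delay_base_id, ...)                delayed literals

and expressions such as rel_idx = literal_id - t->anchored_base_id (patch 089) and delay_index = child_id - build.delay_base_id (this patch) depended on that layout. With the explicit dense maps bc.anchored_programs and bc.delay_programs in place, the contiguity of these blocks no longer matters, which is why delay_base_id can be deleted here just as anchored_base_id was in patch 090.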
- build.delay_base_id = next_final_id; allocateFinalIdToSet(build, bc, delay, &next_final_id); } @@ -5711,11 +5722,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->lastByteHistoryIterOffset = lastByteOffset; - engine->delay_count = - verify_u32(bc.final_id_to_literal.size() - delay_base_id); + engine->delay_count = verify_u32(bc.delay_programs.size()); engine->delay_fatbit_size = fatbit_size(engine->delay_count); - engine->delay_base_id = delay_base_id; - engine->anchored_count = bc.anchored_programs.size(); + engine->anchored_count = verify_u32(bc.anchored_programs.size()); engine->anchored_fatbit_size = fatbit_size(engine->anchored_count); engine->rosePrefixCount = rosePrefixCount; diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 45fa7ece..b28bdbd6 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1710,7 +1710,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { rose_off(t->maxBiAnchoredWidth).str().c_str()); fprintf(f, " minFloatLitMatchOffset : %s\n", rose_off(t->floatingMinLiteralMatchOffset).str().c_str()); - fprintf(f, " delay_base_id : %u\n", t->delay_base_id); fprintf(f, " maxFloatingDelayedMatch : %s\n", rose_off(t->maxFloatingDelayedMatch).str().c_str()); @@ -1818,7 +1817,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, size); DUMP_U32(t, delay_count); DUMP_U32(t, delay_fatbit_size); - DUMP_U32(t, delay_base_id); DUMP_U32(t, anchored_count); DUMP_U32(t, anchored_fatbit_size); DUMP_U32(t, maxFloatingDelayedMatch); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 6da80c98..b7bf4f19 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -571,7 +571,6 @@ public: return next_nfa_report++; } std::deque literal_info; - u32 delay_base_id; bool hasSom; //!< at least one pattern requires SOM. std::map>> anchored_nfas; std::map> anchored_simple; diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 3eca20c5..5173596f 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -75,7 +75,6 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, : cc(cc_in), root(add_vertex(g)), anchored_root(add_vertex(g)), - delay_base_id(MO_INVALID_IDX), hasSom(false), group_end(0), ematcher_region_size(0), diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index ff3dd726..48f15ff0 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -417,8 +417,6 @@ struct RoseEngine { u32 size; // (bytes) u32 delay_count; /* number of delayed literal ids. */ u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes) - u32 delay_base_id; /* literal id of the first delayed literal. 
- * delayed literal ids are contiguous */ u32 anchored_count; /* number of anchored literal ids */ u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes) u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can From f5dd20e4617d0b5d1a56430a1ac45f1665688b46 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 15 Feb 2017 13:54:09 +1100 Subject: [PATCH 092/326] rose: rearrange anchored program generation --- src/rose/rose_build_bytecode.cpp | 80 ++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a673d07b..1128d896 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -247,7 +247,7 @@ struct build_context : boost::noncopyable { /** \brief Mapping from final ID to the set of literals it is used for. */ map> final_id_to_literal; - /** \brief Mapping from final ID to anchored program index. */ + /** \brief Mapping from Rose literal ID to anchored program index. */ map anchored_programs; /** \brief Mapping from final ID to delayed program index. */ @@ -4260,37 +4260,26 @@ u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { static void makeRecordAnchoredInstruction(const RoseBuildImpl &build, - build_context &bc, u32 final_id, + build_context &bc, + const flat_set &lit_ids, RoseProgram &program) { - assert(contains(bc.final_id_to_literal, final_id)); - const auto &lit_ids = bc.final_id_to_literal.at(final_id); + assert(!lit_ids.empty()); + u32 first_lit_id = *begin(lit_ids); // Must be anchored. - assert(!lit_ids.empty()); - if (build.literals.right.at(*begin(lit_ids)).table != ROSE_ANCHORED) { + if (build.literals.right.at(first_lit_id).table != ROSE_ANCHORED) { return; } - // If this anchored literal can never match past - // floatingMinLiteralMatchOffset, we will never have to record it. - u32 max_offset = 0; - for (u32 lit_id : lit_ids) { + for (const auto &lit_id : lit_ids) { assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED); - max_offset = max(max_offset, findMaxOffset(build, lit_id)); + if (!contains(bc.anchored_programs, lit_id)) { + continue; + } + u32 anch_id = bc.anchored_programs.at(lit_id); + DEBUG_PRINTF("adding RECORD_ANCHORED for anch_id=%u\n", anch_id); + program.add_before_end(make_unique(anch_id)); } - - if (max_offset <= bc.floatingMinLiteralMatchOffset) { - return; - } - - auto it = bc.anchored_programs.find(final_id); - if (it == bc.anchored_programs.end()) { - u32 anch_id = verify_u32(bc.anchored_programs.size()); - it = bc.anchored_programs.emplace(final_id, anch_id).first; - DEBUG_PRINTF("added anch_id=%u for final_id %u\n", anch_id, final_id); - } - u32 anch_id = it->second; - program.add_before_end(make_unique(anch_id)); } static @@ -4529,7 +4518,7 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, makeGroupSquashInstruction(build, lit_ids, root_block); // Literal may be anchored and need to be recorded. 
- makeRecordAnchoredInstruction(build, bc, final_id, root_block); + makeRecordAnchoredInstruction(build, bc, lit_ids, root_block); program.add_block(move(root_block)); } @@ -4776,20 +4765,42 @@ u32 buildDelayPrograms(RoseBuildImpl &build, build_context &bc) { } static -u32 buildAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { +u32 writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { auto lit_edge_map = findEdgesByLiteral(build); - vector programs(bc.anchored_programs.size(), ROSE_INVALID_PROG_OFFSET); - DEBUG_PRINTF("%zu anchored programs\n", programs.size()); + vector programs; + + for (const auto &m : build.literals.right) { + u32 lit_id = m.first; + const auto &lit = m.second; + + if (lit.table != ROSE_ANCHORED) { + continue; + } + + u32 final_id = build.literal_info.at(lit_id).final_id; + if (final_id == MO_INVALID_IDX) { + continue; + } + + // If this anchored literal can never match past + // floatingMinLiteralMatchOffset, we will never have to record it. + if (findMaxOffset(build, lit_id) <= bc.floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("can never match after " + "floatingMinLiteralMatchOffset=%u\n", + bc.floatingMinLiteralMatchOffset); + continue; + } - for (const auto &m : bc.anchored_programs) { - u32 final_id = m.first; - u32 anch_id = m.second; u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map); - DEBUG_PRINTF("final_id %u -> anch prog at %u\n", final_id, offset); - programs[anch_id] = offset; + DEBUG_PRINTF("lit_id=%u, final_id %u -> anch prog at %u\n", lit_id, + final_id, offset); + u32 anch_id = verify_u32(programs.size()); + programs.push_back(offset); + bc.anchored_programs.emplace(lit_id, anch_id); } + DEBUG_PRINTF("%zu anchored programs\n", programs.size()); return bc.engine_blob.add(begin(programs), end(programs)); } @@ -5494,9 +5505,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); + u32 anchoredProgramOffset = writeAnchoredPrograms(*this, bc); + buildLiteralPrograms(*this, bc); u32 delayProgramOffset = buildDelayPrograms(*this, bc); - u32 anchoredProgramOffset = buildAnchoredPrograms(*this, bc); u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); From 75c7f4231420f521c782f88ce467633c08d4467c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 15 Feb 2017 14:03:30 +1100 Subject: [PATCH 093/326] rose: don't emit RECORD_ANCHORED in anchored progs --- src/rose/rose_build_bytecode.cpp | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 1128d896..81bd3af0 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4471,8 +4471,8 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, static RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, - u32 final_id, - const vector &lit_edges) { + u32 final_id, const vector &lit_edges, + bool is_anchored_program) { const auto &g = build.g; DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size()); @@ -4518,7 +4518,9 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, makeGroupSquashInstruction(build, lit_ids, root_block); // Literal may be anchored and need to be recorded. 
- makeRecordAnchoredInstruction(build, bc, lit_ids, root_block); + if (!is_anchored_program) { + makeRecordAnchoredInstruction(build, bc, lit_ids, root_block); + } program.add_block(move(root_block)); } @@ -4533,7 +4535,8 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, static RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, const flat_set &final_ids, - const map> &lit_edges) { + const map> &lit_edges, + bool is_anchored_program) { assert(!final_ids.empty()); DEBUG_PRINTF("entry, %zu final ids\n", final_ids.size()); @@ -4545,7 +4548,8 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, if (contains(lit_edges, final_id)) { edges_ptr = &(lit_edges.at(final_id)); } - auto prog = buildLiteralProgram(build, bc, final_id, *edges_ptr); + auto prog = buildLiteralProgram(build, bc, final_id, *edges_ptr, + is_anchored_program); DEBUG_PRINTF("final_id=%u, prog has %zu entries\n", final_id, prog.size()); program.add_block(move(prog)); @@ -4556,8 +4560,10 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, static u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, const flat_set &final_ids, - const map> &lit_edges) { - RoseProgram program = buildLiteralProgram(build, bc, final_ids, lit_edges); + const map> &lit_edges, + bool is_anchored_program) { + RoseProgram program = buildLiteralProgram(build, bc, final_ids, lit_edges, + is_anchored_program); if (program.empty()) { return 0; } @@ -4740,7 +4746,7 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag.fragment_id, as_string_list(final_ids).c_str()); frag.lit_program_offset = - writeLiteralProgram(build, bc, final_ids, lit_edge_map); + writeLiteralProgram(build, bc, final_ids, lit_edge_map, false); frag.delay_program_offset = buildDelayRebuildProgram(build, bc, final_ids); } @@ -4756,7 +4762,8 @@ u32 buildDelayPrograms(RoseBuildImpl &build, build_context &bc) { for (const auto &m : bc.delay_programs) { u32 final_id = m.first; u32 delay_id = m.second; - u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map); + u32 offset = + writeLiteralProgram(build, bc, {final_id}, lit_edge_map, false); DEBUG_PRINTF("delay_id=%u, offset=%u\n", delay_id, offset); programs[delay_id] = offset; } @@ -4792,7 +4799,8 @@ u32 writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { continue; } - u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map); + u32 offset = + writeLiteralProgram(build, bc, {final_id}, lit_edge_map, true); DEBUG_PRINTF("lit_id=%u, final_id %u -> anch prog at %u\n", lit_id, final_id, offset); u32 anch_id = verify_u32(programs.size()); @@ -4983,7 +4991,7 @@ void addEodEventProgram(RoseBuildImpl &build, build_context &bc, }); program.add_block( - buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list)); + buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list, false)); } static From a2d2f7cb95909ffacd1675e73173a24bfdc90c66 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 16 Feb 2017 09:19:44 +1100 Subject: [PATCH 094/326] rose: dedupe anch programs and RECORD_ANCHOREDs --- src/rose/rose_build_bytecode.cpp | 44 ++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 81bd3af0..7f00a4ce 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4271,12 +4271,18 @@ void 
makeRecordAnchoredInstruction(const RoseBuildImpl &build, return; } + // Dedupe anch_ids to fire. + flat_set anch_ids; + for (const auto &lit_id : lit_ids) { assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED); if (!contains(bc.anchored_programs, lit_id)) { continue; } - u32 anch_id = bc.anchored_programs.at(lit_id); + anch_ids.insert(bc.anchored_programs.at(lit_id)); + } + + for (const auto &anch_id : anch_ids) { DEBUG_PRINTF("adding RECORD_ANCHORED for anch_id=%u\n", anch_id); program.add_before_end(make_unique(anch_id)); } @@ -4771,11 +4777,18 @@ u32 buildDelayPrograms(RoseBuildImpl &build, build_context &bc) { return bc.engine_blob.add(begin(programs), end(programs)); } +/** + * \brief Write anchored replay programs to the bytecode. + * + * Returns the offset of the beginning of the program array, and the number of + * programs. + */ static -u32 writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { +pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { auto lit_edge_map = findEdgesByLiteral(build); - vector programs; + vector programs; // program offsets indexed by anchored id + unordered_map cache; // program offsets we have already seen for (const auto &m : build.literals.right) { u32 lit_id = m.first; @@ -4803,13 +4816,25 @@ u32 writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { writeLiteralProgram(build, bc, {final_id}, lit_edge_map, true); DEBUG_PRINTF("lit_id=%u, final_id %u -> anch prog at %u\n", lit_id, final_id, offset); - u32 anch_id = verify_u32(programs.size()); - programs.push_back(offset); + + u32 anch_id; + auto it = cache.find(offset); + if (it != end(cache)) { + anch_id = it->second; + DEBUG_PRINTF("reusing anch_id %u for offset %u\n", anch_id, offset); + } else { + anch_id = verify_u32(programs.size()); + programs.push_back(offset); + cache.emplace(offset, anch_id); + DEBUG_PRINTF("assigned new anch_id %u for offset %u\n", anch_id, + offset); + } bc.anchored_programs.emplace(lit_id, anch_id); } DEBUG_PRINTF("%zu anchored programs\n", programs.size()); - return bc.engine_blob.add(begin(programs), end(programs)); + return {bc.engine_blob.add(begin(programs), end(programs)), + verify_u32(programs.size())}; } /** @@ -5513,7 +5538,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); - u32 anchoredProgramOffset = writeAnchoredPrograms(*this, bc); + u32 anchoredProgramOffset; + u32 anchoredProgramCount; + tie(anchoredProgramOffset, anchoredProgramCount) = + writeAnchoredPrograms(*this, bc); buildLiteralPrograms(*this, bc); u32 delayProgramOffset = buildDelayPrograms(*this, bc); @@ -5744,7 +5772,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->delay_count = verify_u32(bc.delay_programs.size()); engine->delay_fatbit_size = fatbit_size(engine->delay_count); - engine->anchored_count = verify_u32(bc.anchored_programs.size()); + engine->anchored_count = anchoredProgramCount; engine->anchored_fatbit_size = fatbit_size(engine->anchored_count); engine->rosePrefixCount = rosePrefixCount; From cea8f452f2b4da5d73300f9bfd51358da1e0e6bc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 16 Feb 2017 10:47:24 +1100 Subject: [PATCH 095/326] rose: reorganise delay program generation --- src/rose/rose_build_bytecode.cpp | 96 +++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 33 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 
7f00a4ce..68e88971 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -250,7 +250,7 @@ struct build_context : boost::noncopyable { /** \brief Mapping from Rose literal ID to anchored program index. */ map anchored_programs; - /** \brief Mapping from final ID to delayed program index. */ + /** \brief Mapping from Rose literal ID to delayed program index. */ map delay_programs; }; @@ -4148,28 +4148,31 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, const flat_set &lit_ids, RoseProgram &program) { assert(!lit_ids.empty()); - const auto &arb_lit_info = build.literal_info.at(*lit_ids.begin()); - if (arb_lit_info.delayed_ids.empty()) { - return; + + vector delay_instructions; + + for (const auto &lit_id : lit_ids) { + const auto &info = build.literal_info.at(lit_id); + for (const auto &delayed_lit_id : info.delayed_ids) { + DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); + assert(contains(bc.delay_programs, delayed_lit_id)); + u32 delay_id = bc.delay_programs.at(delayed_lit_id); + const auto &delay_lit = build.literals.right.at(delayed_lit_id); + delay_instructions.emplace_back(verify_u8(delay_lit.delay), + delay_id); + } } - for (const auto &int_id : arb_lit_info.delayed_ids) { - const auto &child_literal = build.literals.right.at(int_id); - u32 child_id = build.literal_info[int_id].final_id; + sort(begin(delay_instructions), end(delay_instructions), + [](const RoseInstrPushDelayed &a, const RoseInstrPushDelayed &b) { + return tie(a.delay, a.index) < tie(b.delay, b.index); + }); + delay_instructions.erase( + unique(begin(delay_instructions), end(delay_instructions)), + end(delay_instructions)); - auto it = bc.delay_programs.find(child_id); - if (it == bc.delay_programs.end()) { - u32 delay_index = verify_u32(bc.delay_programs.size()); - it = bc.delay_programs.emplace(child_id, delay_index).first; - } - u32 delay_index = it->second; - - DEBUG_PRINTF("delay=%u, child_id=%u, delay_index=%u\n", - child_literal.delay, child_id, delay_index); - - auto ri = make_unique( - verify_u8(child_literal.delay), delay_index); - program.add_before_end(move(ri)); + for (const auto &ri : delay_instructions) { + program.add_before_end(make_unique(ri)); } } @@ -4758,23 +4761,47 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { } } +/** + * \brief Write delay replay programs to the bytecode. + * + * Returns the offset of the beginning of the program array, and the number of + * programs. 
+ */ static -u32 buildDelayPrograms(RoseBuildImpl &build, build_context &bc) { +pair writeDelayPrograms(RoseBuildImpl &build, build_context &bc) { auto lit_edge_map = findEdgesByLiteral(build); - vector programs(bc.delay_programs.size(), ROSE_INVALID_PROG_OFFSET); - DEBUG_PRINTF("%zu delay programs\n", programs.size()); + vector programs; // program offsets indexed by (delayed) lit id + unordered_map cache; // program offsets we have already seen - for (const auto &m : bc.delay_programs) { - u32 final_id = m.first; - u32 delay_id = m.second; - u32 offset = - writeLiteralProgram(build, bc, {final_id}, lit_edge_map, false); - DEBUG_PRINTF("delay_id=%u, offset=%u\n", delay_id, offset); - programs[delay_id] = offset; + for (const auto &lit_id : build.literals.right | map_keys) { + const auto &info = build.literal_info.at(lit_id); + for (const auto &delayed_lit_id : info.delayed_ids) { + DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); + u32 final_id = build.literal_info.at(delayed_lit_id).final_id; + u32 offset = + writeLiteralProgram(build, bc, {final_id}, lit_edge_map, false); + + u32 delay_id; + auto it = cache.find(offset); + if (it != end(cache)) { + delay_id = it->second; + DEBUG_PRINTF("reusing delay_id %u for offset %u\n", delay_id, + offset); + } else { + delay_id = verify_u32(programs.size()); + programs.push_back(offset); + cache.emplace(offset, delay_id); + DEBUG_PRINTF("assigned new delay_id %u for offset %u\n", + delay_id, offset); + } + bc.delay_programs.emplace(delayed_lit_id, delay_id); + } } - return bc.engine_blob.add(begin(programs), end(programs)); + DEBUG_PRINTF("%zu delay programs\n", programs.size()); + return {bc.engine_blob.add(begin(programs), end(programs)), + verify_u32(programs.size())}; } /** @@ -5543,8 +5570,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { tie(anchoredProgramOffset, anchoredProgramCount) = writeAnchoredPrograms(*this, bc); + u32 delayProgramOffset; + u32 delayProgramCount; + tie(delayProgramOffset, delayProgramCount) = writeDelayPrograms(*this, bc); + buildLiteralPrograms(*this, bc); - u32 delayProgramOffset = buildDelayPrograms(*this, bc); u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); @@ -5770,7 +5800,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->lastByteHistoryIterOffset = lastByteOffset; - engine->delay_count = verify_u32(bc.delay_programs.size()); + engine->delay_count = delayProgramCount; engine->delay_fatbit_size = fatbit_size(engine->delay_count); engine->anchored_count = anchoredProgramCount; engine->anchored_fatbit_size = fatbit_size(engine->anchored_count); From dc50ab291b4da2e5b8d0c1742a4985ec9a5fbd6e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 16 Feb 2017 11:03:05 +1100 Subject: [PATCH 096/326] container: allow sort_and_unique to have a comparator --- src/rose/rose_build_bytecode.cpp | 23 +++++++++-------------- src/util/container.h | 6 +++--- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 68e88971..0a872ae4 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -3658,13 +3658,11 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, } // Order, de-dupe and add instructions to the end of program. 
- sort(begin(infix_program), end(infix_program), - [](const RoseInstrTriggerInfix &a, const RoseInstrTriggerInfix &b) { - return tie(a.cancel, a.queue, a.event) < - tie(b.cancel, b.queue, b.event); - }); - infix_program.erase(unique(begin(infix_program), end(infix_program)), - end(infix_program)); + sort_and_unique(infix_program, [](const RoseInstrTriggerInfix &a, + const RoseInstrTriggerInfix &b) { + return tie(a.cancel, a.queue, a.event) < + tie(b.cancel, b.queue, b.event); + }); for (const auto &ri : infix_program) { program.add_before_end(make_unique(ri)); } @@ -4163,13 +4161,10 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, } } - sort(begin(delay_instructions), end(delay_instructions), - [](const RoseInstrPushDelayed &a, const RoseInstrPushDelayed &b) { - return tie(a.delay, a.index) < tie(b.delay, b.index); - }); - delay_instructions.erase( - unique(begin(delay_instructions), end(delay_instructions)), - end(delay_instructions)); + sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a, + const RoseInstrPushDelayed &b) { + return tie(a.delay, a.index) < tie(b.delay, b.index); + }); for (const auto &ri : delay_instructions) { program.add_before_end(make_unique(ri)); diff --git a/src/util/container.h b/src/util/container.h index e8a16418..68f60e99 100644 --- a/src/util/container.h +++ b/src/util/container.h @@ -90,9 +90,9 @@ auto make_vector_from(const std::pair &range) } /** \brief Sort a sequence container and remove duplicates. */ -template -void sort_and_unique(C &container) { - std::sort(std::begin(container), std::end(container)); +template > +void sort_and_unique(C &container, Compare comp = Compare()) { + std::sort(std::begin(container), std::end(container), comp); container.erase(std::unique(std::begin(container), std::end(container)), std::end(container)); } From bcbd85ab672f71ebe06e2a715a997c2bf53c8b90 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 16 Feb 2017 15:39:11 +1100 Subject: [PATCH 097/326] rose: dump support for delay programs --- src/rose/rose_build_dump.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index b28bdbd6..af8c96a7 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1232,6 +1232,32 @@ void dumpRoseAnchoredPrograms(const RoseEngine *t, const string &filename) { } else { os << "" << endl; } + os << endl; + } + + os.close(); +} + +static +void dumpRoseDelayPrograms(const RoseEngine *t, const string &filename) { + ofstream os(filename); + + const u32 *programs = + (const u32 *)loadFromByteCodeOffset(t, t->delayProgramOffset); + + for (u32 i = 0; i < t->delay_count; i++) { + os << "Delay entry " << i << endl; + os << "---------------" << endl; + + if (programs[i]) { + os << "Program @ " << programs[i] << ":" << endl; + const char *prog = + (const char *)loadFromByteCodeOffset(t, programs[i]); + dumpProgram(os, t, prog); + } else { + os << "" << endl; + } + os << endl; } os.close(); @@ -1872,6 +1898,7 @@ void roseDumpPrograms(const RoseBuildImpl &build, const RoseEngine *t, dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); + dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); } void dumpRose(const RoseBuild &build_base, const RoseEngine *t, From 6c5b4e657ce9a781befcc5b3a922aab36d34d86e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Feb 2017 
09:53:09 +1100 Subject: [PATCH 098/326] rose_build_matchers: simplify literal build --- src/rose/rose_build_matchers.cpp | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 3ecec9d8..1643a06c 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -671,8 +671,6 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, assert(id < build.literal_info.size()); const rose_literal_info &info = build.literal_info[id]; - u32 final_id = info.final_id; - rose_group groups = info.group_mask; /* Note: requires_benefits are handled in the literal entries */ const ue2_literal &lit = e.second.s; @@ -723,8 +721,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " "cmp=%s\n", - final_id, escapeString(s).c_str(), (int)nocase, noruns, - dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + info.fragment_id, escapeString(s).c_str(), (int)nocase, + noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); if (!maskIsConsistent(s, nocase, msk, cmp)) { DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); @@ -732,20 +730,17 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, } mp.accel_lits.emplace_back(lit.get_string(), lit.any_nocase(), msk, cmp, - groups); + info.group_mask); mp.history_required = max(mp.history_required, lit_hist_len); - mp.lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk, - cmp); - } - for (auto &lit : mp.lits) { - u32 final_id = lit.id; - assert(contains(build.final_to_frag_map, final_id)); - const auto &frag = - build.fragments.at(build.final_to_frag_map.at(final_id)); - lit.id = delay_rebuild ? frag.delay_program_offset - : frag.lit_program_offset; - lit.groups = frag.groups; + assert(info.fragment_id < build.fragments.size()); + const auto &frag = build.fragments.at(info.fragment_id); + u32 prog_offset = + delay_rebuild ? frag.delay_program_offset : frag.lit_program_offset; + const auto &groups = frag.groups; + + mp.lits.emplace_back(move(s), nocase, noruns, prog_offset, groups, msk, + cmp); } sort_and_unique(mp.lits); From 454fbf33d51d9a5a056244b359b8ea72011d17f6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Feb 2017 10:12:56 +1100 Subject: [PATCH 099/326] rose: tidy --- src/rose/rose_build_bytecode.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 0a872ae4..36f7707d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4729,6 +4729,15 @@ void groupByFragment(RoseBuildImpl &build, const build_context &bc) { } frag_id++; } + + // Write the fragment IDs into the literal_info structures. + for (auto &info : build.literal_info) { + if (info.final_id == MO_INVALID_IDX) { + continue; + } + assert(contains(final_to_frag, info.final_id)); + info.fragment_id = final_to_frag.at(info.final_id); + } } /** @@ -5492,15 +5501,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { allocateFinalLiteralId(*this, bc); groupByFragment(*this, bc); - // Write the fragment IDs into the literal_info structures. 
- for (auto &info : literal_info) { - if (info.final_id == MO_INVALID_IDX) { - continue; - } - assert(contains(final_to_frag_map, info.final_id)); - info.fragment_id = final_to_frag_map.at(info.final_id); - } - auto anchored_dfas = buildAnchoredDfas(*this); bc.floatingMinLiteralMatchOffset = From a06e877fde0429cd0b95020d06de47447d84a01f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Feb 2017 10:19:56 +1100 Subject: [PATCH 100/326] rose dump: simplify --- src/rose/rose_build_dump.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index af8c96a7..b3966169 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1150,10 +1150,7 @@ void dumpRoseLitPrograms(const RoseBuildImpl &build, const RoseEngine *t, // Collect all programs referenced by a literal fragment. vector programs; - programs.reserve(build.final_to_frag_map.size()); - - for (const auto &m : build.final_to_frag_map) { - const auto &frag = build.fragments.at(m.second); + for (const auto &frag : build.fragments) { if (frag.lit_program_offset) { programs.push_back(frag.lit_program_offset); } From 24ffb156e9766722d48cd7e7c38519b715ea4063 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Feb 2017 10:28:40 +1100 Subject: [PATCH 101/326] rose: eliminate global final to fragment map --- src/rose/rose_build_bytecode.cpp | 9 ++++++--- src/rose/rose_build_impl.h | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 36f7707d..c17c8b86 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4675,8 +4675,8 @@ void groupByFragment(RoseBuildImpl &build, const build_context &bc) { }; map frag_info; + map final_to_frag; - auto &final_to_frag = build.final_to_frag_map; auto &fragments = build.fragments; for (const auto &m : bc.final_id_to_literal) { @@ -4747,8 +4747,11 @@ static void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { // Build a reverse mapping from fragment -> final_id. map> frag_to_final_map; - for (const auto &m : build.final_to_frag_map) { - frag_to_final_map[m.second].insert(m.first); + for (const auto &info : build.literal_info) { + if (info.fragment_id == MO_INVALID_IDX) { + continue; + } + frag_to_final_map[info.fragment_id].insert(info.final_id); } DEBUG_PRINTF("%zu fragments\n", build.fragments.size()); diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index b7bf4f19..860404b4 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -590,7 +590,6 @@ public: * overlap calculation in history assignment. 
*/ std::map anchoredLitSuffix; - std::map final_to_frag_map; std::vector fragments; unordered_set transient; From 6a0dc261a2b489b12647497f831671e81440e4ee Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Feb 2017 10:56:26 +1100 Subject: [PATCH 102/326] rose_build_bytecode: less final_id --- src/rose/rose_build_bytecode.cpp | 64 ++++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c17c8b86..03fba37b 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4438,19 +4438,10 @@ bool hasDelayedLiteral(RoseBuildImpl &build, static RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, - u32 final_id, + const flat_set &lit_ids, const vector &lit_edges) { RoseProgram program; - // No initial program for EOD. - if (final_id == MO_INVALID_IDX) { - return program; - } - - DEBUG_PRINTF("final_id %u\n", final_id); - - const auto &lit_ids = bc.final_id_to_literal.at(final_id); - // Check long literal info. makeCheckLiteralInstruction(build, bc, lit_ids, program); @@ -4475,11 +4466,13 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, static RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, - u32 final_id, const vector &lit_edges, + const flat_set &lit_ids, + const vector &lit_edges, bool is_anchored_program) { const auto &g = build.g; - DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size()); + DEBUG_PRINTF("lit ids {%s}, %zu lit edges\n", + as_string_list(lit_ids).c_str(), lit_edges.size()); RoseProgram program; @@ -4514,25 +4507,27 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, program.add_block(makeProgram(build, bc, e)); } - if (final_id != MO_INVALID_IDX) { - const auto &lit_ids = bc.final_id_to_literal.at(final_id); - RoseProgram root_block; - - // Literal may squash groups. - makeGroupSquashInstruction(build, lit_ids, root_block); - - // Literal may be anchored and need to be recorded. - if (!is_anchored_program) { - makeRecordAnchoredInstruction(build, bc, lit_ids, root_block); - } - - program.add_block(move(root_block)); + if (lit_ids.empty()) { + return program; } - // Construct initial program up front, as its early checks must be able to - // jump to end and terminate processing for this literal. - auto lit_program = buildLitInitialProgram(build, bc, final_id, lit_edges); + RoseProgram root_block; + + // Literal may squash groups. + makeGroupSquashInstruction(build, lit_ids, root_block); + + // Literal may be anchored and need to be recorded. + if (!is_anchored_program) { + makeRecordAnchoredInstruction(build, bc, lit_ids, root_block); + } + + program.add_block(move(root_block)); + + // Construct initial program up front, as its early checks must be able + // to jump to end and terminate processing for this literal. 
+ auto lit_program = buildLitInitialProgram(build, bc, lit_ids, lit_edges); lit_program.add_before_end(move(program)); + return lit_program; } @@ -4543,7 +4538,8 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, bool is_anchored_program) { assert(!final_ids.empty()); - DEBUG_PRINTF("entry, %zu final ids\n", final_ids.size()); + DEBUG_PRINTF("entry, %zu final ids: {%s}\n", final_ids.size(), + as_string_list(final_ids).c_str()); const vector no_edges; RoseProgram program; @@ -4552,7 +4548,9 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, if (contains(lit_edges, final_id)) { edges_ptr = &(lit_edges.at(final_id)); } - auto prog = buildLiteralProgram(build, bc, final_id, *edges_ptr, + assert(contains(bc.final_id_to_literal, final_id)); + const auto &lit_ids = bc.final_id_to_literal.at(final_id); + auto prog = buildLiteralProgram(build, bc, lit_ids, *edges_ptr, is_anchored_program); DEBUG_PRINTF("final_id=%u, prog has %zu entries\n", final_id, prog.size()); @@ -4786,6 +4784,9 @@ pair writeDelayPrograms(RoseBuildImpl &build, build_context &bc) { for (const auto &delayed_lit_id : info.delayed_ids) { DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); u32 final_id = build.literal_info.at(delayed_lit_id).final_id; + if (final_id == MO_INVALID_IDX) { + continue; + } u32 offset = writeLiteralProgram(build, bc, {final_id}, lit_edge_map, false); @@ -5049,8 +5050,7 @@ void addEodEventProgram(RoseBuildImpl &build, build_context &bc, tie(g[source(b, g)].index, g[target(b, g)].index); }); - program.add_block( - buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list, false)); + program.add_block(buildLiteralProgram(build, bc, {}, edge_list, false)); } static From 176c61aeaa4a25d7722335bbdb526ca7e765149e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Feb 2017 12:46:52 +1100 Subject: [PATCH 103/326] rose_build_bytecode: clean up findEdgesByLiteral() --- src/rose/rose_build_bytecode.cpp | 44 +++++++++++++++++++------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 03fba37b..264b9529 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4534,23 +4534,33 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, static RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, const flat_set &final_ids, - const map> &lit_edges, + const map> &lit_edge_map, bool is_anchored_program) { assert(!final_ids.empty()); DEBUG_PRINTF("entry, %zu final ids: {%s}\n", final_ids.size(), as_string_list(final_ids).c_str()); - const vector no_edges; + + const auto &g = build.g; + vector lit_edges; RoseProgram program; for (const auto &final_id : final_ids) { - const auto *edges_ptr = &no_edges; - if (contains(lit_edges, final_id)) { - edges_ptr = &(lit_edges.at(final_id)); - } assert(contains(bc.final_id_to_literal, final_id)); const auto &lit_ids = bc.final_id_to_literal.at(final_id); - auto prog = buildLiteralProgram(build, bc, lit_ids, *edges_ptr, + + lit_edges.clear(); + for (const auto &lit_id : lit_ids) { + if (contains(lit_edge_map, lit_id)) { + insert(&lit_edges, lit_edges.end(), lit_edge_map.at(lit_id)); + } + } + sort_and_unique(lit_edges, [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].index, g[target(a, g)].index) < + tie(g[source(b, g)].index, g[target(b, g)].index); + }); + + auto prog = buildLiteralProgram(build, bc, lit_ids, lit_edges, 
is_anchored_program); DEBUG_PRINTF("final_id=%u, prog has %zu entries\n", final_id, prog.size()); @@ -4562,10 +4572,10 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, static u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, const flat_set &final_ids, - const map> &lit_edges, + const map> &lit_edge_map, bool is_anchored_program) { - RoseProgram program = buildLiteralProgram(build, bc, final_ids, lit_edges, - is_anchored_program); + auto program = buildLiteralProgram(build, bc, final_ids, lit_edge_map, + is_anchored_program); if (program.empty()) { return 0; } @@ -4605,6 +4615,10 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, return writeProgram(bc, move(program)); } +/** + * \brief Returns a map from literal ID to a list of edges leading into + * vertices with that literal ID. + */ static map> findEdgesByLiteral(const RoseBuildImpl &build) { // Use a set of edges while building the map to cull duplicates. @@ -4614,13 +4628,7 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { for (const auto &e : edges_range(g)) { const auto &v = target(e, g); for (const auto &lit_id : g[v].literals) { - assert(lit_id < build.literal_info.size()); - u32 final_id = build.literal_info.at(lit_id).final_id; - if (final_id == MO_INVALID_IDX) { - // Unused, special report IDs are handled elsewhere. - continue; - } - unique_lit_edge_map[final_id].insert(e); + unique_lit_edge_map[lit_id].insert(e); } } @@ -4633,7 +4641,7 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { return tie(g[source(a, g)].index, g[target(a, g)].index) < tie(g[source(b, g)].index, g[target(b, g)].index); }); - lit_edge_map.emplace(m.first, edge_list); + lit_edge_map.emplace(m.first, move(edge_list)); } return lit_edge_map; From b68694b729d1dabf643bb2f891ab52154c7d6495 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Feb 2017 14:48:23 +1100 Subject: [PATCH 104/326] rose: new program construction code --- src/rose/rose_build_bytecode.cpp | 142 +++++++++++++++++-------------- 1 file changed, 78 insertions(+), 64 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 264b9529..b1aead36 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4531,51 +4531,60 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, return lit_program; } +/** + * \brief Consumes list of program blocks, checks them for duplicates and then + * concatenates them into one program. 
+ */ static -RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, - const flat_set &final_ids, - const map> &lit_edge_map, - bool is_anchored_program) { - assert(!final_ids.empty()); - - DEBUG_PRINTF("entry, %zu final ids: {%s}\n", final_ids.size(), - as_string_list(final_ids).c_str()); - - const auto &g = build.g; - vector lit_edges; - +RoseProgram assembleProgramBlocks(vector &&blocks) { RoseProgram program; - for (const auto &final_id : final_ids) { - assert(contains(bc.final_id_to_literal, final_id)); - const auto &lit_ids = bc.final_id_to_literal.at(final_id); - lit_edges.clear(); - for (const auto &lit_id : lit_ids) { - if (contains(lit_edge_map, lit_id)) { - insert(&lit_edges, lit_edges.end(), lit_edge_map.at(lit_id)); - } - } - sort_and_unique(lit_edges, [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].index, g[target(a, g)].index) < - tie(g[source(b, g)].index, g[target(b, g)].index); - }); + DEBUG_PRINTF("%zu blocks before dedupe\n", blocks.size()); - auto prog = buildLiteralProgram(build, bc, lit_ids, lit_edges, - is_anchored_program); - DEBUG_PRINTF("final_id=%u, prog has %zu entries\n", final_id, - prog.size()); + sort(blocks.begin(), blocks.end(), + [](const RoseProgram &a, const RoseProgram &b) { + RoseProgramHash hasher; + return hasher(a) < hasher(b); + }); + + blocks.erase(unique(blocks.begin(), blocks.end(), RoseProgramEquivalence()), + blocks.end()); + + DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size()); + + for (auto &prog : blocks) { program.add_block(move(prog)); } + return program; } static u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, - const flat_set &final_ids, + const flat_set &lit_ids, const map> &lit_edge_map, bool is_anchored_program) { - auto program = buildLiteralProgram(build, bc, final_ids, lit_edge_map, - is_anchored_program); + assert(!lit_ids.empty()); + + vector blocks; + + const vector no_edges; + + for (const auto &lit_id : lit_ids) { + DEBUG_PRINTF("lit_id=%u\n", lit_id); + const vector *edges_ptr; + if (contains(lit_edge_map, lit_id)) { + edges_ptr = &lit_edge_map.at(lit_id); + } else { + edges_ptr = &no_edges; + } + auto prog = buildLiteralProgram(build, bc, {lit_id}, *edges_ptr, + is_anchored_program); + blocks.push_back(move(prog)); + } + + auto program = assembleProgramBlocks(move(blocks)); + if (program.empty()) { return 0; } @@ -4584,30 +4593,32 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, } static -u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, - const flat_set &final_ids) { +u32 writeDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, + const flat_set &lit_ids) { + assert(!lit_ids.empty()); + if (!build.cc.streaming) { return 0; // We only do delayed rebuild in streaming mode. } - RoseProgram program; + vector blocks; - for (const auto &final_id : final_ids) { - const auto &lit_ids = bc.final_id_to_literal.at(final_id); - assert(!lit_ids.empty()); - - const auto &arb_lit_info = build.literal_info.at(*lit_ids.begin()); - if (arb_lit_info.delayed_ids.empty()) { + for (const auto &lit_id : lit_ids) { + DEBUG_PRINTF("lit_id=%u\n", lit_id); + const auto &info = build.literal_info.at(lit_id); + if (info.delayed_ids.empty()) { continue; // No delayed IDs, no work to do. 
} RoseProgram prog; - makeCheckLiteralInstruction(build, bc, lit_ids, prog); - makeCheckLitMaskInstruction(build, bc, lit_ids, prog); - makePushDelayedInstructions(build, bc, lit_ids, prog); - program.add_block(move(prog)); + makeCheckLiteralInstruction(build, bc, {lit_id}, prog); + makeCheckLitMaskInstruction(build, bc, {lit_id}, prog); + makePushDelayedInstructions(build, bc, {lit_id}, prog); + blocks.push_back(move(prog)); } + auto program = assembleProgramBlocks(move(blocks)); + if (program.empty()) { return 0; } @@ -4751,26 +4762,29 @@ void groupByFragment(RoseBuildImpl &build, const build_context &bc) { */ static void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { - // Build a reverse mapping from fragment -> final_id. - map> frag_to_final_map; - for (const auto &info : build.literal_info) { + // Build a reverse mapping from fragment -> {lit_id, lit_id,...} + map> frag_to_lit_map; + for (u32 lit_id = 0; lit_id < verify_u32(build.literal_info.size()); + lit_id++) { + const auto &info = build.literal_info[lit_id]; if (info.fragment_id == MO_INVALID_IDX) { continue; } - frag_to_final_map[info.fragment_id].insert(info.final_id); + frag_to_lit_map[info.fragment_id].insert(lit_id); } DEBUG_PRINTF("%zu fragments\n", build.fragments.size()); auto lit_edge_map = findEdgesByLiteral(build); for (auto &frag : build.fragments) { - const auto &final_ids = frag_to_final_map[frag.fragment_id]; - DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag.fragment_id, - as_string_list(final_ids).c_str()); + const auto &lit_ids = frag_to_lit_map[frag.fragment_id]; + DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", frag.fragment_id, + as_string_list(lit_ids).c_str()); + frag.lit_program_offset = - writeLiteralProgram(build, bc, final_ids, lit_edge_map, false); + writeLiteralProgram(build, bc, lit_ids, lit_edge_map, false); frag.delay_program_offset = - buildDelayRebuildProgram(build, bc, final_ids); + writeDelayRebuildProgram(build, bc, lit_ids); } } @@ -4789,14 +4803,15 @@ pair writeDelayPrograms(RoseBuildImpl &build, build_context &bc) { for (const auto &lit_id : build.literals.right | map_keys) { const auto &info = build.literal_info.at(lit_id); + + if (info.fragment_id == MO_INVALID_IDX) { + continue; // Unused literal. + } + for (const auto &delayed_lit_id : info.delayed_ids) { DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); - u32 final_id = build.literal_info.at(delayed_lit_id).final_id; - if (final_id == MO_INVALID_IDX) { - continue; - } - u32 offset = - writeLiteralProgram(build, bc, {final_id}, lit_edge_map, false); + u32 offset = writeLiteralProgram(build, bc, {delayed_lit_id}, + lit_edge_map, false); u32 delay_id; auto it = cache.find(offset); @@ -4841,9 +4856,8 @@ pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { continue; } - u32 final_id = build.literal_info.at(lit_id).final_id; - if (final_id == MO_INVALID_IDX) { - continue; + if (build.literal_info.at(lit_id).fragment_id == MO_INVALID_IDX) { + continue; // Unused literal. 
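/*
 * Editor's note: writeDelayPrograms() and writeAnchoredPrograms() in this
 * patch share an interning pattern -- equal programs serialise to the same
 * offset, so a cache maps each distinct offset to a dense id. A hedged
 * standalone sketch of that pattern (names hypothetical):
 */
#include <cstdint>
#include <unordered_map>
#include <vector>

static uint32_t intern_program_offset(uint32_t offset,
                                      std::unordered_map<uint32_t, uint32_t> &cache,
                                      std::vector<uint32_t> &programs) {
    auto it = cache.find(offset);
    if (it != cache.end()) {
        return it->second;                                 // offset already has an id
    }
    uint32_t id = static_cast<uint32_t>(programs.size());  // next dense id
    programs.push_back(offset);
    cache.emplace(offset, id);
    return id;
}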
} // If this anchored literal can never match past @@ -4856,7 +4870,7 @@ pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { } u32 offset = - writeLiteralProgram(build, bc, {final_id}, lit_edge_map, true); + writeLiteralProgram(build, bc, {lit_id}, lit_edge_map, true); DEBUG_PRINTF("lit_id=%u, final_id %u -> anch prog at %u\n", lit_id, final_id, offset); From bf93c993cb282b405888f812e05ede86f8ca9a53 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Feb 2017 16:01:40 +1100 Subject: [PATCH 105/326] rose: remove final_id --- src/rose/rose_build_bytecode.cpp | 265 ++++++------------------------- src/rose/rose_build_compile.cpp | 4 - src/rose/rose_build_dump.cpp | 6 +- src/rose/rose_build_impl.h | 3 - src/rose/rose_build_matchers.cpp | 6 +- 5 files changed, 55 insertions(+), 229 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b1aead36..7604e27a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -244,9 +244,6 @@ struct build_context : boost::noncopyable { /** \brief Global bitmap of groups that can be squashed. */ rose_group squashable_groups = 0; - /** \brief Mapping from final ID to the set of literals it is used for. */ - map> final_id_to_literal; - /** \brief Mapping from Rose literal ID to anchored program index. */ map anchored_programs; @@ -2566,12 +2563,12 @@ void recordResources(RoseResources &resources, if (!build.outfixes.empty()) { resources.has_outfixes = true; } - for (u32 i = 0; i < build.literal_info.size(); i++) { - if (build.hasFinalId(i)) { - resources.has_literals = true; - break; - } - } + + resources.has_literals = + any_of(begin(build.literal_info), end(build.literal_info), + [](const rose_literal_info &info) { + return info.fragment_id != MO_INVALID_IDX; + }); const auto &g = build.g; for (const auto &v : vertices_range(g)) { @@ -4658,6 +4655,26 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { return lit_edge_map; } +static +bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { + assert(lit_id < build.literal_info.size()); + const auto &info = build.literal_info[lit_id]; + if (!info.vertices.empty()) { + return true; + } + + for (const u32 &delayed_id : info.delayed_ids) { + assert(delayed_id < build.literal_info.size()); + const rose_literal_info &delayed_info = build.literal_info[delayed_id]; + if (!delayed_info.vertices.empty()) { + return true; + } + } + + DEBUG_PRINTF("literal %u has no refs\n", lit_id); + return false; +} + static rose_literal_id getFragment(const rose_literal_id &lit) { if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { @@ -4673,87 +4690,68 @@ rose_literal_id getFragment(const rose_literal_id &lit) { } static -rose_group getGroups(const RoseBuildImpl &build, const flat_set &lit_ids) { - rose_group groups = 0; - for (auto lit_id : lit_ids) { - auto &info = build.literal_info.at(lit_id); - groups |= info.group_mask; - } - return groups; -} - -static -void groupByFragment(RoseBuildImpl &build, const build_context &bc) { +void groupByFragment(RoseBuildImpl &build) { u32 frag_id = 0; struct FragmentInfo { - vector final_ids; + vector lit_ids; rose_group groups = 0; }; map frag_info; - map final_to_frag; + map lit_to_frag; auto &fragments = build.fragments; - for (const auto &m : bc.final_id_to_literal) { - u32 final_id = m.first; - const auto &lit_ids = m.second; - assert(!lit_ids.empty()); + for (const auto &m : build.literals.right) { + u32 lit_id = m.first; - auto groups = getGroups(build, lit_ids); - - if 
(lit_ids.size() > 1) { - final_to_frag.emplace(final_id, frag_id); - fragments.emplace_back(frag_id, groups); - frag_id++; + if (!isUsedLiteral(build, lit_id)) { continue; } - const auto lit_id = *lit_ids.begin(); - const auto &lit = build.literals.right.at(lit_id); + const auto &lit = m.second; + const auto &info = build.literal_info.at(lit_id); + + auto groups = info.group_mask; + if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { - final_to_frag.emplace(final_id, frag_id); + lit_to_frag.emplace(lit_id, frag_id); fragments.emplace_back(frag_id, groups); frag_id++; continue; } // Combining fragments that squash their groups is unsafe. - const auto &info = build.literal_info[lit_id]; if (info.squash_group) { - final_to_frag.emplace(final_id, frag_id); + lit_to_frag.emplace(lit_id, frag_id); fragments.emplace_back(frag_id, groups); frag_id++; continue; } - DEBUG_PRINTF("fragment candidate: final_id=%u %s\n", final_id, + DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id, dumpString(lit.s).c_str()); auto &fi = frag_info[getFragment(lit)]; - fi.final_ids.push_back(final_id); + fi.lit_ids.push_back(lit_id); fi.groups |= groups; } for (const auto &m : frag_info) { const auto &fi = m.second; DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), - as_string_list(fi.final_ids).c_str()); + as_string_list(fi.lit_ids).c_str()); fragments.emplace_back(frag_id, fi.groups); - for (const auto final_id : fi.final_ids) { - assert(!contains(final_to_frag, final_id)); - final_to_frag.emplace(final_id, frag_id); + for (const auto lit_id : fi.lit_ids) { + assert(!contains(lit_to_frag, lit_id)); + lit_to_frag.emplace(lit_id, frag_id); } frag_id++; } // Write the fragment IDs into the literal_info structures. - for (auto &info : build.literal_info) { - if (info.final_id == MO_INVALID_IDX) { - continue; - } - assert(contains(final_to_frag, info.final_id)); - info.fragment_id = final_to_frag.at(info.final_id); + for (const auto &m : lit_to_frag) { + build.literal_info[m.first].fragment_id = m.second; } } @@ -4871,8 +4869,7 @@ pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { u32 offset = writeLiteralProgram(build, bc, {lit_id}, lit_edge_map, true); - DEBUG_PRINTF("lit_id=%u, final_id %u -> anch prog at %u\n", lit_id, - final_id, offset); + DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset); u32 anch_id; auto it = cache.find(offset); @@ -5248,169 +5245,6 @@ u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, return bc.engine_blob.add_iterator(iter); } -static -void allocateFinalIdToSet(RoseBuildImpl &build, build_context &bc, - const set &lits, u32 *next_final_id) { - const auto &g = build.g; - auto &literal_info = build.literal_info; - auto &final_id_to_literal = bc.final_id_to_literal; - - /* We can allocate the same final id to multiple literals of the same type - * if they share the same vertex set and trigger the same delayed literal - * ids and squash the same roles and have the same group squashing - * behaviour. Benefits literals cannot be merged. */ - - for (u32 int_id : lits) { - rose_literal_info &curr_info = literal_info[int_id]; - const rose_literal_id &lit = build.literals.right.at(int_id); - const auto &verts = curr_info.vertices; - - // Literals with benefits cannot be merged. - if (curr_info.requires_benefits) { - DEBUG_PRINTF("id %u has benefits\n", int_id); - goto assign_new_id; - } - - // Literals that need confirmation with CHECK_LONG_LIT or CHECK_MED_LIT - // cannot be merged. 
- if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("id %u needs lit confirm\n", int_id); - goto assign_new_id; - } - - if (!verts.empty() && curr_info.delayed_ids.empty()) { - vector cand; - insert(&cand, cand.end(), g[*verts.begin()].literals); - for (auto v : verts) { - vector temp; - set_intersection(cand.begin(), cand.end(), - g[v].literals.begin(), - g[v].literals.end(), - inserter(temp, temp.end())); - cand.swap(temp); - } - - for (u32 cand_id : cand) { - if (cand_id >= int_id) { - break; - } - - const auto &cand_info = literal_info[cand_id]; - const auto &cand_lit = build.literals.right.at(cand_id); - - if (cand_lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - continue; - } - - if (cand_info.requires_benefits) { - continue; - } - - if (!cand_info.delayed_ids.empty()) { - /* TODO: allow cases where delayed ids are equivalent. - * This is awkward currently as the have not had their - * final ids allocated yet */ - continue; - } - - if (lits.find(cand_id) == lits.end() - || cand_info.vertices.size() != verts.size() - || cand_info.squash_group != curr_info.squash_group) { - continue; - } - - /* if we are squashing groups we need to check if they are the - * same group */ - if (cand_info.squash_group - && cand_info.group_mask != curr_info.group_mask) { - continue; - } - - u32 final_id = cand_info.final_id; - assert(final_id != MO_INVALID_IDX); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = final_id; - final_id_to_literal[final_id].insert(int_id); - goto next_lit; - } - } - - assign_new_id: - /* oh well, have to give it a fresh one, hang the expense */ - DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = *next_final_id; - final_id_to_literal[*next_final_id].insert(int_id); - (*next_final_id)++; - next_lit:; - } -} - -static -bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { - assert(lit_id < build.literal_info.size()); - const auto &info = build.literal_info[lit_id]; - if (!info.vertices.empty()) { - return true; - } - - for (const u32 &delayed_id : info.delayed_ids) { - assert(delayed_id < build.literal_info.size()); - const rose_literal_info &delayed_info = build.literal_info[delayed_id]; - if (!delayed_info.vertices.empty()) { - return true; - } - } - - DEBUG_PRINTF("literal %u has no refs\n", lit_id); - return false; -} - -/** \brief Allocate final literal IDs for all literals. */ -static -void allocateFinalLiteralId(RoseBuildImpl &build, build_context &bc) { - set anch; - set norm; - set delay; - - /* undelayed ids come first */ - assert(bc.final_id_to_literal.empty()); - u32 next_final_id = 0; - for (u32 i = 0; i < build.literal_info.size(); i++) { - assert(!build.hasFinalId(i)); - - if (!isUsedLiteral(build, i)) { - /* what is this literal good for? absolutely nothing */ - continue; - } - - // The special EOD event literal has its own program and does not need - // a real literal ID. 
- if (i == build.eod_event_literal_id) { - assert(build.eod_event_literal_id != MO_INVALID_IDX); - continue; - } - - if (build.isDelayed(i)) { - assert(!build.literal_info[i].requires_benefits); - delay.insert(i); - } else if (build.literals.right.at(i).table == ROSE_ANCHORED) { - anch.insert(i); - } else { - norm.insert(i); - } - } - - /* normal lits */ - allocateFinalIdToSet(build, bc, norm, &next_final_id); - - /* next anchored stuff */ - allocateFinalIdToSet(build, bc, anch, &next_final_id); - - /* delayed ids come last */ - allocateFinalIdToSet(build, bc, delay, &next_final_id); -} - static aligned_unique_ptr addSmallWriteEngine(RoseBuildImpl &build, aligned_unique_ptr rose) { @@ -5523,8 +5357,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); build_context bc; - allocateFinalLiteralId(*this, bc); - groupByFragment(*this, bc); + groupByFragment(*this); auto anchored_dfas = buildAnchoredDfas(*this); diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 63b5bd0f..7dd55d5f 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -759,10 +759,6 @@ bool RoseBuildImpl::isDelayed(u32 id) const { return literal_info.at(id).undelayed_id != id; } -bool RoseBuildImpl::hasFinalId(u32 id) const { - return literal_info.at(id).final_id != MO_INVALID_IDX; -} - bool RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const { for (u32 lit_id : g[v].literals) { if (literals.right.at(lit_id).delay) { diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index b3966169..2f882e68 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -249,9 +249,9 @@ private: void writeLiteral(ostream &os, u32 id) const { os << "lit=" << id; if (id < build.literal_info.size()) { - os << "/" << build.literal_info[id].final_id << " "; + os << "/" << build.literal_info[id].fragment_id << " "; } else { - os << "/nofinal "; + os << "/nofrag "; } if (contains(build.literals.right, id)) { @@ -355,7 +355,7 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { break; } - os << " ID " << id << "/" << lit_info.final_id << ": \"" + os << " ID " << id << "/" << lit_info.fragment_id << ": \"" << escapeString(s.get_string()) << "\"" << " (len " << s.length() << ","; if (s.any_nocase()) { diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 860404b4..cafd0505 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -264,7 +264,6 @@ struct rose_literal_info { ue2::flat_set vertices; rose_group group_mask = 0; u32 undelayed_id = MO_INVALID_IDX; - u32 final_id = MO_INVALID_IDX; // TODO: remove u32 fragment_id = MO_INVALID_IDX; //!< ID corresponding to literal prog. bool squash_group = false; bool requires_benefits = false; @@ -530,8 +529,6 @@ public: bool isDirectReport(u32 id) const; bool isDelayed(u32 id) const; - bool hasFinalId(u32 id) const; - bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored * table */ bool isFloating(RoseVertex v) const; /* true iff has literal in floating diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 1643a06c..50e48a5b 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -349,8 +349,8 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { const u32 id = e.first; const auto &lit = e.second; - // This pass takes place before final IDs are assigned to literals. 
- assert(!build.hasFinalId(id)); + // This pass takes place before fragment IDs are assigned to literals. + assert(build.literal_info.at(id).fragment_id == MO_INVALID_IDX); if (lit.delay || build.isDelayed(id)) { continue; @@ -657,7 +657,7 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, for (const auto &e : build.literals.right) { const u32 id = e.first; - if (!build.hasFinalId(id)) { + if (build.literal_info.at(id).fragment_id == MO_INVALID_IDX) { continue; } From b525d7786cfff91d8f57f5a669a1a47ccb1020f3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 23 Feb 2017 09:29:11 +1100 Subject: [PATCH 106/326] rose: don't assign fragments for event lits --- src/rose/rose_build_bytecode.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 7604e27a..34bc2a59 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4704,14 +4704,19 @@ void groupByFragment(RoseBuildImpl &build) { auto &fragments = build.fragments; for (const auto &m : build.literals.right) { - u32 lit_id = m.first; + const u32 lit_id = m.first; + const auto &lit = m.second; + const auto &info = build.literal_info.at(lit_id); if (!isUsedLiteral(build, lit_id)) { + DEBUG_PRINTF("lit %u is unused\n", lit_id); continue; } - const auto &lit = m.second; - const auto &info = build.literal_info.at(lit_id); + if (lit.table == ROSE_EVENT) { + DEBUG_PRINTF("lit %u is an event\n", lit_id); + continue; + } auto groups = info.group_mask; From 78875614c876ee95d52ce94fc77e5ca3e243fcd7 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 23 Feb 2017 09:52:52 +1100 Subject: [PATCH 107/326] rose: make buildLiteralProgram take one lit_id --- src/rose/rose_build_bytecode.cpp | 165 ++++++++++--------------------- 1 file changed, 51 insertions(+), 114 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 34bc2a59..b9c7ae33 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4138,24 +4138,18 @@ void addPredBlocks(build_context &bc, map &pred_blocks, } static -void makePushDelayedInstructions(const RoseBuildImpl &build, - build_context &bc, - const flat_set &lit_ids, - RoseProgram &program) { - assert(!lit_ids.empty()); +void makePushDelayedInstructions(const RoseBuildImpl &build, build_context &bc, + u32 lit_id, RoseProgram &program) { + const auto &info = build.literal_info.at(lit_id); vector delay_instructions; - for (const auto &lit_id : lit_ids) { - const auto &info = build.literal_info.at(lit_id); - for (const auto &delayed_lit_id : info.delayed_ids) { - DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); - assert(contains(bc.delay_programs, delayed_lit_id)); - u32 delay_id = bc.delay_programs.at(delayed_lit_id); - const auto &delay_lit = build.literals.right.at(delayed_lit_id); - delay_instructions.emplace_back(verify_u8(delay_lit.delay), - delay_id); - } + for (const auto &delayed_lit_id : info.delayed_ids) { + DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); + assert(contains(bc.delay_programs, delayed_lit_id)); + u32 delay_id = bc.delay_programs.at(delayed_lit_id); + const auto &delay_lit = build.literals.right.at(delayed_lit_id); + delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); } sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a, @@ -4169,21 +4163,10 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, } static -rose_group getLitGroupsUnion(const 
RoseBuildImpl &build, - const flat_set &lit_ids) { - rose_group groups = 0; - for (auto lit_id : lit_ids) { - const auto &info = build.literal_info.at(lit_id); - groups |= info.group_mask; - } - return groups; -} - -static -void makeGroupCheckInstruction(const RoseBuildImpl &build, - const flat_set &lit_ids, +void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id, RoseProgram &program) { - rose_group groups = getLitGroupsUnion(build, lit_ids); + const auto &info = build.literal_info.at(lit_id); + rose_group groups = info.group_mask; if (!groups) { return; } @@ -4192,17 +4175,14 @@ void makeGroupCheckInstruction(const RoseBuildImpl &build, static void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, - const flat_set &lit_ids, - RoseProgram &program) { - const auto &lit_info = build.literal_info.at(*lit_ids.begin()); - if (!lit_info.requires_benefits) { + u32 lit_id, RoseProgram &program) { + const auto &info = build.literal_info.at(lit_id); + if (!info.requires_benefits) { return; } vector look; - assert(lit_ids.size() == 1); - u32 lit_id = *lit_ids.begin(); const ue2_literal &s = build.literals.right.at(lit_id).s; DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id, dumpString(s).c_str()); @@ -4221,16 +4201,14 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, static void makeGroupSquashInstruction(const RoseBuildImpl &build, - const flat_set &lit_ids, + u32 lit_id, RoseProgram &program) { - assert(!lit_ids.empty()); - const u32 lit_id = *lit_ids.begin(); - const auto &info = build.literal_info[lit_id]; + const auto &info = build.literal_info.at(lit_id); if (!info.squash_group) { return; } - rose_group groups = getLitGroupsUnion(build, lit_ids); + rose_group groups = info.group_mask; if (!groups) { return; } @@ -4255,32 +4233,17 @@ u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { static void makeRecordAnchoredInstruction(const RoseBuildImpl &build, - build_context &bc, - const flat_set &lit_ids, + build_context &bc, u32 lit_id, RoseProgram &program) { - assert(!lit_ids.empty()); - u32 first_lit_id = *begin(lit_ids); - - // Must be anchored. - if (build.literals.right.at(first_lit_id).table != ROSE_ANCHORED) { + if (build.literals.right.at(lit_id).table != ROSE_ANCHORED) { return; } - - // Dedupe anch_ids to fire. 
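/*
 * Editor's note: the group check/squash instructions built just above operate
 * on rose_group, a 64-bit bitmap of literal groups. A hedged sketch of the
 * intended run-time semantics (standalone, not the real interpreter):
 */
#include <cstdint>

using rose_group = uint64_t;

// CHECK_GROUPS: the program continues only if a required group is still on.
static bool groups_pass(rose_group state, rose_group required) {
    return (state & required) != 0;
}

// SQUASH_GROUPS: a literal with squash_group set switches its own groups off.
static rose_group squash_groups(rose_group state, rose_group mask) {
    return state & ~mask;
}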
- flat_set anch_ids; - - for (const auto &lit_id : lit_ids) { - assert(build.literals.right.at(lit_id).table == ROSE_ANCHORED); - if (!contains(bc.anchored_programs, lit_id)) { - continue; - } - anch_ids.insert(bc.anchored_programs.at(lit_id)); - } - - for (const auto &anch_id : anch_ids) { - DEBUG_PRINTF("adding RECORD_ANCHORED for anch_id=%u\n", anch_id); - program.add_before_end(make_unique(anch_id)); + if (!contains(bc.anchored_programs, lit_id)) { + return; } + auto anch_id = bc.anchored_programs.at(lit_id); + DEBUG_PRINTF("adding RECORD_ANCHORED for anch_id=%u\n", anch_id); + program.add_before_end(make_unique(anch_id)); } static @@ -4298,8 +4261,7 @@ u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { static void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, - const flat_set &lit_ids, - const vector &lit_edges, + u32 lit_id, const vector &lit_edges, RoseProgram &program) { if (lit_edges.empty()) { return; @@ -4314,22 +4276,9 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, return; } - if (lit_ids.empty()) { - return; - } - - size_t min_len = SIZE_MAX; - u32 min_offset = UINT32_MAX; - for (u32 lit_id : lit_ids) { - const auto &lit = build.literals.right.at(lit_id); - size_t lit_min_len = lit.elength(); - u32 lit_min_offset = findMinOffset(build, lit_id); - DEBUG_PRINTF("lit_id=%u has min_len=%zu, min_offset=%u\n", lit_id, - lit_min_len, lit_min_offset); - min_len = min(min_len, lit_min_len); - min_offset = min(min_offset, lit_min_offset); - } - + const auto &lit = build.literals.right.at(lit_id); + size_t min_len = lit.elength(); + u32 min_offset = findMinOffset(build, lit_id); DEBUG_PRINTF("has min_len=%zu, min_offset=%u, " "global min is %u\n", min_len, min_offset, bc.floatingMinLiteralMatchOffset); @@ -4352,25 +4301,13 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, static void makeCheckLiteralInstruction(const RoseBuildImpl &build, - const build_context &bc, - const flat_set &lits, + const build_context &bc, u32 lit_id, RoseProgram &program) { assert(bc.longLitLengthThreshold > 0); - DEBUG_PRINTF("lits [%s], long lit threshold %zu\n", - as_string_list(lits).c_str(), bc.longLitLengthThreshold); + DEBUG_PRINTF("lit_id=%u, long lit threshold %zu\n", lit_id, + bc.longLitLengthThreshold); - if (lits.size() != 1) { - // final_id sharing is only allowed for literals that are short enough - // to not require any additional confirm work. - assert(all_of(begin(lits), end(lits), [&](u32 lit_id) { - const rose_literal_id &lit = build.literals.right.at(lit_id); - return lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX; - })); - return; - } - - u32 lit_id = *lits.begin(); if (build.isDelayed(lit_id)) { return; } @@ -4435,41 +4372,39 @@ bool hasDelayedLiteral(RoseBuildImpl &build, static RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, - const flat_set &lit_ids, + u32 lit_id, const vector &lit_edges) { RoseProgram program; // Check long literal info. - makeCheckLiteralInstruction(build, bc, lit_ids, program); + makeCheckLiteralInstruction(build, bc, lit_id, program); // Check lit mask. - makeCheckLitMaskInstruction(build, bc, lit_ids, program); + makeCheckLitMaskInstruction(build, bc, lit_id, program); // Check literal groups. This is an optimisation that we only perform for // delayed literals, as their groups may be switched off; ordinarily, we // can trust the HWLM matcher. 
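/*
 * Editor's note: makeCheckLiteralInstruction() above chooses the confirm work
 * by literal length -- short literals need no confirm at all, medium ones get
 * CHECK_MED_LIT, and those past the long-literal threshold get
 * CHECK_LONG_LIT. A hedged sketch of the dispatch; the enum and the exact
 * boundary conditions are illustrative assumptions:
 */
#include <cstddef>

enum class LitCheck { None, Med, Long };

static LitCheck lit_check_for(size_t lit_len, size_t short_max,
                              size_t long_threshold) {
    if (lit_len <= short_max) {
        return LitCheck::None; // fully confirmed by the matcher itself
    }
    return lit_len < long_threshold ? LitCheck::Med : LitCheck::Long;
}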
if (hasDelayedLiteral(build, lit_edges)) { - makeGroupCheckInstruction(build, lit_ids, program); + makeGroupCheckInstruction(build, lit_id, program); } // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build, bc, lit_ids, program); + makePushDelayedInstructions(build, bc, lit_id, program); // Add pre-check for early literals in the floating table. - makeCheckLitEarlyInstruction(build, bc, lit_ids, lit_edges, program); + makeCheckLitEarlyInstruction(build, bc, lit_id, lit_edges, program); return program; } static RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, - const flat_set &lit_ids, - const vector &lit_edges, + u32 lit_id, const vector &lit_edges, bool is_anchored_program) { const auto &g = build.g; - DEBUG_PRINTF("lit ids {%s}, %zu lit edges\n", - as_string_list(lit_ids).c_str(), lit_edges.size()); + DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size()); RoseProgram program; @@ -4504,25 +4439,26 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, program.add_block(makeProgram(build, bc, e)); } - if (lit_ids.empty()) { + if (lit_id == build.eod_event_literal_id) { + assert(build.eod_event_literal_id != MO_INVALID_IDX); return program; } RoseProgram root_block; // Literal may squash groups. - makeGroupSquashInstruction(build, lit_ids, root_block); + makeGroupSquashInstruction(build, lit_id, root_block); // Literal may be anchored and need to be recorded. if (!is_anchored_program) { - makeRecordAnchoredInstruction(build, bc, lit_ids, root_block); + makeRecordAnchoredInstruction(build, bc, lit_id, root_block); } program.add_block(move(root_block)); // Construct initial program up front, as its early checks must be able // to jump to end and terminate processing for this literal. 
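/*
 * Editor's note: a miniature model of the composition used just below. The
 * early-check block is built first and the per-edge/root work is spliced in
 * ahead of its terminating END, so a failing early check jumps straight over
 * everything. MiniProgram is a hypothetical stand-in for RoseProgram and its
 * add_before_end():
 */
#include <string>
#include <vector>

struct MiniProgram {
    std::vector<std::string> instrs{"END"}; // every program ends with END

    void add_before_end(MiniProgram other) {
        // splice other's body (minus its END) in front of our END
        instrs.insert(instrs.end() - 1, other.instrs.begin(),
                      other.instrs.end() - 1);
    }
};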
- auto lit_program = buildLitInitialProgram(build, bc, lit_ids, lit_edges); + auto lit_program = buildLitInitialProgram(build, bc, lit_id, lit_edges); lit_program.add_before_end(move(program)); return lit_program; @@ -4575,7 +4511,7 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, } else { edges_ptr = &no_edges; } - auto prog = buildLiteralProgram(build, bc, {lit_id}, *edges_ptr, + auto prog = buildLiteralProgram(build, bc, lit_id, *edges_ptr, is_anchored_program); blocks.push_back(move(prog)); } @@ -4608,9 +4544,9 @@ u32 writeDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, } RoseProgram prog; - makeCheckLiteralInstruction(build, bc, {lit_id}, prog); - makeCheckLitMaskInstruction(build, bc, {lit_id}, prog); - makePushDelayedInstructions(build, bc, {lit_id}, prog); + makeCheckLiteralInstruction(build, bc, lit_id, prog); + makeCheckLitMaskInstruction(build, bc, lit_id, prog); + makePushDelayedInstructions(build, bc, lit_id, prog); blocks.push_back(move(prog)); } @@ -5074,7 +5010,8 @@ void addEodEventProgram(RoseBuildImpl &build, build_context &bc, tie(g[source(b, g)].index, g[target(b, g)].index); }); - program.add_block(buildLiteralProgram(build, bc, {}, edge_list, false)); + program.add_block(buildLiteralProgram(build, bc, build.eod_event_literal_id, + edge_list, false)); } static From 7b5c4c85cc037535a0b013cc9cbfcd76b5fa10a0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 23 Feb 2017 11:43:49 +1100 Subject: [PATCH 108/326] rose: create new ProgramBuild struct --- src/rose/rose_build_bytecode.cpp | 161 +++++++++++++++++-------------- 1 file changed, 90 insertions(+), 71 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b9c7ae33..c08713df 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -183,10 +183,6 @@ struct build_context : boost::noncopyable { /** \brief mapping from suffix to queue index. */ map suffixes; - /** \brief Mapping from vertex to key, for vertices with a - * CHECK_NOT_HANDLED instruction. */ - ue2::unordered_map handledKeys; - /** \brief Number of roles with a state bit. * * This is set by assignStateIndices() and should be constant throughout @@ -236,6 +232,20 @@ struct build_context : boost::noncopyable { /** \brief Resources in use (tracked as programs are added). */ RoseResources resources; +}; + +/** \brief Data only used during construction of various programs (literal, + * anchored, delay, etc). */ +struct ProgramBuild : boost::noncopyable { + /** \brief Mapping from vertex to key, for vertices with a + * CHECK_NOT_HANDLED instruction. */ + ue2::unordered_map handledKeys; + + /** \brief Mapping from Rose literal ID to anchored program index. */ + map anchored_programs; + + /** \brief Mapping from Rose literal ID to delayed program index. */ + map delay_programs; /** \brief Mapping from every vertex to the groups that must be on for that * vertex to be reached. */ @@ -243,12 +253,6 @@ struct build_context : boost::noncopyable { /** \brief Global bitmap of groups that can be squashed. */ rose_group squashable_groups = 0; - - /** \brief Mapping from Rose literal ID to anchored program index. */ - map anchored_programs; - - /** \brief Mapping from Rose literal ID to delayed program index. 
*/ - map delay_programs; }; /** \brief subengine info including built engine and @@ -3575,8 +3579,8 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, } static -void makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, - RoseProgram &program) { +void makeRoleGroups(RoseBuildImpl &build, ProgramBuild &prog_build, + RoseVertex v, RoseProgram &program) { const auto &g = build.g; rose_group groups = g[v].groups; if (!groups) { @@ -3588,14 +3592,14 @@ void makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(in_degree(v, g) > 0); rose_group already_on = ~rose_group{0}; for (const auto &u : inv_adjacent_vertices_range(v, g)) { - already_on &= bc.vertex_group_map.at(u); + already_on &= prog_build.vertex_group_map.at(u); } DEBUG_PRINTF("already_on=0x%llx\n", already_on); - DEBUG_PRINTF("squashable=0x%llx\n", bc.squashable_groups); + DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups); DEBUG_PRINTF("groups=0x%llx\n", groups); - already_on &= ~bc.squashable_groups; + already_on &= ~prog_build.squashable_groups; DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on); // We don't *have* to mask off the groups that we know are already on, but @@ -3726,14 +3730,14 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, } static -void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, +void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v, RoseProgram &program) { u32 handled_key; - if (contains(bc.handledKeys, v)) { - handled_key = bc.handledKeys.at(v); + if (contains(prog_build.handledKeys, v)) { + handled_key = prog_build.handledKeys.at(v); } else { - handled_key = verify_u32(bc.handledKeys.size()); - bc.handledKeys.emplace(v, handled_key); + handled_key = verify_u32(prog_build.handledKeys.size()); + prog_build.handledKeys.emplace(v, handled_key); } const auto *end_inst = program.end_instruction(); @@ -3771,7 +3775,7 @@ void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc, static RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc, - const RoseEdge &e) { + ProgramBuild &prog_build, const RoseEdge &e) { const RoseGraph &g = build.g; auto v = target(e, g); @@ -3796,7 +3800,7 @@ RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc, // offset bounds. We must ensure we put this check/set operation after the // bounds check to deal with this case. if (in_degree(v, g) > 1) { - makeRoleCheckNotHandled(bc, v, program); + makeRoleCheckNotHandled(prog_build, v, program); } makeRoleLookaround(build, bc, v, program); @@ -3819,7 +3823,7 @@ RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc, // Note: SET_GROUPS instruction must be after infix triggers, as an infix // going dead may switch off groups. 
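/*
 * Editor's note: a sketch of the "already on" reasoning in makeRoleGroups()
 * above. Groups that every predecessor is known to have switched on, and
 * that are not squashable, need not be set again; squashable groups may have
 * been turned off since the predecessor matched, so they cannot be trusted.
 * Standalone arithmetic only, names hypothetical:
 */
#include <cstdint>

using rose_group = uint64_t;

static rose_group groups_still_needed(rose_group want,
                                      rose_group on_in_all_preds,
                                      rose_group squashable) {
    rose_group reliably_on = on_in_all_preds & ~squashable;
    return want & ~reliably_on; // only these bits still need a SET_GROUPS
}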
RoseProgram groups_block; - makeRoleGroups(build, bc, v, groups_block); + makeRoleGroups(build, prog_build, v, groups_block); effects_block.add_block(move(groups_block)); RoseProgram suffix_block; @@ -4138,16 +4142,17 @@ void addPredBlocks(build_context &bc, map &pred_blocks, } static -void makePushDelayedInstructions(const RoseBuildImpl &build, build_context &bc, - u32 lit_id, RoseProgram &program) { +void makePushDelayedInstructions(const RoseBuildImpl &build, + ProgramBuild &prog_build, u32 lit_id, + RoseProgram &program) { const auto &info = build.literal_info.at(lit_id); vector delay_instructions; for (const auto &delayed_lit_id : info.delayed_ids) { DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); - assert(contains(bc.delay_programs, delayed_lit_id)); - u32 delay_id = bc.delay_programs.at(delayed_lit_id); + assert(contains(prog_build.delay_programs, delayed_lit_id)); + u32 delay_id = prog_build.delay_programs.at(delayed_lit_id); const auto &delay_lit = build.literals.right.at(delayed_lit_id); delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); } @@ -4233,15 +4238,15 @@ u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { static void makeRecordAnchoredInstruction(const RoseBuildImpl &build, - build_context &bc, u32 lit_id, + ProgramBuild &prog_build, u32 lit_id, RoseProgram &program) { if (build.literals.right.at(lit_id).table != ROSE_ANCHORED) { return; } - if (!contains(bc.anchored_programs, lit_id)) { + if (!contains(prog_build.anchored_programs, lit_id)) { return; } - auto anch_id = bc.anchored_programs.at(lit_id); + auto anch_id = prog_build.anchored_programs.at(lit_id); DEBUG_PRINTF("adding RECORD_ANCHORED for anch_id=%u\n", anch_id); program.add_before_end(make_unique(anch_id)); } @@ -4372,7 +4377,7 @@ bool hasDelayedLiteral(RoseBuildImpl &build, static RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, - u32 lit_id, + ProgramBuild &prog_build, u32 lit_id, const vector &lit_edges) { RoseProgram program; @@ -4390,7 +4395,7 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, } // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build, bc, lit_id, program); + makePushDelayedInstructions(build, prog_build, lit_id, program); // Add pre-check for early literals in the floating table. 
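/*
 * Editor's note: a sketch of the arithmetic behind the CHECK_LIT_EARLY
 * pre-check added below. A floating literal match ending before
 * max(lit_min_offset, floatingMinLiteralMatchOffset) can never be genuine,
 * so the program bails out immediately. Hypothetical standalone helper:
 */
#include <algorithm>
#include <cstdint>

static bool too_early_to_match(uint64_t match_end, uint32_t lit_min_offset,
                               uint32_t floating_min_match_offset) {
    uint32_t min_offset = std::max(lit_min_offset, floating_min_match_offset);
    return match_end < min_offset; // skip: cannot be a real match yet
}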
makeCheckLitEarlyInstruction(build, bc, lit_id, lit_edges, program); @@ -4400,7 +4405,8 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, static RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, - u32 lit_id, const vector &lit_edges, + ProgramBuild &prog_build, u32 lit_id, + const vector &lit_edges, bool is_anchored_program) { const auto &g = build.g; @@ -4421,7 +4427,8 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, g[target(e, g)].index); assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); - pred_blocks[pred_state].add_block(makeProgram(build, bc, e)); + pred_blocks[pred_state].add_block( + makeProgram(build, bc, prog_build, e)); } // Add blocks to deal with non-root edges (triggered by sparse iterator or @@ -4436,7 +4443,7 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, } DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index, g[target(e, g)].index); - program.add_block(makeProgram(build, bc, e)); + program.add_block(makeProgram(build, bc, prog_build, e)); } if (lit_id == build.eod_event_literal_id) { @@ -4451,14 +4458,15 @@ RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, // Literal may be anchored and need to be recorded. if (!is_anchored_program) { - makeRecordAnchoredInstruction(build, bc, lit_id, root_block); + makeRecordAnchoredInstruction(build, prog_build, lit_id, root_block); } program.add_block(move(root_block)); // Construct initial program up front, as its early checks must be able // to jump to end and terminate processing for this literal. - auto lit_program = buildLitInitialProgram(build, bc, lit_id, lit_edges); + auto lit_program = + buildLitInitialProgram(build, bc, prog_build, lit_id, lit_edges); lit_program.add_before_end(move(program)); return lit_program; @@ -4494,7 +4502,7 @@ RoseProgram assembleProgramBlocks(vector &&blocks) { static u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, - const flat_set &lit_ids, + ProgramBuild &prog_build, const flat_set &lit_ids, const map> &lit_edge_map, bool is_anchored_program) { assert(!lit_ids.empty()); @@ -4511,8 +4519,8 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, } else { edges_ptr = &no_edges; } - auto prog = buildLiteralProgram(build, bc, lit_id, *edges_ptr, - is_anchored_program); + auto prog = buildLiteralProgram(build, bc, prog_build, lit_id, + *edges_ptr, is_anchored_program); blocks.push_back(move(prog)); } @@ -4527,6 +4535,7 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, static u32 writeDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, const flat_set &lit_ids) { assert(!lit_ids.empty()); @@ -4546,7 +4555,7 @@ u32 writeDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, RoseProgram prog; makeCheckLiteralInstruction(build, bc, lit_id, prog); makeCheckLitMaskInstruction(build, bc, lit_id, prog); - makePushDelayedInstructions(build, bc, lit_id, prog); + makePushDelayedInstructions(build, prog_build, lit_id, prog); blocks.push_back(move(prog)); } @@ -4700,7 +4709,8 @@ void groupByFragment(RoseBuildImpl &build) { * \brief Build the interpreter programs for each literal. 
*/ static -void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { +void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build) { // Build a reverse mapping from fragment -> {lit_id, lit_id,...} map> frag_to_lit_map; for (u32 lit_id = 0; lit_id < verify_u32(build.literal_info.size()); @@ -4720,10 +4730,10 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", frag.fragment_id, as_string_list(lit_ids).c_str()); - frag.lit_program_offset = - writeLiteralProgram(build, bc, lit_ids, lit_edge_map, false); + frag.lit_program_offset = writeLiteralProgram( + build, bc, prog_build, lit_ids, lit_edge_map, false); frag.delay_program_offset = - writeDelayRebuildProgram(build, bc, lit_ids); + writeDelayRebuildProgram(build, bc, prog_build, lit_ids); } } @@ -4734,7 +4744,8 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { * programs. */ static -pair writeDelayPrograms(RoseBuildImpl &build, build_context &bc) { +pair writeDelayPrograms(RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build) { auto lit_edge_map = findEdgesByLiteral(build); vector programs; // program offsets indexed by (delayed) lit id @@ -4749,8 +4760,8 @@ pair writeDelayPrograms(RoseBuildImpl &build, build_context &bc) { for (const auto &delayed_lit_id : info.delayed_ids) { DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); - u32 offset = writeLiteralProgram(build, bc, {delayed_lit_id}, - lit_edge_map, false); + u32 offset = writeLiteralProgram( + build, bc, prog_build, {delayed_lit_id}, lit_edge_map, false); u32 delay_id; auto it = cache.find(offset); @@ -4765,7 +4776,7 @@ pair writeDelayPrograms(RoseBuildImpl &build, build_context &bc) { DEBUG_PRINTF("assigned new delay_id %u for offset %u\n", delay_id, offset); } - bc.delay_programs.emplace(delayed_lit_id, delay_id); + prog_build.delay_programs.emplace(delayed_lit_id, delay_id); } } @@ -4781,7 +4792,8 @@ pair writeDelayPrograms(RoseBuildImpl &build, build_context &bc) { * programs. 
*/ static -pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { +pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build) { auto lit_edge_map = findEdgesByLiteral(build); vector programs; // program offsets indexed by anchored id @@ -4808,8 +4820,8 @@ pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { continue; } - u32 offset = - writeLiteralProgram(build, bc, {lit_id}, lit_edge_map, true); + u32 offset = writeLiteralProgram(build, bc, prog_build, {lit_id}, + lit_edge_map, true); DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset); u32 anch_id; @@ -4824,7 +4836,7 @@ pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { DEBUG_PRINTF("assigned new anch_id %u for offset %u\n", anch_id, offset); } - bc.anchored_programs.emplace(lit_id, anch_id); + prog_build.anchored_programs.emplace(lit_id, anch_id); } DEBUG_PRINTF("%zu anchored programs\n", programs.size()); @@ -4885,7 +4897,8 @@ pair buildReportPrograms(RoseBuildImpl &build, build_context &bc) { static RoseProgram makeEodAnchorProgram(RoseBuildImpl &build, build_context &bc, - const RoseEdge &e, const bool multiple_preds) { + ProgramBuild &prog_build, const RoseEdge &e, + const bool multiple_preds) { const RoseGraph &g = build.g; const RoseVertex v = target(e, g); @@ -4897,7 +4910,7 @@ RoseProgram makeEodAnchorProgram(RoseBuildImpl &build, build_context &bc, if (multiple_preds) { // Only necessary when there is more than one pred. - makeRoleCheckNotHandled(bc, v, program); + makeRoleCheckNotHandled(prog_build, v, program); } const auto &reports = g[v].reports; @@ -4940,7 +4953,8 @@ bool hasEodMatcher(const RoseBuildImpl &build) { static void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, - bool in_etable, RoseProgram &program) { + ProgramBuild &prog_build, bool in_etable, + RoseProgram &program) { const RoseGraph &g = build.g; // Predecessor state id -> program block. 
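/*
 * Editor's note: a sketch of the run-time behaviour that the
 * CHECK_NOT_HANDLED emitted for multi-predecessor EOD vertices relies on --
 * the first path to reach a role marks its key, later paths bail out, so the
 * role's work runs at most once. Hypothetical model using a plain bitset in
 * place of the engine's fatbit; assumes key < 1024:
 */
#include <bitset>
#include <cstdint>

static bool check_not_handled(std::bitset<1024> &handled, uint32_t key) {
    if (handled.test(key)) {
        return false; // another predecessor already ran this role
    }
    handled.set(key); // mark it and carry on with the role program
    return true;
}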
@@ -4975,7 +4989,7 @@ void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); pred_blocks[pred_state].add_block( - makeEodAnchorProgram(build, bc, e, multiple_preds)); + makeEodAnchorProgram(build, bc, prog_build, e, multiple_preds)); } } @@ -4984,7 +4998,7 @@ void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, static void addEodEventProgram(RoseBuildImpl &build, build_context &bc, - RoseProgram &program) { + ProgramBuild &prog_build, RoseProgram &program) { if (build.eod_event_literal_id == MO_INVALID_IDX) { return; } @@ -5010,8 +5024,8 @@ void addEodEventProgram(RoseBuildImpl &build, build_context &bc, tie(g[source(b, g)].index, g[target(b, g)].index); }); - program.add_block(buildLiteralProgram(build, bc, build.eod_event_literal_id, - edge_list, false)); + program.add_block(buildLiteralProgram( + build, bc, prog_build, build.eod_event_literal_id, edge_list, false)); } static @@ -5049,14 +5063,14 @@ void addMatcherEodProgram(const RoseBuildImpl &build, RoseProgram &program) { static u32 writeEodProgram(RoseBuildImpl &build, build_context &bc, - u32 eodNfaIterOffset) { + ProgramBuild &prog_build, u32 eodNfaIterOffset) { RoseProgram program; - addEodEventProgram(build, bc, program); + addEodEventProgram(build, bc, prog_build, program); addEnginesEodProgram(eodNfaIterOffset, program); - addEodAnchorProgram(build, bc, false, program); + addEodAnchorProgram(build, bc, prog_build, false, program); addMatcherEodProgram(build, program); - addEodAnchorProgram(build, bc, true, program); + addEodAnchorProgram(build, bc, prog_build, true, program); addSuffixesEodProgram(build, program); if (program.empty()) { @@ -5312,8 +5326,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.resources.has_anchored = true; } bc.needs_mpv_catchup = needsMpvCatchup(*this); - bc.vertex_group_map = getVertexGroupMap(*this); - bc.squashable_groups = getSquashableGroups(*this); auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary); @@ -5360,18 +5372,25 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { queue_count - leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); + // Information only needed for program construction. 
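/*
 * Editor's note: the ordering of the calls below matters. Literal programs
 * emit PUSH_DELAYED and RECORD_ANCHORED instructions that refer to ids
 * recorded in prog_build by writeDelayPrograms() and writeAnchoredPrograms(),
 * so those two passes must run first. A hedged miniature of the dependency
 * (types hypothetical):
 */
#include <cstdint>
#include <map>

struct MiniProgBuild {
    std::map<uint32_t, uint32_t> anchored_programs; // lit_id -> anch_id
    std::map<uint32_t, uint32_t> delay_programs;    // lit_id -> delay_id
};

static uint32_t delay_id_for(const MiniProgBuild &pb, uint32_t lit_id) {
    // The literal pass can only do this lookup once the delay pass has
    // populated the map; a miss here would be a build-order bug.
    return pb.delay_programs.at(lit_id);
}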
+ ProgramBuild prog_build; + prog_build.vertex_group_map = getVertexGroupMap(*this); + prog_build.squashable_groups = getSquashableGroups(*this); + u32 anchoredProgramOffset; u32 anchoredProgramCount; tie(anchoredProgramOffset, anchoredProgramCount) = - writeAnchoredPrograms(*this, bc); + writeAnchoredPrograms(*this, bc, prog_build); u32 delayProgramOffset; u32 delayProgramCount; - tie(delayProgramOffset, delayProgramCount) = writeDelayPrograms(*this, bc); + tie(delayProgramOffset, delayProgramCount) = + writeDelayPrograms(*this, bc, prog_build); - buildLiteralPrograms(*this, bc); + buildLiteralPrograms(*this, bc, prog_build); - u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); + u32 eodProgramOffset = + writeEodProgram(*this, bc, prog_build, eodNfaIterOffset); size_t longLitStreamStateRequired = 0; u32 longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob, @@ -5574,7 +5593,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->queueCount = queue_count; engine->activeQueueArraySize = fatbit_size(queue_count); engine->eagerIterOffset = eagerIterOffset; - engine->handledKeyCount = bc.handledKeys.size(); + engine->handledKeyCount = prog_build.handledKeys.size(); engine->handledKeyFatbitSize = fatbit_size(engine->handledKeyCount); engine->rolesWithStateCount = bc.numStates; From 90216921b068fa4891aea313061654cef3cdbd96 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Mon, 23 Jan 2017 17:15:40 -0500 Subject: [PATCH 109/326] FDR: front end loop improvement --- src/fdr/fdr.c | 290 ++++++++++++++++------------- src/fdr/fdr_engine_description.cpp | 2 +- src/fdr/flood_runtime.h | 4 +- src/util/simd_utils.h | 14 +- 4 files changed, 169 insertions(+), 141 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index a965ba14..e15c6c33 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -35,6 +35,7 @@ #include "teddy.h" #include "teddy_internal.h" #include "util/simd_utils.h" +#include "util/uniform_ops.h" /** \brief number of bytes processed in each iteration */ #define ITER_BYTES 16 @@ -51,7 +52,7 @@ * * The incoming buffer is to split in multiple zones to ensure two properties: * 1: that we can read 8? bytes behind to generate a hash safely - * 2: that we can read the byte after the current byte (domain > 8) + * 2: that we can read the 3 byte after the current byte (domain > 8) */ struct zone { /** \brief copied buffer, used only when it is a boundary zone. */ @@ -116,20 +117,34 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }; +/* compilers don't reliably synthesize the ANDN instruction here, + * so we force its generation. + */ +static really_inline +u64a andn(const u32 a, const u32 *b) { + u64a r; +#if defined(__BMI__) + __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*b)); +#else + r = *b & ~a; +#endif + return r; +} + /* generates an initial state mask based on the last byte-ish of history rather * than being all accepting. If there is no history to consider, the state is * generated based on the minimum length of each bucket in order to prevent * confirms. 
*/ static really_inline -m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft, +m128 getInitState(const struct FDR *fdr, u8 len_history, const u64a *ft, const struct zone *z) { m128 s; if (len_history) { /* +1: the zones ensure that we can read the byte at z->end */ u32 tmp = lv_u16(z->start + z->shift - 1, z->buf, z->end + 1); tmp &= fdr->domainMask; - s = *((const m128 *)ft + tmp); + s = load_m128_from_u64a(ft + tmp); s = rshiftbyte_m128(s, 1); } else { s = fdr->start; @@ -138,51 +153,30 @@ m128 getInitState(const struct FDR *fdr, u8 len_history, const u8 *ft, } static really_inline -void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, - u64a domain_mask_adjusted, const u8 *ft, u64a *conf0, - u64a *conf8, m128 *s) { +void get_conf_stride_1(const u8 *itPtr, UNUSED const u8 *start_ptr, + UNUSED const u8 *end_ptr, u32 domain_mask_flipped, + const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) { /* +1: the zones ensure that we can read the byte at z->end */ + assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr); + u64a reach0 = andn(domain_mask_flipped, (const u32 *)(itPtr)); + u64a reach1 = andn(domain_mask_flipped, (const u32 *)(itPtr + 1)); + u64a reach2 = andn(domain_mask_flipped, (const u32 *)(itPtr + 2)); + u64a reach3 = andn(domain_mask_flipped, (const u32 *)(itPtr + 3)); - u64a current_data_0; - u64a current_data_8; + m128 st0 = load_m128_from_u64a(ft + reach0); + m128 st1 = load_m128_from_u64a(ft + reach1); + m128 st2 = load_m128_from_u64a(ft + reach2); + m128 st3 = load_m128_from_u64a(ft + reach3); - current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr); - u64a v7 = (lv_u16(itPtr + 7, start_ptr, end_ptr + 1) << 1) & - domain_mask_adjusted; - u64a v0 = (current_data_0 << 1) & domain_mask_adjusted; - u64a v1 = (current_data_0 >> 7) & domain_mask_adjusted; - u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted; - u64a v3 = (current_data_0 >> 23) & domain_mask_adjusted; - u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted; - u64a v5 = (current_data_0 >> 39) & domain_mask_adjusted; - u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted; - current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr); - u64a v15 = (lv_u16(itPtr + 15, start_ptr, end_ptr + 1) << 1) & - domain_mask_adjusted; - u64a v8 = (current_data_8 << 1) & domain_mask_adjusted; - u64a v9 = (current_data_8 >> 7) & domain_mask_adjusted; - u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted; - u64a v11 = (current_data_8 >> 23) & domain_mask_adjusted; - u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted; - u64a v13 = (current_data_8 >> 39) & domain_mask_adjusted; - u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted; + u64a reach4 = andn(domain_mask_flipped, (const u32 *)(itPtr + 4)); + u64a reach5 = andn(domain_mask_flipped, (const u32 *)(itPtr + 5)); + u64a reach6 = andn(domain_mask_flipped, (const u32 *)(itPtr + 6)); + u64a reach7 = andn(domain_mask_flipped, (const u32 *)(itPtr + 7)); - m128 st0 = *(const m128 *)(ft + v0*8); - m128 st1 = *(const m128 *)(ft + v1*8); - m128 st2 = *(const m128 *)(ft + v2*8); - m128 st3 = *(const m128 *)(ft + v3*8); - m128 st4 = *(const m128 *)(ft + v4*8); - m128 st5 = *(const m128 *)(ft + v5*8); - m128 st6 = *(const m128 *)(ft + v6*8); - m128 st7 = *(const m128 *)(ft + v7*8); - m128 st8 = *(const m128 *)(ft + v8*8); - m128 st9 = *(const m128 *)(ft + v9*8); - m128 st10 = *(const m128 *)(ft + v10*8); - m128 st11 = *(const m128 *)(ft + v11*8); - m128 st12 = *(const m128 *)(ft + v12*8); - m128 st13 = *(const m128 *)(ft + 
v13*8); - m128 st14 = *(const m128 *)(ft + v14*8); - m128 st15 = *(const m128 *)(ft + v15*8); + m128 st4 = load_m128_from_u64a(ft + reach4); + m128 st5 = load_m128_from_u64a(ft + reach5); + m128 st6 = load_m128_from_u64a(ft + reach6); + m128 st7 = load_m128_from_u64a(ft + reach7); st1 = lshiftbyte_m128(st1, 1); st2 = lshiftbyte_m128(st2, 2); @@ -191,6 +185,40 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, st5 = lshiftbyte_m128(st5, 5); st6 = lshiftbyte_m128(st6, 6); st7 = lshiftbyte_m128(st7, 7); + + st0 = or128(st0, st1); + st2 = or128(st2, st3); + st4 = or128(st4, st5); + st6 = or128(st6, st7); + st0 = or128(st0, st2); + st4 = or128(st4, st6); + st0 = or128(st0, st4); + *s = or128(*s, st0); + + *conf0 = movq(*s); + *s = rshiftbyte_m128(*s, 8); + *conf0 ^= ~0ULL; + + u64a reach8 = andn(domain_mask_flipped, (const u32 *)(itPtr + 8)); + u64a reach9 = andn(domain_mask_flipped, (const u32 *)(itPtr + 9)); + u64a reach10 = andn(domain_mask_flipped, (const u32 *)(itPtr + 10)); + u64a reach11 = andn(domain_mask_flipped, (const u32 *)(itPtr + 11)); + + m128 st8 = load_m128_from_u64a(ft + reach8); + m128 st9 = load_m128_from_u64a(ft + reach9); + m128 st10 = load_m128_from_u64a(ft + reach10); + m128 st11 = load_m128_from_u64a(ft + reach11); + + u64a reach12 = andn(domain_mask_flipped, (const u32 *)(itPtr + 12)); + u64a reach13 = andn(domain_mask_flipped, (const u32 *)(itPtr + 13)); + u64a reach14 = andn(domain_mask_flipped, (const u32 *)(itPtr + 14)); + u64a reach15 = andn(domain_mask_flipped, (const u32 *)(itPtr + 15)); + + m128 st12 = load_m128_from_u64a(ft + reach12); + m128 st13 = load_m128_from_u64a(ft + reach13); + m128 st14 = load_m128_from_u64a(ft + reach14); + m128 st15 = load_m128_from_u64a(ft + reach15); + st9 = lshiftbyte_m128(st9, 1); st10 = lshiftbyte_m128(st10, 2); st11 = lshiftbyte_m128(st11, 3); @@ -199,100 +227,86 @@ void get_conf_stride_1(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, st14 = lshiftbyte_m128(st14, 6); st15 = lshiftbyte_m128(st15, 7); - *s = or128(*s, st0); - *s = or128(*s, st1); - *s = or128(*s, st2); - *s = or128(*s, st3); - *s = or128(*s, st4); - *s = or128(*s, st5); - *s = or128(*s, st6); - *s = or128(*s, st7); - *conf0 = movq(*s); - *s = rshiftbyte_m128(*s, 8); - *conf0 ^= ~0ULL; - + st8 = or128(st8, st9); + st10 = or128(st10, st11); + st12 = or128(st12, st13); + st14 = or128(st14, st15); + st8 = or128(st8, st10); + st12 = or128(st12, st14); + st8 = or128(st8, st12); *s = or128(*s, st8); - *s = or128(*s, st9); - *s = or128(*s, st10); - *s = or128(*s, st11); - *s = or128(*s, st12); - *s = or128(*s, st13); - *s = or128(*s, st14); - *s = or128(*s, st15); + *conf8 = movq(*s); *s = rshiftbyte_m128(*s, 8); *conf8 ^= ~0ULL; } static really_inline -void get_conf_stride_2(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, - u64a domain_mask_adjusted, const u8 *ft, u64a *conf0, - u64a *conf8, m128 *s) { - u64a current_data_0; - u64a current_data_8; +void get_conf_stride_2(const u8 *itPtr, UNUSED const u8 *start_ptr, + UNUSED const u8 *end_ptr, u32 domain_mask_flipped, + const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) { + assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr); + u64a reach0 = andn(domain_mask_flipped, (const u32 *)itPtr); + u64a reach2 = andn(domain_mask_flipped, (const u32 *)(itPtr + 2)); + u64a reach4 = andn(domain_mask_flipped, (const u32 *)(itPtr + 4)); + u64a reach6 = andn(domain_mask_flipped, (const u32 *)(itPtr + 6)); - current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr); - u64a v0 = 
(current_data_0 << 1) & domain_mask_adjusted; - u64a v2 = (current_data_0 >> 15) & domain_mask_adjusted; - u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted; - u64a v6 = (current_data_0 >> 47) & domain_mask_adjusted; - current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr); - u64a v8 = (current_data_8 << 1) & domain_mask_adjusted; - u64a v10 = (current_data_8 >> 15) & domain_mask_adjusted; - u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted; - u64a v14 = (current_data_8 >> 47) & domain_mask_adjusted; + m128 st0 = load_m128_from_u64a(ft + reach0); + m128 st2 = load_m128_from_u64a(ft + reach2); + m128 st4 = load_m128_from_u64a(ft + reach4); + m128 st6 = load_m128_from_u64a(ft + reach6); - m128 st0 = *(const m128 *)(ft + v0*8); - m128 st2 = *(const m128 *)(ft + v2*8); - m128 st4 = *(const m128 *)(ft + v4*8); - m128 st6 = *(const m128 *)(ft + v6*8); - m128 st8 = *(const m128 *)(ft + v8*8); - m128 st10 = *(const m128 *)(ft + v10*8); - m128 st12 = *(const m128 *)(ft + v12*8); - m128 st14 = *(const m128 *)(ft + v14*8); + u64a reach8 = andn(domain_mask_flipped, (const u32 *)(itPtr + 8)); + u64a reach10 = andn(domain_mask_flipped, (const u32 *)(itPtr + 10)); + u64a reach12 = andn(domain_mask_flipped, (const u32 *)(itPtr + 12)); + u64a reach14 = andn(domain_mask_flipped, (const u32 *)(itPtr + 14)); + + m128 st8 = load_m128_from_u64a(ft + reach8); + m128 st10 = load_m128_from_u64a(ft + reach10); + m128 st12 = load_m128_from_u64a(ft + reach12); + m128 st14 = load_m128_from_u64a(ft + reach14); st2 = lshiftbyte_m128(st2, 2); st4 = lshiftbyte_m128(st4, 4); st6 = lshiftbyte_m128(st6, 6); - st10 = lshiftbyte_m128(st10, 2); - st12 = lshiftbyte_m128(st12, 4); - st14 = lshiftbyte_m128(st14, 6); *s = or128(*s, st0); *s = or128(*s, st2); *s = or128(*s, st4); *s = or128(*s, st6); + *conf0 = movq(*s); *s = rshiftbyte_m128(*s, 8); *conf0 ^= ~0ULL; + st10 = lshiftbyte_m128(st10, 2); + st12 = lshiftbyte_m128(st12, 4); + st14 = lshiftbyte_m128(st14, 6); + *s = or128(*s, st8); *s = or128(*s, st10); *s = or128(*s, st12); *s = or128(*s, st14); + *conf8 = movq(*s); *s = rshiftbyte_m128(*s, 8); *conf8 ^= ~0ULL; } static really_inline -void get_conf_stride_4(const u8 *itPtr, const u8 *start_ptr, const u8 *end_ptr, - u64a domain_mask_adjusted, const u8 *ft, u64a *conf0, - u64a *conf8, m128 *s) { - u64a current_data_0; - u64a current_data_8; +void get_conf_stride_4(const u8 *itPtr, UNUSED const u8 *start_ptr, + UNUSED const u8 *end_ptr, u32 domain_mask_flipped, + const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) { + assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr); + u64a reach0 = andn(domain_mask_flipped, (const u32 *)itPtr); + u64a reach4 = andn(domain_mask_flipped, (const u32 *)(itPtr + 4)); + u64a reach8 = andn(domain_mask_flipped, (const u32 *)(itPtr + 8)); + u64a reach12 = andn(domain_mask_flipped, (const u32 *)(itPtr + 12)); - current_data_0 = lv_u64a(itPtr + 0, start_ptr, end_ptr); - u64a v0 = (current_data_0 << 1) & domain_mask_adjusted; - u64a v4 = (current_data_0 >> 31) & domain_mask_adjusted; - current_data_8 = lv_u64a(itPtr + 8, start_ptr, end_ptr); - u64a v8 = (current_data_8 << 1) & domain_mask_adjusted; - u64a v12 = (current_data_8 >> 31) & domain_mask_adjusted; - - m128 st0 = *(const m128 *)(ft + v0*8); - m128 st4 = *(const m128 *)(ft + v4*8); - m128 st8 = *(const m128 *)(ft + v8*8); - m128 st12 = *(const m128 *)(ft + v12*8); + m128 st0 = load_m128_from_u64a(ft + reach0); + m128 st4 = load_m128_from_u64a(ft + reach4); + m128 st8 = load_m128_from_u64a(ft + reach8); + m128 st12 = 
load_m128_from_u64a(ft + reach12);

     st4 = lshiftbyte_m128(st4, 4);
     st12 = lshiftbyte_m128(st12, 4);
@@ -494,6 +508,7 @@ void createShortZone(const u8 *buf, const u8 *hend, const u8 *begin,

     /* copy the post-padding byte; this is required for domain > 8 due to
      * overhang */
+    assert(ZONE_SHORT_DATA_OFFSET + copy_len + 3 < 64);
     *z_end = 0;

     z->end = z_end;
@@ -564,15 +579,19 @@ void createStartZone(const u8 *buf, const u8 *hend, const u8 *begin,
     storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));

     z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end);
+
+    assert(ZONE_START_BEGIN + copy_len + 3 < 64);
 }

 /**
  * \brief Create a zone for the end region.
  *
  * This function requires that there is > ITER_BYTES of data in the buffer to
- * scan. The end zone, however, is only responsible for a scanning the <=
- * ITER_BYTES rump of data. The end zone is required to handle a full ITER_BYTES
- * iteration as the main loop cannot handle the last byte of the buffer.
+ * scan. The end zone is responsible for scanning the <= ITER_BYTES rump of
+ * data, plus an optional additional full ITER_BYTES. The main zone cannot
+ * handle the last 3 bytes of the buffer, so when fewer than 3 bytes are left
+ * for the end zone to scan, the main zone is reduced by ITER_BYTES and the
+ * end zone handles that full iteration as well.
  *
  * This zone ensures that the byte at z->end can be read by filling it with a
  * padding character.
@@ -590,31 +609,45 @@ void createEndZone(const u8 *buf, const u8 *begin, const u8 *end,
     ptrdiff_t z_len = end - begin;
     assert(z_len > 0);
-    assert(z_len <= ITER_BYTES);

+    size_t iter_bytes_second = 0;
+    size_t z_len_first = z_len;
+    if (z_len > ITER_BYTES) {
+        z_len_first = z_len - ITER_BYTES;
+        iter_bytes_second = ITER_BYTES;
+    }
+    z->shift = ITER_BYTES - z_len_first;

-    z->shift = ITER_BYTES - z_len;
+    const u8 *end_first = end - iter_bytes_second;
+    /* The amount of data we have to copy from main buffer for the
+     * first iteration. */
+    size_t copy_len_first = MIN((size_t)(end_first - buf),
+                                ITER_BYTES + sizeof(CONF_TYPE));
+    assert(copy_len_first >= 16);

-    /* The amount of data we have to copy from main buffer. 
*/ - size_t copy_len = MIN((size_t)(end - buf), - ITER_BYTES + sizeof(CONF_TYPE)); - assert(copy_len >= 16); + size_t total_copy_len = copy_len_first + iter_bytes_second; + assert(total_copy_len + 3 < 64); /* copy the post-padding byte; this is required for domain > 8 due to * overhang */ - z->buf[copy_len] = 0; + z->buf[total_copy_len] = 0; /* set the start and end location of the zone buf * to be scanned */ - u8 *z_end = z->buf + copy_len; + u8 *z_end = z->buf + total_copy_len; z->end = z_end; - z->start = z_end - ITER_BYTES; + z->start = z_end - ITER_BYTES - iter_bytes_second; assert(z->start + z->shift == z_end - z_len); + u8 *z_end_first = z_end - iter_bytes_second; /* copy the first 8 bytes of the valid region */ - unaligned_store_u64a(z->buf, unaligned_load_u64a(end - copy_len)); + unaligned_store_u64a(z->buf, + unaligned_load_u64a(end_first - copy_len_first)); /* copy the last 16 bytes, may overlap with the previous 8 byte write */ - storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128))); + storeu128(z_end_first - sizeof(m128), loadu128(end_first - sizeof(m128))); + if (iter_bytes_second) { + storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128))); + } z->zone_pointer_adjust = (ptrdiff_t)((uintptr_t)end - (uintptr_t)z_end); } @@ -649,13 +682,13 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend, /* find maximum buffer location that the main zone can scan * - must be a multiple of ITER_BYTES, and - * - cannot contain the last byte (due to overhang) + * - cannot contain the last 3 bytes (due to 3 bytes read behind the + end of buffer in FDR main loop) */ - const u8 *main_end = buf + start + ROUNDDOWN_N(len - start - 1, ITER_BYTES); - assert(main_end >= ptr); + const u8 *main_end = buf + start + ROUNDDOWN_N(len - start - 3, ITER_BYTES); /* create a zone if multiple of ITER_BYTES are found */ - if (main_end != ptr) { + if (main_end > ptr) { createMainZone(flood, ptr, main_end, &zoneArr[numZone++]); ptr = main_end; } @@ -682,10 +715,10 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend, return HWLM_TERMINATED; \ } \ } \ - __builtin_prefetch(itPtr + (ITER_BYTES*4)); \ + __builtin_prefetch(itPtr + ITER_BYTES); \ u64a conf0; \ u64a conf8; \ - get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_adjusted, \ + get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_flipped, \ ft, &conf0, &conf8, &s); \ do_confirm_fdr(&conf0, 0, &control, confBase, a, itPtr, \ &last_match_id, zz); \ @@ -703,10 +736,11 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr, hwlm_group_t control) { u32 floodBackoff = FLOOD_BACKOFF_START; u32 last_match_id = INVALID_MATCH_ID; - u64a domain_mask_adjusted = fdr->domainMask << 1; + u32 domain_mask_flipped = ~fdr->domainMask; u8 stride = fdr->stride; - const u8 *ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR)); - const u32 *confBase = (const u32 *)(ft + fdr->tabSize); + const u64a *ft = + (const u64a *)((const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR))); + const u32 *confBase = (const u32 *)((const u8 *)ft + fdr->tabSize); struct zone zones[ZONE_MAX]; assert(fdr->domain > 8 && fdr->domain < 16); diff --git a/src/fdr/fdr_engine_description.cpp b/src/fdr/fdr_engine_description.cpp index e44bfbb5..2f9ba420 100644 --- a/src/fdr/fdr_engine_description.cpp +++ b/src/fdr/fdr_engine_description.cpp @@ -54,7 +54,7 @@ u32 FDREngineDescription::getDefaultFloodSuffixLength() const { } void getFdrDescriptions(vector *out) { - static const FDREngineDef def = {0, 128, 8, 0}; + static const FDREngineDef def = {0, 64, 8, 0}; out->clear(); 
out->emplace_back(def); } diff --git a/src/fdr/flood_runtime.h b/src/fdr/flood_runtime.h index 97723be5..d3f6b3b2 100644 --- a/src/fdr/flood_runtime.h +++ b/src/fdr/flood_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -100,7 +100,7 @@ const u8 * floodDetect(const struct FDR * fdr, // tryFloodDetect is never put in places where unconditional // reads a short distance forward or backward here // TODO: rationale for this line needs to be rediscovered!! - size_t mainLoopLen = len > iterBytes ? len - iterBytes : 0; + size_t mainLoopLen = len > 2 * iterBytes ? len - 2 * iterBytes : 0; const u32 i = ptr - buf; u32 j = i; diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index e8676249..c6d43f57 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -180,15 +180,9 @@ static really_inline u64a movq(const m128 in) { /* another form of movq */ static really_inline m128 load_m128_from_u64a(const u64a *p) { -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) - /* unfortunately _mm_loadl_epi64() is best avoided as it seems to cause - * trouble on some older compilers, possibly because it is misdefined to - * take an m128 as its parameter */ - return _mm_set_epi64((__m64)0ULL, (__m64)*p); -#else - /* ICC doesn't like casting to __m64 */ - return _mm_loadl_epi64((const m128 *)p); -#endif + m128 out; + __asm__ ("vmovq\t%1,%0" : "=x"(out) :"m"(*p)); + return out; } #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed) From f4bda9def66f37720c9dd40a2f62fc100879137d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 23 Feb 2017 16:36:28 +1100 Subject: [PATCH 110/326] rose: build RoseEngine prototype for bytecode Rather than tracking a great many offsets in u32 variables, build them in a RoseEngine prototype. --- src/rose/rose_build_bytecode.cpp | 411 ++++++++++++++----------------- src/rose/rose_build_scatter.cpp | 22 +- src/rose/rose_build_scatter.h | 12 +- 3 files changed, 198 insertions(+), 247 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c08713df..bb8cb8a4 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -481,7 +481,7 @@ bool needsMpvCatchup(const RoseBuildImpl &build) { } static -void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, +void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, u32 anchorStateSize, u32 activeArrayCount, u32 activeLeftCount, u32 laggedRoseCount, u32 longLitStreamStateRequired, u32 historyRequired, @@ -513,7 +513,7 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, curr_offset += anchorStateSize; so->groups = curr_offset; - so->groups_size = (tbi.group_end + 7) / 8; + so->groups_size = (build.group_end + 7) / 8; assert(so->groups_size <= sizeof(u64a)); curr_offset += so->groups_size; @@ -523,22 +523,22 @@ void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount, // Exhaustion multibit. 
so->exhausted = curr_offset;
-    curr_offset += mmbit_size(tbi.rm.numEkeys());
+    curr_offset += mmbit_size(build.rm.numEkeys());

     // SOM locations and valid/writeable multibit structures.
-    if (tbi.ssm.numSomSlots()) {
-        const u32 somWidth = tbi.ssm.somPrecision();
+    if (build.ssm.numSomSlots()) {
+        const u32 somWidth = build.ssm.somPrecision();
         if (somWidth) { // somWidth is zero in block mode.
             curr_offset = ROUNDUP_N(curr_offset, somWidth);
             so->somLocation = curr_offset;
-            curr_offset += tbi.ssm.numSomSlots() * somWidth;
+            curr_offset += build.ssm.numSomSlots() * somWidth;
         } else {
             so->somLocation = 0;
         }
         so->somValid = curr_offset;
-        curr_offset += mmbit_size(tbi.ssm.numSomSlots());
+        curr_offset += mmbit_size(build.ssm.numSomSlots());
         so->somWritable = curr_offset;
-        curr_offset += mmbit_size(tbi.ssm.numSomSlots());
+        curr_offset += mmbit_size(build.ssm.numSomSlots());
     } else {
         // No SOM handling, avoid growing the stream state any further.
         so->somLocation = 0;
@@ -2310,17 +2310,16 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build,
 }

 static
-void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc,
-                          const QueueIndexFactory &qif,
-                          vector<u32> *out) {
-    out->resize(qif.allocated_count());
+vector<u32> buildSuffixEkeyLists(const RoseBuildImpl &build, build_context &bc,
+                                 const QueueIndexFactory &qif) {
+    vector<u32> out(qif.allocated_count());

-    map<u32, vector<u32> > qi_to_ekeys; /* for determinism */
+    map<u32, vector<u32>> qi_to_ekeys; /* for determinism */

     for (const auto &e : bc.suffixes) {
         const suffix_id &s = e.first;
         u32 qi = e.second;
-        set<u32> ekeys = reportsToEkeys(all_reports(s), tbi.rm);
+        set<u32> ekeys = reportsToEkeys(all_reports(s), build.rm);

         if (!ekeys.empty()) {
             qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()};
@@ -2328,9 +2327,9 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc,
     }

     /* for each outfix also build elists */
-    for (const auto &outfix : tbi.outfixes) {
+    for (const auto &outfix : build.outfixes) {
         u32 qi = outfix.get_queue();
-        set<u32> ekeys = reportsToEkeys(all_reports(outfix), tbi.rm);
+        set<u32> ekeys = reportsToEkeys(all_reports(outfix), build.rm);

         if (!ekeys.empty()) {
             qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()};
@@ -2338,11 +2337,14 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc,
     }

     for (auto &e : qi_to_ekeys) {
-        assert(!e.second.empty());
-        e.second.push_back(INVALID_EKEY); /* terminator */
-        (*out)[e.first] = bc.engine_blob.add(e.second.begin(),
-                                             e.second.end());
+        u32 qi = e.first;
+        auto &ekeys = e.second;
+        assert(!ekeys.empty());
+        ekeys.push_back(INVALID_EKEY); /* terminator */
+        out[qi] = bc.engine_blob.add(ekeys.begin(), ekeys.end());
     }
+
+    return out;
 }

 /** Returns sparse iter offset in engine blob. 
*/
@@ -2632,8 +2634,10 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) {
 }

 static
-void buildActiveLeftIter(const vector<LeftNfaInfo> &leftTable,
-                         vector<mmbit_sparse_iter> &out) {
+vector<mmbit_sparse_iter>
+buildActiveLeftIter(const vector<LeftNfaInfo> &leftTable) {
+    vector<mmbit_sparse_iter> out;
+
     vector<u32> keys;
     for (size_t i = 0; i < leftTable.size(); i++) {
         if (!leftTable[i].transient) {
@@ -2645,11 +2649,11 @@ void buildActiveLeftIter(const vector<LeftNfaInfo> &leftTable,
     DEBUG_PRINTF("%zu active roses\n", keys.size());

     if (keys.empty()) {
-        out.clear();
-        return;
+        return out;
     }

     mmbBuildSparseIterator(out, keys, leftTable.size());
+    return out;
 }

 static
@@ -3376,7 +3380,7 @@ void writeSomOperation(const Report &report, som_operation *op) {
 }

 static
-void makeReport(RoseBuildImpl &build, const ReportID id,
+void makeReport(const RoseBuildImpl &build, const ReportID id,
                 const bool has_som, RoseProgram &program) {
     assert(id < build.rm.numReports());
     const Report &report = build.rm.getReport(id);
@@ -3845,7 +3849,7 @@ RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc,
 }

 static
-u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc,
+u32 writeBoundaryProgram(const RoseBuildImpl &build, build_context &bc,
                          const set<ReportID> &reports) {
     if (reports.empty()) {
         return 0;
     }
@@ -3865,13 +3869,10 @@
 }

 static
-RoseBoundaryReports
-makeBoundaryPrograms(RoseBuildImpl &build, build_context &bc,
-                     const BoundaryReports &boundary,
-                     const DerivedBoundaryReports &dboundary) {
-    RoseBoundaryReports out;
-    memset(&out, 0, sizeof(out));
-
+void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc,
+                          const BoundaryReports &boundary,
+                          const DerivedBoundaryReports &dboundary,
+                          RoseBoundaryReports &out) {
     DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size());
     DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size());
     DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size());
@@ -3882,8 +3883,6 @@ makeBoundaryPrograms(RoseBuildImpl &build, build_context &bc,
         writeBoundaryProgram(build, bc, boundary.report_at_0);
     out.reportZeroEodOffset =
         writeBoundaryProgram(build, bc, dboundary.report_at_0_eod_full);
-
-    return out;
 }

 static
@@ -5305,6 +5304,21 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build,
 }

 aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
+    // We keep all our offsets, counts etc. in a prototype RoseEngine which we
+    // will copy into the real one once it is allocated: we can't do this
+    // until we know how big it will be.
+    RoseEngine proto;
+    memset(&proto, 0, sizeof(proto));
+
+    // Set scanning mode.
+    if (!cc.streaming) {
+        proto.mode = HS_MODE_BLOCK;
+    } else if (cc.vectored) {
+        proto.mode = HS_MODE_VECTORED;
+    } else {
+        proto.mode = HS_MODE_STREAM;
+    }
+
     DerivedBoundaryReports dboundary(boundary);

     size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. 
@@ -5312,11 +5326,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { historyRequired); DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); - build_context bc; groupByFragment(*this); auto anchored_dfas = buildAnchoredDfas(*this); + build_context bc; bc.floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this, anchored_dfas); bc.longLitLengthThreshold = longLitLengthThreshold; @@ -5327,32 +5341,30 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { } bc.needs_mpv_catchup = needsMpvCatchup(*this); - auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary); + makeBoundaryPrograms(*this, bc, boundary, dboundary, proto.boundary); - u32 reportProgramOffset; - u32 reportProgramCount; - tie(reportProgramOffset, reportProgramCount) = + tie(proto.reportProgramOffset, proto.reportProgramCount) = buildReportPrograms(*this, bc); // Build NFAs - set no_retrigger_queues; bool mpv_as_outfix; prepMpv(*this, bc, &historyRequired, &mpv_as_outfix); - u32 outfixBeginQueue = qif.allocated_count(); + proto.outfixBeginQueue = qif.allocated_count(); if (!prepOutfixes(*this, bc, &historyRequired)) { return nullptr; } - u32 outfixEndQueue = qif.allocated_count(); - u32 leftfixBeginQueue = outfixEndQueue; + proto.outfixEndQueue = qif.allocated_count(); + proto.leftfixBeginQueue = proto.outfixEndQueue; + set no_retrigger_queues; set eager_queues; /* Note: buildNfas may reduce the lag for vertices that have prefixes */ if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues, - &leftfixBeginQueue)) { + &proto.leftfixBeginQueue)) { return nullptr; } - u32 eodNfaIterOffset = buildEodNfaIterator(bc, leftfixBeginQueue); + u32 eodNfaIterOffset = buildEodNfaIterator(bc, proto.leftfixBeginQueue); buildCountingMiracles(bc); u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q; @@ -5361,15 +5373,14 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - vector suffixEkeyLists; - buildSuffixEkeyLists(*this, bc, qif, &suffixEkeyLists); + auto suffixEkeyLists = buildSuffixEkeyLists(*this, bc, qif); assignStateIndices(*this, bc); u32 laggedRoseCount = 0; vector leftInfoTable; - buildLeftInfoTable(*this, bc, eager_queues, leftfixBeginQueue, - queue_count - leftfixBeginQueue, leftInfoTable, + buildLeftInfoTable(*this, bc, eager_queues, proto.leftfixBeginQueue, + queue_count - proto.leftfixBeginQueue, leftInfoTable, &laggedRoseCount, &historyRequired); // Information only needed for program construction. 
@@ -5377,32 +5388,25 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { prog_build.vertex_group_map = getVertexGroupMap(*this); prog_build.squashable_groups = getSquashableGroups(*this); - u32 anchoredProgramOffset; - u32 anchoredProgramCount; - tie(anchoredProgramOffset, anchoredProgramCount) = + tie(proto.anchoredProgramOffset, proto.anchored_count) = writeAnchoredPrograms(*this, bc, prog_build); - u32 delayProgramOffset; - u32 delayProgramCount; - tie(delayProgramOffset, delayProgramCount) = + tie(proto.delayProgramOffset, proto.delay_count) = writeDelayPrograms(*this, bc, prog_build); buildLiteralPrograms(*this, bc, prog_build); - u32 eodProgramOffset = + proto.eodProgramOffset = writeEodProgram(*this, bc, prog_build, eodNfaIterOffset); size_t longLitStreamStateRequired = 0; - u32 longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob, + proto.longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob, bc.longLiterals, longLitLengthThreshold, &historyRequired, &longLitStreamStateRequired); - vector activeLeftIter; - buildActiveLeftIter(leftInfoTable, activeLeftIter); - - u32 lastByteOffset = buildLastByteIter(g, bc); - u32 eagerIterOffset = buildEagerQueueIter(eager_queues, leftfixBeginQueue, - queue_count, bc); + proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc); + proto.eagerIterOffset = buildEagerQueueIter( + eager_queues, proto.leftfixBeginQueue, queue_count, bc); // Enforce role table resource limit. if (num_vertices(g) > cc.grey.limitRoseRoleCount) { @@ -5423,11 +5427,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build anchored matcher. size_t asize = 0; - u32 amatcherOffset = 0; auto atable = buildAnchoredMatcher(*this, anchored_dfas, &asize); if (atable) { currOffset = ROUNDUP_CL(currOffset); - amatcherOffset = currOffset; + proto.amatcherOffset = currOffset; currOffset += verify_u32(asize); } @@ -5436,10 +5439,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { size_t fsize = 0; auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold, &fgroups, &fsize, &historyRequired); - u32 fmatcherOffset = 0; if (ftable) { currOffset = ROUNDUP_CL(currOffset); - fmatcherOffset = currOffset; + proto.fmatcherOffset = currOffset; currOffset += verify_u32(fsize); bc.resources.has_floating = true; } @@ -5448,64 +5450,60 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { size_t drsize = 0; auto drtable = buildDelayRebuildMatcher(*this, bc.longLitLengthThreshold, &drsize); - u32 drmatcherOffset = 0; if (drtable) { currOffset = ROUNDUP_CL(currOffset); - drmatcherOffset = currOffset; + proto.drmatcherOffset = currOffset; currOffset += verify_u32(drsize); } // Build EOD-anchored HWLM matcher. size_t esize = 0; auto etable = buildEodAnchoredMatcher(*this, &esize); - u32 ematcherOffset = 0; if (etable) { currOffset = ROUNDUP_CL(currOffset); - ematcherOffset = currOffset; + proto.ematcherOffset = currOffset; currOffset += verify_u32(esize); } // Build small-block HWLM matcher. 
size_t sbsize = 0; auto sbtable = buildSmallBlockMatcher(*this, &sbsize); - u32 sbmatcherOffset = 0; if (sbtable) { currOffset = ROUNDUP_CL(currOffset); - sbmatcherOffset = currOffset; + proto.sbmatcherOffset = currOffset; currOffset += verify_u32(sbsize); } - u32 leftOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo)); - u32 roseLen = sizeof(LeftNfaInfo) * leftInfoTable.size(); - currOffset = leftOffset + roseLen; + currOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo)); + proto.leftOffset = currOffset; + currOffset += sizeof(LeftNfaInfo) * leftInfoTable.size(); - u32 lookaroundReachOffset = currOffset; - u32 lookaroundReachLen = REACH_BITVECTOR_LEN * bc.lookaround.size(); - currOffset = lookaroundReachOffset + lookaroundReachLen; + proto.lookaroundReachOffset = currOffset; + currOffset += REACH_BITVECTOR_LEN * bc.lookaround.size(); - u32 lookaroundTableOffset = currOffset; - u32 lookaroundTableLen = sizeof(s8) * bc.lookaround.size(); - currOffset = lookaroundTableOffset + lookaroundTableLen; + proto.lookaroundTableOffset = currOffset; + currOffset += sizeof(s8) * bc.lookaround.size(); - u32 nfaInfoOffset = ROUNDUP_N(currOffset, sizeof(u32)); - u32 nfaInfoLen = sizeof(NfaInfo) * queue_count; - currOffset = nfaInfoOffset + nfaInfoLen; + currOffset = ROUNDUP_N(currOffset, sizeof(u32)); + proto.nfaInfoOffset = currOffset; + currOffset += sizeof(NfaInfo) * queue_count; - currOffset = ROUNDUP_N(currOffset, alignof(mmbit_sparse_iter)); - u32 activeLeftIterOffset = currOffset; - currOffset += activeLeftIter.size() * sizeof(mmbit_sparse_iter); + auto activeLeftIter = buildActiveLeftIter(leftInfoTable); + if (!activeLeftIter.empty()) { + currOffset = ROUNDUP_N(currOffset, alignof(mmbit_sparse_iter)); + proto.activeLeftIterOffset = currOffset; + currOffset += activeLeftIter.size() * sizeof(mmbit_sparse_iter); + } - u32 activeArrayCount = leftfixBeginQueue; - u32 activeLeftCount = leftInfoTable.size(); - u32 rosePrefixCount = countRosePrefixes(leftInfoTable); + proto.activeArrayCount = proto.leftfixBeginQueue; + proto.activeLeftCount = verify_u32(leftInfoTable.size()); + proto.rosePrefixCount = countRosePrefixes(leftInfoTable); u32 rev_nfa_table_offset; vector rev_nfa_offsets; prepSomRevNfas(ssm, &rev_nfa_table_offset, &rev_nfa_offsets, &currOffset); - // Build engine header and copy tables into place. - - u32 anchorStateSize = atable ? anchoredStateSize(*atable) : 0; + proto.anchorStateSize = atable ? 
anchoredStateSize(*atable) : 0; DEBUG_PRINTF("rose history required %zu\n", historyRequired); assert(!cc.streaming || historyRequired <= cc.grey.maxHistoryAvailable); @@ -5516,169 +5514,122 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { assert(!cc.streaming || historyRequired <= max(cc.grey.maxHistoryAvailable, cc.grey.somMaxRevNfaLength)); - RoseStateOffsets stateOffsets; - memset(&stateOffsets, 0, sizeof(stateOffsets)); - fillStateOffsets(*this, bc.numStates, anchorStateSize, - activeArrayCount, activeLeftCount, laggedRoseCount, - longLitStreamStateRequired, historyRequired, - &stateOffsets); + fillStateOffsets(*this, bc.numStates, proto.anchorStateSize, + proto.activeArrayCount, proto.activeLeftCount, + laggedRoseCount, longLitStreamStateRequired, + historyRequired, &proto.stateOffsets); - scatter_plan_raw state_scatter; - buildStateScatterPlan(sizeof(u8), bc.numStates, - activeLeftCount, rosePrefixCount, stateOffsets, - cc.streaming, activeArrayCount, outfixBeginQueue, - outfixEndQueue, &state_scatter); + scatter_plan_raw state_scatter = buildStateScatterPlan( + sizeof(u8), bc.numStates, proto.activeLeftCount, proto.rosePrefixCount, + proto.stateOffsets, cc.streaming, proto.activeArrayCount, + proto.outfixBeginQueue, proto.outfixEndQueue); currOffset = ROUNDUP_N(currOffset, alignof(scatter_unit_u64a)); - u32 state_scatter_aux_offset = currOffset; currOffset += aux_size(state_scatter); currOffset = ROUNDUP_N(currOffset, alignof(ReportID)); - u32 dkeyOffset = currOffset; + proto.invDkeyOffset = currOffset; currOffset += rm.numDkeys() * sizeof(ReportID); - aligned_unique_ptr engine - = aligned_zmalloc_unique(currOffset); - assert(engine); // will have thrown bad_alloc otherwise. - char *ptr = (char *)engine.get(); - assert(ISALIGNED_CL(ptr)); + proto.historyRequired = verify_u32(historyRequired); + proto.ekeyCount = rm.numEkeys(); + proto.dkeyCount = rm.numDkeys(); + proto.dkeyLogSize = fatbit_size(proto.dkeyCount); - if (atable) { - assert(amatcherOffset); - memcpy(ptr + amatcherOffset, atable.get(), asize); - } - if (ftable) { - assert(fmatcherOffset); - memcpy(ptr + fmatcherOffset, ftable.get(), fsize); - } - if (drtable) { - assert(drmatcherOffset); - memcpy(ptr + drmatcherOffset, drtable.get(), drsize); - } - if (etable) { - assert(ematcherOffset); - memcpy(ptr + ematcherOffset, etable.get(), esize); - } - if (sbtable) { - assert(sbmatcherOffset); - memcpy(ptr + sbmatcherOffset, sbtable.get(), sbsize); - } + proto.somHorizon = ssm.somPrecision(); + proto.somLocationCount = ssm.numSomSlots(); + proto.somLocationFatbitSize = fatbit_size(proto.somLocationCount); - memcpy(&engine->stateOffsets, &stateOffsets, sizeof(stateOffsets)); + proto.needsCatchup = bc.needs_catchup ? 
1 : 0; - engine->historyRequired = verify_u32(historyRequired); + proto.runtimeImpl = pickRuntimeImpl(*this, bc, proto.outfixEndQueue); + proto.mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); - engine->ekeyCount = rm.numEkeys(); - engine->dkeyCount = rm.numDkeys(); - engine->dkeyLogSize = fatbit_size(engine->dkeyCount); - engine->invDkeyOffset = dkeyOffset; - copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable()); + proto.queueCount = queue_count; + proto.activeQueueArraySize = fatbit_size(queue_count); + proto.handledKeyCount = prog_build.handledKeys.size(); + proto.handledKeyFatbitSize = fatbit_size(proto.handledKeyCount); - engine->somHorizon = ssm.somPrecision(); - engine->somLocationCount = ssm.numSomSlots(); - engine->somLocationFatbitSize = fatbit_size(engine->somLocationCount); + proto.rolesWithStateCount = bc.numStates; - engine->needsCatchup = bc.needs_catchup ? 1 : 0; + proto.roseCount = verify_u32(leftInfoTable.size()); + proto.initMpvNfa = mpv_as_outfix ? 0 : MO_INVALID_IDX; + proto.stateSize = mmbit_size(bc.numStates); - engine->reportProgramOffset = reportProgramOffset; - engine->reportProgramCount = reportProgramCount; - engine->delayProgramOffset = delayProgramOffset; - engine->anchoredProgramOffset = anchoredProgramOffset; - engine->runtimeImpl = pickRuntimeImpl(*this, bc, outfixEndQueue); - engine->mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); - - engine->activeArrayCount = activeArrayCount; - engine->activeLeftCount = activeLeftCount; - engine->queueCount = queue_count; - engine->activeQueueArraySize = fatbit_size(queue_count); - engine->eagerIterOffset = eagerIterOffset; - engine->handledKeyCount = prog_build.handledKeys.size(); - engine->handledKeyFatbitSize = fatbit_size(engine->handledKeyCount); - - engine->rolesWithStateCount = bc.numStates; - - engine->leftOffset = leftOffset; - engine->roseCount = verify_u32(leftInfoTable.size()); - engine->lookaroundTableOffset = lookaroundTableOffset; - engine->lookaroundReachOffset = lookaroundReachOffset; - engine->outfixBeginQueue = outfixBeginQueue; - engine->outfixEndQueue = outfixEndQueue; - engine->leftfixBeginQueue = leftfixBeginQueue; - engine->initMpvNfa = mpv_as_outfix ? 0 : MO_INVALID_IDX; - engine->stateSize = mmbit_size(bc.numStates); - engine->anchorStateSize = anchorStateSize; - engine->nfaInfoOffset = nfaInfoOffset; - - engine->eodProgramOffset = eodProgramOffset; - - engine->lastByteHistoryIterOffset = lastByteOffset; - - engine->delay_count = delayProgramCount; - engine->delay_fatbit_size = fatbit_size(engine->delay_count); - engine->anchored_count = anchoredProgramCount; - engine->anchored_fatbit_size = fatbit_size(engine->anchored_count); - - engine->rosePrefixCount = rosePrefixCount; - - engine->activeLeftIterOffset - = activeLeftIter.empty() ? 0 : activeLeftIterOffset; - - // Set scanning mode. - if (!cc.streaming) { - engine->mode = HS_MODE_BLOCK; - } else if (cc.vectored) { - engine->mode = HS_MODE_VECTORED; - } else { - engine->mode = HS_MODE_STREAM; - } + proto.delay_fatbit_size = fatbit_size(proto.delay_count); + proto.anchored_fatbit_size = fatbit_size(proto.anchored_count); // The Small Write matcher is (conditionally) added to the RoseEngine in // another pass by the caller. Set to zero (meaning no SMWR engine) for // now. 
- engine->smallWriteOffset = 0; + proto.smallWriteOffset = 0; - engine->amatcherOffset = amatcherOffset; - engine->ematcherOffset = ematcherOffset; - engine->sbmatcherOffset = sbmatcherOffset; - engine->fmatcherOffset = fmatcherOffset; - engine->drmatcherOffset = drmatcherOffset; - engine->longLitTableOffset = longLitTableOffset; - engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); - engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); - engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); - engine->amatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_ANCHORED); - engine->fmatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_FLOATING); - engine->size = currOffset; - engine->minWidth = hasBoundaryReports(boundary) ? 0 : minWidth; - engine->minWidthExcludingBoundaries = minWidth; - engine->floatingMinLiteralMatchOffset = bc.floatingMinLiteralMatchOffset; + proto.amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); + proto.fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); + proto.eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); + proto.amatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_ANCHORED); + proto.fmatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_FLOATING); + proto.minWidth = hasBoundaryReports(boundary) ? 0 : minWidth; + proto.minWidthExcludingBoundaries = minWidth; + proto.floatingMinLiteralMatchOffset = bc.floatingMinLiteralMatchOffset; - engine->maxBiAnchoredWidth = findMaxBAWidth(*this); - engine->noFloatingRoots = hasNoFloatingRoots(); - engine->requiresEodCheck = hasEodAnchors(*this, bc, outfixEndQueue); - engine->hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); - engine->canExhaust = rm.patternSetCanExhaust(); - engine->hasSom = hasSom; + proto.maxBiAnchoredWidth = findMaxBAWidth(*this); + proto.noFloatingRoots = hasNoFloatingRoots(); + proto.requiresEodCheck = hasEodAnchors(*this, bc, proto.outfixEndQueue); + proto.hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); + proto.canExhaust = rm.patternSetCanExhaust(); + proto.hasSom = hasSom; /* populate anchoredDistance, floatingDistance, floatingMinDistance, etc */ - fillMatcherDistances(*this, engine.get()); + fillMatcherDistances(*this, &proto); - engine->initialGroups = getInitialGroups(); - engine->floating_group_mask = fgroups; - engine->totalNumLiterals = verify_u32(literal_info.size()); - engine->asize = verify_u32(asize); - engine->ematcherRegionSize = ematcher_region_size; - engine->longLitStreamState = verify_u32(longLitStreamStateRequired); + proto.initialGroups = getInitialGroups(); + proto.floating_group_mask = fgroups; + proto.totalNumLiterals = verify_u32(literal_info.size()); + proto.asize = verify_u32(asize); + proto.ematcherRegionSize = ematcher_region_size; + proto.longLitStreamState = verify_u32(longLitStreamStateRequired); - engine->boundary.reportEodOffset = boundary_out.reportEodOffset; - engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset; - engine->boundary.reportZeroEodOffset = boundary_out.reportZeroEodOffset; + proto.size = currOffset; + + // Time to allocate the real RoseEngine structure. + auto engine = aligned_zmalloc_unique(currOffset); + assert(engine); // will have thrown bad_alloc otherwise. + + // Copy in our prototype engine data. 
+ memcpy(engine.get(), &proto, sizeof(proto)); + + char *ptr = (char *)engine.get(); + assert(ISALIGNED_CL(ptr)); + + if (atable) { + assert(proto.amatcherOffset); + memcpy(ptr + proto.amatcherOffset, atable.get(), asize); + } + if (ftable) { + assert(proto.fmatcherOffset); + memcpy(ptr + proto.fmatcherOffset, ftable.get(), fsize); + } + if (drtable) { + assert(proto.drmatcherOffset); + memcpy(ptr + proto.drmatcherOffset, drtable.get(), drsize); + } + if (etable) { + assert(proto.ematcherOffset); + memcpy(ptr + proto.ematcherOffset, etable.get(), esize); + } + if (sbtable) { + assert(proto.sbmatcherOffset); + memcpy(ptr + proto.sbmatcherOffset, sbtable.get(), sbsize); + } + + copy_bytes(ptr + proto.invDkeyOffset, rm.getDkeyToReportTable()); write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); - NfaInfo *nfa_infos = (NfaInfo *)(ptr + nfaInfoOffset); + NfaInfo *nfa_infos = (NfaInfo *)(ptr + proto.nfaInfoOffset); populateNfaInfoBasics(*this, bc, outfixes, suffixEkeyLists, no_retrigger_queues, nfa_infos); updateNfaState(bc, &engine->stateOffsets, nfa_infos, @@ -5689,8 +5640,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.engine_blob.write_bytes(engine.get()); copy_bytes(ptr + engine->leftOffset, leftInfoTable); - fillLookaroundTables(ptr + lookaroundTableOffset, - ptr + lookaroundReachOffset, bc.lookaround); + fillLookaroundTables(ptr + proto.lookaroundTableOffset, + ptr + proto.lookaroundReachOffset, bc.lookaround); fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets); copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); diff --git a/src/rose/rose_build_scatter.cpp b/src/rose/rose_build_scatter.cpp index 8d30dd23..87085ae9 100644 --- a/src/rose/rose_build_scatter.cpp +++ b/src/rose/rose_build_scatter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -63,24 +63,24 @@ void merge_in(scatter_plan_raw *out, const scatter_plan_raw &in) { insert(&out->p_u8, out->p_u8.end(), in.p_u8); } -void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count, - u32 left_array_count, u32 left_prefix_count, - const RoseStateOffsets &stateOffsets, - bool streaming, u32 leaf_array_count, - u32 outfix_begin, u32 outfix_end, - scatter_plan_raw *out) { +scatter_plan_raw buildStateScatterPlan(u32 role_state_offset, + u32 role_state_count, u32 left_array_count, u32 left_prefix_count, + const RoseStateOffsets &stateOffsets, bool streaming, + u32 leaf_array_count, u32 outfix_begin, u32 outfix_end) { + scatter_plan_raw out; + /* init role array */ scatter_plan_raw spr_role; mmbBuildClearPlan(role_state_count, &spr_role); rebase(&spr_role, role_state_offset); - merge_in(out, spr_role); + merge_in(&out, spr_role); /* init rose array: turn on prefixes */ u32 rose_array_offset = stateOffsets.activeLeftArray; scatter_plan_raw spr_rose; mmbBuildInitRangePlan(left_array_count, 0, left_prefix_count, &spr_rose); rebase(&spr_rose, rose_array_offset); - merge_in(out, spr_rose); + merge_in(&out, spr_rose); /* suffix/outfix array */ scatter_plan_raw spr_leaf; @@ -91,7 +91,9 @@ void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count, mmbBuildClearPlan(leaf_array_count, &spr_leaf); } rebase(&spr_leaf, stateOffsets.activeLeafArray); - merge_in(out, spr_leaf); + merge_in(&out, spr_leaf); + + return out; } 
u32 aux_size(const scatter_plan_raw &raw) {
diff --git a/src/rose/rose_build_scatter.h b/src/rose/rose_build_scatter.h
index a159fe4e..67a82b99 100644
--- a/src/rose/rose_build_scatter.h
+++ b/src/rose/rose_build_scatter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -45,12 +45,10 @@ struct scatter_plan_raw {
     std::vector<scatter_unit_u8> p_u8;
 };

-void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count,
-                           u32 left_array_count, u32 left_prefix_count,
-                           const RoseStateOffsets &stateOffsets,
-                           bool streaming, u32 leaf_array_count,
-                           u32 outfix_begin, u32 outfix_end,
-                           scatter_plan_raw *out);
+scatter_plan_raw buildStateScatterPlan(u32 role_state_offset,
+    u32 role_state_count, u32 left_array_count, u32 left_prefix_count,
+    const RoseStateOffsets &stateOffsets, bool streaming,
+    u32 leaf_array_count, u32 outfix_begin, u32 outfix_end);

 u32 aux_size(const scatter_plan_raw &raw);

From 282f72e04d83aa86178b94b95b473b65b37b77ac Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 27 Feb 2017 16:33:43 +1100
Subject: [PATCH 111/326] rose: move SOM reverse NFAs to engine_blob

---
 src/rose/rose_build_bytecode.cpp | 68 ++++++++------------------------
 1 file changed, 16 insertions(+), 52 deletions(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index bb8cb8a4..18df4b18 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -2457,57 +2457,24 @@ struct DerivedBoundaryReports {
 };

 static
-void prepSomRevNfas(const SomSlotManager &ssm, u32 *rev_nfa_table_offset,
-                    vector<u32> *nfa_offsets, u32 *currOffset) {
-    const deque<aligned_unique_ptr<NFA>> &nfas = ssm.getRevNfas();
-
-    *currOffset = ROUNDUP_N(*currOffset, alignof(u32));
-    *rev_nfa_table_offset = *currOffset;
-    *currOffset += sizeof(u32) * nfas.size();
-
-    *currOffset = ROUNDUP_CL(*currOffset);
-    for (const auto &n : nfas) {
-        u32 bs_offset;
-        bs_offset = *currOffset;
-        nfa_offsets->push_back(bs_offset);
-        *currOffset += ROUNDUP_CL(n->length);
+void addSomRevNfas(build_context &bc, RoseEngine &proto,
+                   const SomSlotManager &ssm) {
+    const auto &nfas = ssm.getRevNfas();
+    vector<u32> nfa_offsets;
+    nfa_offsets.reserve(nfas.size());
+    for (const auto &nfa : nfas) {
+        assert(nfa);
+        u32 offset = bc.engine_blob.add(*nfa, nfa->length);
+        DEBUG_PRINTF("wrote SOM rev NFA %zu (len %u) to offset %u\n",
+                     nfa_offsets.size(), nfa->length, offset);
+        nfa_offsets.push_back(offset);
         /* note: som rev nfas don't need a queue assigned as only run in block
          * mode reverse */
     }
-    assert(nfa_offsets->size() == nfas.size());
-}
-
-static
-void fillInSomRevNfas(RoseEngine *engine, const SomSlotManager &ssm,
-                      u32 rev_nfa_table_offset,
-                      const vector<u32> &nfa_offsets) {
-    const deque<aligned_unique_ptr<NFA>> &nfas = ssm.getRevNfas();
-    assert(nfa_offsets.size() == nfas.size());
-
-    engine->somRevCount = (u32)nfas.size();
-    engine->somRevOffsetOffset = rev_nfa_table_offset;
-
-    if (nfas.empty()) {
-        return;
-    }
-
-    char *out = (char *)engine + rev_nfa_table_offset;
-    size_t table_size = sizeof(u32) * nfa_offsets.size();
-    memcpy(out, nfa_offsets.data(), table_size);
-    out = (char *)engine + ROUNDUP_CL(rev_nfa_table_offset + table_size);
-
-    // Write the SOM reverse NFAs into place. 
-    UNUSED size_t i = 0;
-    for (const auto &n : nfas) {
-        assert(n != nullptr);
-        assert(out == (char *)engine + nfa_offsets[i]);
-
-        memcpy(out, n.get(), n->length);
-        out += ROUNDUP_CL(n->length);
-        DEBUG_PRINTF("wrote som rev nfa with len %u\n", n->length);
-        ++i;
-    }
+    proto.somRevCount = verify_u32(nfas.size());
+    proto.somRevOffsetOffset =
+        bc.engine_blob.add(begin(nfa_offsets), end(nfa_offsets));
 }

 static
@@ -5408,6 +5375,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     proto.eagerIterOffset = buildEagerQueueIter(
         eager_queues, proto.leftfixBeginQueue, queue_count, bc);

+    addSomRevNfas(bc, proto, ssm);
+
     // Enforce role table resource limit.
     if (num_vertices(g) > cc.grey.limitRoseRoleCount) {
         throw ResourceLimitError();
@@ -5499,10 +5468,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     proto.activeLeftCount = verify_u32(leftInfoTable.size());
     proto.rosePrefixCount = countRosePrefixes(leftInfoTable);

-    u32 rev_nfa_table_offset;
-    vector<u32> rev_nfa_offsets;
-    prepSomRevNfas(ssm, &rev_nfa_table_offset, &rev_nfa_offsets, &currOffset);
-
     proto.anchorStateSize = atable ? anchoredStateSize(*atable) : 0;

     DEBUG_PRINTF("rose history required %zu\n", historyRequired);
@@ -5643,7 +5608,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     fillLookaroundTables(ptr + proto.lookaroundTableOffset,
                          ptr + proto.lookaroundReachOffset, bc.lookaround);

-    fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets);
     copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter);

From b6254ca11f7fb5c5cd77a9ad4e084678a08b86af Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 27 Feb 2017 16:55:02 +1100
Subject: [PATCH 112/326] rose: move active leftfix iter to engine blob

---
 src/rose/rose_build_bytecode.cpp | 38 ++++++++++++--------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 18df4b18..12b51757 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -2601,26 +2601,25 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) {
 }

 static
-vector<mmbit_sparse_iter>
-buildActiveLeftIter(const vector<LeftNfaInfo> &leftTable) {
-    vector<mmbit_sparse_iter> out;
-
+u32 writeActiveLeftIter(build_context &bc,
+                        const vector<LeftNfaInfo> &leftInfoTable) {
     vector<u32> keys;
     for (size_t i = 0; i < leftInfoTable.size(); i++) {
         if (!leftInfoTable[i].transient) {
             DEBUG_PRINTF("leftfix %zu is active\n", i);
             keys.push_back(verify_u32(i));
         }
     }

     DEBUG_PRINTF("%zu active leftfixes\n", keys.size());

     if (keys.empty()) {
-        return out;
+        return 0;
     }

-    mmbBuildSparseIterator(out, keys, leftTable.size());
-    return out;
+    vector<mmbit_sparse_iter> iter;
+    mmbBuildSparseIterator(iter, keys, verify_u32(leftInfoTable.size()));
+    return bc.engine_blob.add_iterator(iter);
 }

 static
@@ -5149,9 +5148,8 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
 }

 static
-u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
-                        u32 queue_count,
-                        build_context &bc) {
+u32 writeEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
+                        u32 queue_count, build_context &bc) {
     if (eager.empty()) {
         return 0;
     }
@@ -5372,8 +5370,9 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
                                            &longLitStreamStateRequired);
proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc); - proto.eagerIterOffset = buildEagerQueueIter( + proto.eagerIterOffset = writeEagerQueueIter( eager_queues, proto.leftfixBeginQueue, queue_count, bc); + proto.activeLeftIterOffset = writeActiveLeftIter(bc, leftInfoTable); addSomRevNfas(bc, proto, ssm); @@ -5457,13 +5456,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.nfaInfoOffset = currOffset; currOffset += sizeof(NfaInfo) * queue_count; - auto activeLeftIter = buildActiveLeftIter(leftInfoTable); - if (!activeLeftIter.empty()) { - currOffset = ROUNDUP_N(currOffset, alignof(mmbit_sparse_iter)); - proto.activeLeftIterOffset = currOffset; - currOffset += activeLeftIter.size() * sizeof(mmbit_sparse_iter); - } - proto.activeArrayCount = proto.leftfixBeginQueue; proto.activeLeftCount = verify_u32(leftInfoTable.size()); proto.rosePrefixCount = countRosePrefixes(leftInfoTable); @@ -5608,8 +5600,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { fillLookaroundTables(ptr + proto.lookaroundTableOffset, ptr + proto.lookaroundReachOffset, bc.lookaround); - copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter); - // Safety check: we shouldn't have written anything to the engine blob // after we copied it into the engine bytecode. assert(bc.engine_blob.size() == engineBlobSize); From e3d2d678330b4954b126d8b3d88087b1eb52ac49 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 28 Feb 2017 11:18:23 +1100 Subject: [PATCH 113/326] rose: move lookaround tables to engine blob --- src/rose/rose_build_bytecode.cpp | 26 +++++++++++++------------- src/rose/rose_build_dump.cpp | 4 ---- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 12b51757..05e46ca8 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2683,13 +2683,15 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, } static -void fillLookaroundTables(char *look_base, char *reach_base, - const vector &look_vec) { +void writeLookaroundTables(build_context &bc, RoseEngine &proto) { + const auto &look_vec = bc.lookaround; DEBUG_PRINTF("%zu lookaround table entries\n", look_vec.size()); - s8 *look = (s8 *)look_base; - u8 *reach = (u8 *)reach_base; // base for 256-bit bitvectors + vector look_table(look_vec.size(), 0); + vector reach_table(REACH_BITVECTOR_LEN * look_vec.size(), 0); + s8 *look = look_table.data(); + u8 *reach = reach_table.data(); for (const auto &le : look_vec) { *look = verify_s8(le.offset); const CharReach &cr = le.reach; @@ -2700,6 +2702,11 @@ void fillLookaroundTables(char *look_base, char *reach_base, ++look; reach += REACH_BITVECTOR_LEN; } + + proto.lookaroundTableOffset = + bc.engine_blob.add(begin(look_table), end(look_table)); + proto.lookaroundReachOffset = + bc.engine_blob.add(begin(reach_table), end(reach_table)); } static @@ -5376,6 +5383,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { addSomRevNfas(bc, proto, ssm); + writeLookaroundTables(bc, proto); + // Enforce role table resource limit. 
if (num_vertices(g) > cc.grey.limitRoseRoleCount) { throw ResourceLimitError(); @@ -5446,12 +5455,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.leftOffset = currOffset; currOffset += sizeof(LeftNfaInfo) * leftInfoTable.size(); - proto.lookaroundReachOffset = currOffset; - currOffset += REACH_BITVECTOR_LEN * bc.lookaround.size(); - - proto.lookaroundTableOffset = currOffset; - currOffset += sizeof(s8) * bc.lookaround.size(); - currOffset = ROUNDUP_N(currOffset, sizeof(u32)); proto.nfaInfoOffset = currOffset; currOffset += sizeof(NfaInfo) * queue_count; @@ -5597,9 +5600,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.engine_blob.write_bytes(engine.get()); copy_bytes(ptr + engine->leftOffset, leftInfoTable); - fillLookaroundTables(ptr + proto.lookaroundTableOffset, - ptr + proto.lookaroundReachOffset, bc.lookaround); - // Safety check: we shouldn't have written anything to the engine blob // after we copied it into the engine bytecode. assert(bc.engine_blob.size() == engineBlobSize); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 2f882e68..a13fc964 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1694,10 +1694,6 @@ void roseDumpText(const RoseEngine *t, FILE *f) { t->rolesWithStateCount * sizeof(u32)); fprintf(f, " - nfa info table : %zu bytes\n", t->queueCount * sizeof(NfaInfo)); - fprintf(f, " - lookaround table : %u bytes\n", - t->nfaInfoOffset - t->lookaroundTableOffset); - fprintf(f, " - lookaround reach : %u bytes\n", - t->lookaroundTableOffset - t->lookaroundReachOffset); fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end); fprintf(f, " - history buffer : %u bytes\n", t->historyRequired); From 395d6ae650f4e827d0494144494de08bf17f9fa0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 28 Feb 2017 13:46:07 +1100 Subject: [PATCH 114/326] rose: move dkey info to engine blob --- src/rose/rose_build_bytecode.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 05e46ca8..871d78c0 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2709,6 +2709,15 @@ void writeLookaroundTables(build_context &bc, RoseEngine &proto) { bc.engine_blob.add(begin(reach_table), end(reach_table)); } +static +void writeDkeyInfo(const ReportManager &rm, build_context &bc, + RoseEngine &proto) { + const auto inv_dkeys = rm.getDkeyToReportTable(); + proto.invDkeyOffset = bc.engine_blob.add(begin(inv_dkeys), end(inv_dkeys)); + proto.dkeyCount = rm.numDkeys(); + proto.dkeyLogSize = fatbit_size(proto.dkeyCount); +} + static bool hasBoundaryReports(const BoundaryReports &boundary) { if (!boundary.report_at_0.empty()) { @@ -5384,6 +5393,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { addSomRevNfas(bc, proto, ssm); writeLookaroundTables(bc, proto); + writeDkeyInfo(rm, bc, proto); // Enforce role table resource limit. 
if (num_vertices(g) > cc.grey.limitRoseRoleCount) { @@ -5488,14 +5498,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 state_scatter_aux_offset = currOffset; currOffset += aux_size(state_scatter); - currOffset = ROUNDUP_N(currOffset, alignof(ReportID)); - proto.invDkeyOffset = currOffset; - currOffset += rm.numDkeys() * sizeof(ReportID); - proto.historyRequired = verify_u32(historyRequired); proto.ekeyCount = rm.numEkeys(); - proto.dkeyCount = rm.numDkeys(); - proto.dkeyLogSize = fatbit_size(proto.dkeyCount); proto.somHorizon = ssm.somPrecision(); proto.somLocationCount = ssm.numSomSlots(); @@ -5584,8 +5588,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { memcpy(ptr + proto.sbmatcherOffset, sbtable.get(), sbsize); } - copy_bytes(ptr + proto.invDkeyOffset, rm.getDkeyToReportTable()); - write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); From c619621573805985411668397e58c8e3f00f7709 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 28 Feb 2017 13:55:46 +1100 Subject: [PATCH 115/326] rose: move leftfix info into engine blob, refactor --- src/rose/rose_build_bytecode.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 871d78c0..330f4e46 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2718,6 +2718,18 @@ void writeDkeyInfo(const ReportManager &rm, build_context &bc, proto.dkeyLogSize = fatbit_size(proto.dkeyCount); } +static +void writeLeftInfo(build_context &bc, RoseEngine &proto, + const vector &leftInfoTable) { + proto.leftOffset = + bc.engine_blob.add(begin(leftInfoTable), end(leftInfoTable)); + proto.activeLeftIterOffset = writeActiveLeftIter(bc, leftInfoTable); + proto.roseCount = verify_u32(leftInfoTable.size()); + proto.activeLeftCount = verify_u32(leftInfoTable.size()); + proto.rosePrefixCount = countRosePrefixes(leftInfoTable); + +} + static bool hasBoundaryReports(const BoundaryReports &boundary) { if (!boundary.report_at_0.empty()) { @@ -5388,12 +5400,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc); proto.eagerIterOffset = writeEagerQueueIter( eager_queues, proto.leftfixBeginQueue, queue_count, bc); - proto.activeLeftIterOffset = writeActiveLeftIter(bc, leftInfoTable); addSomRevNfas(bc, proto, ssm); writeLookaroundTables(bc, proto); writeDkeyInfo(rm, bc, proto); + writeLeftInfo(bc, proto, leftInfoTable); // Enforce role table resource limit. if (num_vertices(g) > cc.grey.limitRoseRoleCount) { @@ -5461,17 +5473,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset += verify_u32(sbsize); } - currOffset = ROUNDUP_N(currOffset, alignof(LeftNfaInfo)); - proto.leftOffset = currOffset; - currOffset += sizeof(LeftNfaInfo) * leftInfoTable.size(); - currOffset = ROUNDUP_N(currOffset, sizeof(u32)); proto.nfaInfoOffset = currOffset; currOffset += sizeof(NfaInfo) * queue_count; proto.activeArrayCount = proto.leftfixBeginQueue; - proto.activeLeftCount = verify_u32(leftInfoTable.size()); - proto.rosePrefixCount = countRosePrefixes(leftInfoTable); proto.anchorStateSize = atable ? anchoredStateSize(*atable) : 0; @@ -5517,7 +5523,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.rolesWithStateCount = bc.numStates; - proto.roseCount = verify_u32(leftInfoTable.size()); proto.initMpvNfa = mpv_as_outfix ? 
0 : MO_INVALID_IDX; proto.stateSize = mmbit_size(bc.numStates); @@ -5600,7 +5605,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Copy in other tables bc.engine_blob.write_bytes(engine.get()); - copy_bytes(ptr + engine->leftOffset, leftInfoTable); // Safety check: we shouldn't have written anything to the engine blob // after we copied it into the engine bytecode. From a0b0247e477f949c692aea1c99571cce415fc115 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 28 Feb 2017 14:39:37 +1100 Subject: [PATCH 116/326] rose: move NfaInfo to engine blob --- src/rose/rose_build_bytecode.cpp | 93 ++++++++++++++++---------------- 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 330f4e46..3afbb620 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2411,42 +2411,6 @@ bool anyEndfixMpvTriggers(const RoseBuildImpl &tbi) { return false; } -static -void populateNfaInfoBasics(const RoseBuildImpl &build, const build_context &bc, - const vector &outfixes, - const vector &ekeyListOffsets, - const set &no_retrigger_queues, - NfaInfo *infos) { - const u32 num_queues = build.qif.allocated_count(); - for (u32 qi = 0; qi < num_queues; qi++) { - const NFA *n = get_nfa_from_blob(bc, qi); - enforceEngineSizeLimit(n, n->length, build.cc.grey); - - NfaInfo &info = infos[qi]; - info.nfaOffset = bc.engineOffsets.at(qi); - info.ekeyListOffset = ekeyListOffsets[qi]; - info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0; - } - - // Mark outfixes that are in the small block matcher. - for (const auto &out : outfixes) { - const u32 qi = out.get_queue(); - infos[qi].in_sbmatcher = out.in_sbmatcher; - } - - // Mark suffixes triggered by EOD table literals. - const RoseGraph &g = build.g; - for (auto v : vertices_range(g)) { - if (!g[v].suffix) { - continue; - } - u32 qi = bc.suffixes.at(g[v].suffix); - if (build.isInETable(v)) { - infos[qi].eod = 1; - } - } -} - struct DerivedBoundaryReports { explicit DerivedBoundaryReports(const BoundaryReports &boundary) { insert(&report_at_0_eod_full, boundary.report_at_0_eod); @@ -2727,7 +2691,49 @@ void writeLeftInfo(build_context &bc, RoseEngine &proto, proto.roseCount = verify_u32(leftInfoTable.size()); proto.activeLeftCount = verify_u32(leftInfoTable.size()); proto.rosePrefixCount = countRosePrefixes(leftInfoTable); +} +static +void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, + RoseEngine &proto, const set &no_retrigger_queues) { + auto ekey_lists = buildSuffixEkeyLists(build, bc, build.qif); + + const u32 queue_count = build.qif.allocated_count(); + vector infos(queue_count); + memset(infos.data(), 0, sizeof(NfaInfo) * queue_count); + + for (u32 qi = 0; qi < queue_count; qi++) { + const NFA *n = get_nfa_from_blob(bc, qi); + enforceEngineSizeLimit(n, n->length, build.cc.grey); + + NfaInfo &info = infos[qi]; + info.nfaOffset = bc.engineOffsets.at(qi); + assert(qi < ekey_lists.size()); + info.ekeyListOffset = ekey_lists.at(qi); + info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0; + } + + // Mark outfixes that are in the small block matcher. + for (const auto &out : build.outfixes) { + const u32 qi = out.get_queue(); + assert(qi < infos.size()); + infos.at(qi).in_sbmatcher = out.in_sbmatcher; + } + + // Mark suffixes triggered by EOD table literals. 
+ const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (!g[v].suffix) { + continue; + } + u32 qi = bc.suffixes.at(g[v].suffix); + assert(qi < infos.size()); + if (build.isInETable(v)) { + infos.at(qi).eod = 1; + } + } + + proto.nfaInfoOffset = bc.engine_blob.add(begin(infos), end(infos)); } static @@ -5366,8 +5372,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - auto suffixEkeyLists = buildSuffixEkeyLists(*this, bc, qif); - assignStateIndices(*this, bc); u32 laggedRoseCount = 0; @@ -5405,6 +5409,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { writeLookaroundTables(bc, proto); writeDkeyInfo(rm, bc, proto); + writeNfaInfo(*this, bc, proto, no_retrigger_queues); writeLeftInfo(bc, proto, leftInfoTable); // Enforce role table resource limit. @@ -5473,10 +5478,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset += verify_u32(sbsize); } - currOffset = ROUNDUP_N(currOffset, sizeof(u32)); - proto.nfaInfoOffset = currOffset; - currOffset += sizeof(NfaInfo) * queue_count; - proto.activeArrayCount = proto.leftfixBeginQueue; proto.anchorStateSize = atable ? anchoredStateSize(*atable) : 0; @@ -5596,16 +5597,14 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); + // Copy in the engine blob. + bc.engine_blob.write_bytes(engine.get()); + NfaInfo *nfa_infos = (NfaInfo *)(ptr + proto.nfaInfoOffset); - populateNfaInfoBasics(*this, bc, outfixes, suffixEkeyLists, - no_retrigger_queues, nfa_infos); updateNfaState(bc, &engine->stateOffsets, nfa_infos, &engine->scratchStateSize, &engine->nfaStateSize, &engine->tStateSize); - // Copy in other tables - bc.engine_blob.write_bytes(engine.get()); - // Safety check: we shouldn't have written anything to the engine blob // after we copied it into the engine bytecode. assert(bc.engine_blob.size() == engineBlobSize); From 246f9f4f86f41de57843b6806bb001212c7ee93e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 28 Feb 2017 17:03:59 +1100 Subject: [PATCH 117/326] rose: update nfa info earlier, in engine blob --- src/rose/rose_build_bytecode.cpp | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 3afbb620..979098e4 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2150,9 +2150,19 @@ void findTransientQueues(const map &leftfix_info, } static -void updateNfaState(const build_context &bc, RoseStateOffsets *so, - NfaInfo *nfa_infos, u32 *fullStateSize, u32 *nfaStateSize, +void updateNfaState(const build_context &bc, RoseEngine &proto, + RoseStateOffsets *so, u32 *fullStateSize, u32 *nfaStateSize, u32 *tStateSize) { + if (!proto.nfaInfoOffset) { + assert(bc.engineOffsets.empty()); + return; + } + + // Our array of NfaInfo structures is in the engine blob. + NfaInfo *nfa_infos = (NfaInfo *)(bc.engine_blob.data() + + proto.nfaInfoOffset - + bc.engine_blob.base_offset); + *nfaStateSize = 0; *tStateSize = 0; *fullStateSize = 0; @@ -5496,6 +5506,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { laggedRoseCount, longLitStreamStateRequired, historyRequired, &proto.stateOffsets); + // Update state offsets to do with NFAs in proto and in the NfaInfo + // structures. 
+ updateNfaState(bc, proto, &proto.stateOffsets, + &proto.scratchStateSize, &proto.nfaStateSize, + &proto.tStateSize); + scatter_plan_raw state_scatter = buildStateScatterPlan( sizeof(u8), bc.numStates, proto.activeLeftCount, proto.rosePrefixCount, proto.stateOffsets, cc.streaming, proto.activeArrayCount, @@ -5600,11 +5616,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Copy in the engine blob. bc.engine_blob.write_bytes(engine.get()); - NfaInfo *nfa_infos = (NfaInfo *)(ptr + proto.nfaInfoOffset); - updateNfaState(bc, &engine->stateOffsets, nfa_infos, - &engine->scratchStateSize, &engine->nfaStateSize, - &engine->tStateSize); - // Safety check: we shouldn't have written anything to the engine blob // after we copied it into the engine bytecode. assert(bc.engine_blob.size() == engineBlobSize); From 10aa806d677f92b5725291a477691458f88daa9e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 28 Feb 2017 17:13:47 +1100 Subject: [PATCH 118/326] rose: clean up nfa state alloc --- src/rose/rose_build_bytecode.cpp | 43 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 979098e4..c5d97fdc 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2105,16 +2105,14 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, } static -void allocateStateSpace(const NFA *nfa, const set &transient_queues, - RoseStateOffsets *so, NfaInfo *nfa_infos, - u32 *currFullStateSize, u32 *maskStateSize, - u32 *tStateSize) { - u32 qi = nfa->queueIndex; - bool transient = transient_queues.find(qi) != transient_queues.end(); - u32 stateSize = verify_u32(nfa->streamStateSize); +void allocateStateSpace(const NFA *nfa, NfaInfo *nfa_info, bool is_transient, + RoseStateOffsets *so, u32 *currFullStateSize, + u32 *maskStateSize, u32 *tStateSize) { + const u32 stateSize = nfa->streamStateSize; + const u32 scratchStateSize = nfa->scratchStateSize; u32 state_offset; - if (transient) { + if (is_transient) { state_offset = *tStateSize; *tStateSize += stateSize; } else { @@ -2124,29 +2122,30 @@ void allocateStateSpace(const NFA *nfa, const set &transient_queues, *maskStateSize += stateSize; } - nfa_infos[qi].stateOffset = state_offset; + nfa_info->stateOffset = state_offset; // Uncompressed state must be aligned. 
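    // (The loop below bumps *currFullStateSize to the next multiple of
    // the engine's required alignment before recording fullStateOffset;
    // e.g. 20 bytes of scratch state followed by a 16-byte-aligned
    // engine places that engine at offset 32.)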
- u32 scratchStateSize = verify_u32(nfa->scratchStateSize); u32 alignReq = state_alignment(*nfa); assert(alignReq); while (*currFullStateSize % alignReq) { (*currFullStateSize)++; } - nfa_infos[qi].fullStateOffset = *currFullStateSize; + nfa_info->fullStateOffset = *currFullStateSize; *currFullStateSize += scratchStateSize; } static -void findTransientQueues(const map &leftfix_info, - set *out) { +set +findTransientQueues(const map &leftfix_info) { DEBUG_PRINTF("curating transient queues\n"); - for (const auto &build : leftfix_info | map_values) { - if (build.transient) { - DEBUG_PRINTF("q %u is transient\n", build.queue); - out->insert(build.queue); + set out; + for (const auto &left : leftfix_info | map_values) { + if (left.transient) { + DEBUG_PRINTF("q %u is transient\n", left.queue); + out.insert(left.queue); } } + return out; } static @@ -2167,12 +2166,14 @@ void updateNfaState(const build_context &bc, RoseEngine &proto, *tStateSize = 0; *fullStateSize = 0; - set transient_queues; - findTransientQueues(bc.leftfix_info, &transient_queues); + auto transient_queues = findTransientQueues(bc.leftfix_info); for (const auto &m : bc.engineOffsets) { - const NFA *n = get_nfa_from_blob(bc, m.first); - allocateStateSpace(n, transient_queues, so, nfa_infos, fullStateSize, + const NFA *nfa = get_nfa_from_blob(bc, m.first); + u32 qi = nfa->queueIndex; + bool is_transient = contains(transient_queues, qi); + NfaInfo *nfa_info = &nfa_infos[qi]; + allocateStateSpace(nfa, nfa_info, is_transient, so, fullStateSize, nfaStateSize, tStateSize); } } From 6013fb154665113ecf0e059945f2af9574d76a65 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Mar 2017 11:28:14 +1100 Subject: [PATCH 119/326] engine_blob: add_range() member function --- src/rose/rose_build_bytecode.cpp | 29 +++++++++++------------------ src/rose/rose_build_engine_blob.h | 7 ++++++- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c5d97fdc..0502da55 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2352,7 +2352,7 @@ vector buildSuffixEkeyLists(const RoseBuildImpl &build, build_context &bc, auto &ekeys = e.second; assert(!ekeys.empty()); ekeys.push_back(INVALID_EKEY); /* terminator */ - out[qi] = bc.engine_blob.add(ekeys.begin(), ekeys.end()); + out[qi] = bc.engine_blob.add_range(ekeys); } return out; @@ -2448,8 +2448,7 @@ void addSomRevNfas(build_context &bc, RoseEngine &proto, } proto.somRevCount = verify_u32(nfas.size()); - proto.somRevOffsetOffset = - bc.engine_blob.add(begin(nfa_offsets), end(nfa_offsets)); + proto.somRevOffsetOffset = bc.engine_blob.add_range(nfa_offsets); } static @@ -2678,17 +2677,15 @@ void writeLookaroundTables(build_context &bc, RoseEngine &proto) { reach += REACH_BITVECTOR_LEN; } - proto.lookaroundTableOffset = - bc.engine_blob.add(begin(look_table), end(look_table)); - proto.lookaroundReachOffset = - bc.engine_blob.add(begin(reach_table), end(reach_table)); + proto.lookaroundTableOffset = bc.engine_blob.add_range(look_table); + proto.lookaroundReachOffset = bc.engine_blob.add_range(reach_table); } static void writeDkeyInfo(const ReportManager &rm, build_context &bc, RoseEngine &proto) { const auto inv_dkeys = rm.getDkeyToReportTable(); - proto.invDkeyOffset = bc.engine_blob.add(begin(inv_dkeys), end(inv_dkeys)); + proto.invDkeyOffset = bc.engine_blob.add_range(inv_dkeys); proto.dkeyCount = rm.numDkeys(); proto.dkeyLogSize = fatbit_size(proto.dkeyCount); } @@ -2696,8 +2693,7 @@ 
void writeDkeyInfo(const ReportManager &rm, build_context &bc, static void writeLeftInfo(build_context &bc, RoseEngine &proto, const vector &leftInfoTable) { - proto.leftOffset = - bc.engine_blob.add(begin(leftInfoTable), end(leftInfoTable)); + proto.leftOffset = bc.engine_blob.add_range(leftInfoTable); proto.activeLeftIterOffset = writeActiveLeftIter(bc, leftInfoTable); proto.roseCount = verify_u32(leftInfoTable.size()); proto.activeLeftCount = verify_u32(leftInfoTable.size()); @@ -2744,7 +2740,7 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, } } - proto.nfaInfoOffset = bc.engine_blob.add(begin(infos), end(infos)); + proto.nfaInfoOffset = bc.engine_blob.add_range(infos); } static @@ -3994,8 +3990,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, if (hasUsefulStops(lbi)) { assert(lbi.stopAlphabet.size() == N_CHARS); - left.stopTable = bc.engine_blob.add(lbi.stopAlphabet.begin(), - lbi.stopAlphabet.end()); + left.stopTable = bc.engine_blob.add_range(lbi.stopAlphabet); } assert(lbi.countingMiracleOffset || !lbi.countingMiracleCount); @@ -4791,8 +4786,7 @@ pair writeDelayPrograms(RoseBuildImpl &build, build_context &bc, } DEBUG_PRINTF("%zu delay programs\n", programs.size()); - return {bc.engine_blob.add(begin(programs), end(programs)), - verify_u32(programs.size())}; + return {bc.engine_blob.add_range(programs), verify_u32(programs.size())}; } /** @@ -4850,8 +4844,7 @@ pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc, } DEBUG_PRINTF("%zu anchored programs\n", programs.size()); - return {bc.engine_blob.add(begin(programs), end(programs)), - verify_u32(programs.size())}; + return {bc.engine_blob.add_range(programs), verify_u32(programs.size())}; } /** @@ -4900,7 +4893,7 @@ pair buildReportPrograms(RoseBuildImpl &build, build_context &bc) { programs.back(), program.size()); } - u32 offset = bc.engine_blob.add(begin(programs), end(programs)); + u32 offset = bc.engine_blob.add_range(programs); u32 count = verify_u32(programs.size()); return {offset, count}; } diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h index 8542b87b..9298c37f 100644 --- a/src/rose/rose_build_engine_blob.h +++ b/src/rose/rose_build_engine_blob.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -106,6 +106,11 @@ public: return offset; } + template + u32 add_range(const Range &range) { + return add(begin(range), end(range)); + } + u32 add_iterator(const std::vector &iter) { auto cache_it = cached_iters.find(iter); if (cache_it != cached_iters.end()) { From b2aae060d831ca50d243cc3d3ee9808f8224392b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Mar 2017 09:18:28 +1100 Subject: [PATCH 120/326] rose: consistent naming in updateNfaState --- src/rose/rose_build_bytecode.cpp | 43 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 0502da55..a40e54e8 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2106,32 +2106,31 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, static void allocateStateSpace(const NFA *nfa, NfaInfo *nfa_info, bool is_transient, - RoseStateOffsets *so, u32 *currFullStateSize, - u32 *maskStateSize, u32 *tStateSize) { - 
const u32 stateSize = nfa->streamStateSize; - const u32 scratchStateSize = nfa->scratchStateSize; - + RoseStateOffsets *so, u32 *scratchStateSize, + u32 *streamStateSize, u32 *transientStateSize) { u32 state_offset; if (is_transient) { - state_offset = *tStateSize; - *tStateSize += stateSize; + // Transient engines do not use stream state, but must have room in + // transient state (stored in scratch). + state_offset = *transientStateSize; + *transientStateSize += nfa->streamStateSize; } else { - // Pack NFA state on to the end of the Rose state. + // Pack NFA stream state on to the end of the Rose stream state. state_offset = so->end; - so->end += stateSize; - *maskStateSize += stateSize; + so->end += nfa->streamStateSize; + *streamStateSize += nfa->streamStateSize; } nfa_info->stateOffset = state_offset; - // Uncompressed state must be aligned. + // Uncompressed state in scratch must be aligned. u32 alignReq = state_alignment(*nfa); assert(alignReq); - while (*currFullStateSize % alignReq) { - (*currFullStateSize)++; + while (*scratchStateSize % alignReq) { + (*scratchStateSize)++; } - nfa_info->fullStateOffset = *currFullStateSize; - *currFullStateSize += scratchStateSize; + nfa_info->fullStateOffset = *scratchStateSize; + *scratchStateSize += nfa->scratchStateSize; } static @@ -2150,8 +2149,8 @@ findTransientQueues(const map &leftfix_info) { static void updateNfaState(const build_context &bc, RoseEngine &proto, - RoseStateOffsets *so, u32 *fullStateSize, u32 *nfaStateSize, - u32 *tStateSize) { + RoseStateOffsets *so, u32 *scratchStateSize, + u32 *streamStateSize, u32 *transientStateSize) { if (!proto.nfaInfoOffset) { assert(bc.engineOffsets.empty()); return; @@ -2162,9 +2161,9 @@ void updateNfaState(const build_context &bc, RoseEngine &proto, proto.nfaInfoOffset - bc.engine_blob.base_offset); - *nfaStateSize = 0; - *tStateSize = 0; - *fullStateSize = 0; + *streamStateSize = 0; + *transientStateSize = 0; + *scratchStateSize = 0; auto transient_queues = findTransientQueues(bc.leftfix_info); @@ -2173,8 +2172,8 @@ void updateNfaState(const build_context &bc, RoseEngine &proto, u32 qi = nfa->queueIndex; bool is_transient = contains(transient_queues, qi); NfaInfo *nfa_info = &nfa_infos[qi]; - allocateStateSpace(nfa, nfa_info, is_transient, so, fullStateSize, - nfaStateSize, tStateSize); + allocateStateSpace(nfa, nfa_info, is_transient, so, scratchStateSize, + streamStateSize, transientStateSize); } } From 96be1190efa4938a363a8122975ac0daaf5f75d4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Mar 2017 09:39:23 +1100 Subject: [PATCH 121/326] rose: move matcher bytecode to engine blob --- src/rose/rose_build_bytecode.cpp | 68 ++++++++------------------------ 1 file changed, 17 insertions(+), 51 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a40e54e8..e6e4976d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5420,25 +5420,11 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - u32 currOffset; /* relative to base of RoseEngine */ - if (!bc.engine_blob.empty()) { - currOffset = bc.engine_blob.base_offset + bc.engine_blob.size(); - } else { - currOffset = sizeof(RoseEngine); - } - - UNUSED const size_t engineBlobSize = bc.engine_blob.size(); // test later - - currOffset = ROUNDUP_CL(currOffset); - DEBUG_PRINTF("currOffset %u\n", currOffset); - // Build anchored matcher. 
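    // (From this patch, each matcher table is appended to the engine
    // blob with 64-byte, i.e. cache-line, alignment via
    // engine_blob.add(table, size, 64), replacing the hand-maintained
    // currOffset arithmetic removed above.)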
size_t asize = 0; auto atable = buildAnchoredMatcher(*this, anchored_dfas, &asize); if (atable) { - currOffset = ROUNDUP_CL(currOffset); - proto.amatcherOffset = currOffset; - currOffset += verify_u32(asize); + proto.amatcherOffset = bc.engine_blob.add(atable.get(), asize, 64); } // Build floating HWLM matcher. @@ -5447,9 +5433,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold, &fgroups, &fsize, &historyRequired); if (ftable) { - currOffset = ROUNDUP_CL(currOffset); - proto.fmatcherOffset = currOffset; - currOffset += verify_u32(fsize); + proto.fmatcherOffset = bc.engine_blob.add(ftable.get(), fsize, 64); bc.resources.has_floating = true; } @@ -5458,27 +5442,21 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { auto drtable = buildDelayRebuildMatcher(*this, bc.longLitLengthThreshold, &drsize); if (drtable) { - currOffset = ROUNDUP_CL(currOffset); - proto.drmatcherOffset = currOffset; - currOffset += verify_u32(drsize); + proto.drmatcherOffset = bc.engine_blob.add(drtable.get(), drsize, 64); } // Build EOD-anchored HWLM matcher. size_t esize = 0; auto etable = buildEodAnchoredMatcher(*this, &esize); if (etable) { - currOffset = ROUNDUP_CL(currOffset); - proto.ematcherOffset = currOffset; - currOffset += verify_u32(esize); + proto.ematcherOffset = bc.engine_blob.add(etable.get(), esize, 64); } // Build small-block HWLM matcher. size_t sbsize = 0; auto sbtable = buildSmallBlockMatcher(*this, &sbsize); if (sbtable) { - currOffset = ROUNDUP_CL(currOffset); - proto.sbmatcherOffset = currOffset; - currOffset += verify_u32(sbsize); + proto.sbmatcherOffset = bc.engine_blob.add(sbtable.get(), sbsize, 64); } proto.activeArrayCount = proto.leftfixBeginQueue; @@ -5510,6 +5488,18 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.stateOffsets, cc.streaming, proto.activeArrayCount, proto.outfixBeginQueue, proto.outfixEndQueue); + u32 currOffset; /* relative to base of RoseEngine */ + if (!bc.engine_blob.empty()) { + currOffset = bc.engine_blob.base_offset + bc.engine_blob.size(); + } else { + currOffset = sizeof(RoseEngine); + } + + UNUSED const size_t engineBlobSize = bc.engine_blob.size(); // test later + + currOffset = ROUNDUP_CL(currOffset); + DEBUG_PRINTF("currOffset %u\n", currOffset); + currOffset = ROUNDUP_N(currOffset, alignof(scatter_unit_u64a)); u32 state_scatter_aux_offset = currOffset; currOffset += aux_size(state_scatter); @@ -5579,30 +5569,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Copy in our prototype engine data. 
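    // (The RoseEngine header is assembled in proto and then copied into
    // the final cache-aligned allocation; the per-table memcpy calls
    // removed below are redundant now that the matcher tables live in
    // the engine blob, which is written out immediately afterwards.)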
memcpy(engine.get(), &proto, sizeof(proto)); - char *ptr = (char *)engine.get(); - assert(ISALIGNED_CL(ptr)); - - if (atable) { - assert(proto.amatcherOffset); - memcpy(ptr + proto.amatcherOffset, atable.get(), asize); - } - if (ftable) { - assert(proto.fmatcherOffset); - memcpy(ptr + proto.fmatcherOffset, ftable.get(), fsize); - } - if (drtable) { - assert(proto.drmatcherOffset); - memcpy(ptr + proto.drmatcherOffset, drtable.get(), drsize); - } - if (etable) { - assert(proto.ematcherOffset); - memcpy(ptr + proto.ematcherOffset, etable.get(), esize); - } - if (sbtable) { - assert(proto.sbmatcherOffset); - memcpy(ptr + proto.sbmatcherOffset, sbtable.get(), sbsize); - } - write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); From 2ec3019e04dd84937f957ad9edc55d032ddbfe5a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Mar 2017 09:46:59 +1100 Subject: [PATCH 122/326] rose: do state work before writing NfaInfo structs --- src/rose/rose_build_bytecode.cpp | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index e6e4976d..222e6926 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2105,7 +2105,7 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, } static -void allocateStateSpace(const NFA *nfa, NfaInfo *nfa_info, bool is_transient, +void allocateStateSpace(const NFA *nfa, NfaInfo &nfa_info, bool is_transient, RoseStateOffsets *so, u32 *scratchStateSize, u32 *streamStateSize, u32 *transientStateSize) { u32 state_offset; @@ -2121,7 +2121,7 @@ void allocateStateSpace(const NFA *nfa, NfaInfo *nfa_info, bool is_transient, *streamStateSize += nfa->streamStateSize; } - nfa_info->stateOffset = state_offset; + nfa_info.stateOffset = state_offset; // Uncompressed state in scratch must be aligned. u32 alignReq = state_alignment(*nfa); @@ -2129,7 +2129,7 @@ void allocateStateSpace(const NFA *nfa, NfaInfo *nfa_info, bool is_transient, while (*scratchStateSize % alignReq) { (*scratchStateSize)++; } - nfa_info->fullStateOffset = *scratchStateSize; + nfa_info.fullStateOffset = *scratchStateSize; *scratchStateSize += nfa->scratchStateSize; } @@ -2148,19 +2148,14 @@ findTransientQueues(const map &leftfix_info) { } static -void updateNfaState(const build_context &bc, RoseEngine &proto, +void updateNfaState(const build_context &bc, vector &nfa_infos, RoseStateOffsets *so, u32 *scratchStateSize, u32 *streamStateSize, u32 *transientStateSize) { - if (!proto.nfaInfoOffset) { + if (nfa_infos.empty()) { assert(bc.engineOffsets.empty()); return; } - // Our array of NfaInfo structures is in the engine blob. - NfaInfo *nfa_infos = (NfaInfo *)(bc.engine_blob.data() + - proto.nfaInfoOffset - - bc.engine_blob.base_offset); - *streamStateSize = 0; *transientStateSize = 0; *scratchStateSize = 0; @@ -2171,7 +2166,7 @@ void updateNfaState(const build_context &bc, RoseEngine &proto, const NFA *nfa = get_nfa_from_blob(bc, m.first); u32 qi = nfa->queueIndex; bool is_transient = contains(transient_queues, qi); - NfaInfo *nfa_info = &nfa_infos[qi]; + NfaInfo &nfa_info = nfa_infos[qi]; allocateStateSpace(nfa, nfa_info, is_transient, so, scratchStateSize, streamStateSize, transientStateSize); } @@ -2739,6 +2734,11 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, } } + // Update state offsets to do with NFAs in proto and in the NfaInfo + // structures. 
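+    // (Ordering matters here: the blob's contents are fixed once
+    // add_range() copies the NfaInfo array in, so all state layout must
+    // be finalized before the structures are serialized.)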
+ updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize, + &proto.nfaStateSize, &proto.tStateSize); + proto.nfaInfoOffset = bc.engine_blob.add_range(infos); } @@ -5412,7 +5412,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { writeLookaroundTables(bc, proto); writeDkeyInfo(rm, bc, proto); - writeNfaInfo(*this, bc, proto, no_retrigger_queues); writeLeftInfo(bc, proto, leftInfoTable); // Enforce role table resource limit. @@ -5477,11 +5476,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { laggedRoseCount, longLitStreamStateRequired, historyRequired, &proto.stateOffsets); - // Update state offsets to do with NFAs in proto and in the NfaInfo - // structures. - updateNfaState(bc, proto, &proto.stateOffsets, - &proto.scratchStateSize, &proto.nfaStateSize, - &proto.tStateSize); + // Write in NfaInfo structures. This will also update state size + // information in proto. + writeNfaInfo(*this, bc, proto, no_retrigger_queues); scatter_plan_raw state_scatter = buildStateScatterPlan( sizeof(u8), bc.numStates, proto.activeLeftCount, proto.rosePrefixCount, From 09d19c7c571b7dcb0956c96117fe7f95ce4169f5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Mar 2017 09:51:19 +1100 Subject: [PATCH 123/326] rose: remove unnecessary engine blob size check --- src/rose/rose_build_bytecode.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 222e6926..02107b9d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5492,8 +5492,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { currOffset = sizeof(RoseEngine); } - UNUSED const size_t engineBlobSize = bc.engine_blob.size(); // test later - currOffset = ROUNDUP_CL(currOffset); DEBUG_PRINTF("currOffset %u\n", currOffset); @@ -5572,10 +5570,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Copy in the engine blob. bc.engine_blob.write_bytes(engine.get()); - // Safety check: we shouldn't have written anything to the engine blob - // after we copied it into the engine bytecode. - assert(bc.engine_blob.size() == engineBlobSize); - // Add a small write engine if appropriate. engine = addSmallWriteEngine(*this, move(engine)); From 60fc975c816d78144bbf05b51306554148d0b6a4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 2 Mar 2017 10:06:29 +1100 Subject: [PATCH 124/326] rose: use ROUNDUP_N for alignment --- src/rose/rose_build_bytecode.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 02107b9d..8dcf1d66 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2126,9 +2126,7 @@ void allocateStateSpace(const NFA *nfa, NfaInfo &nfa_info, bool is_transient, // Uncompressed state in scratch must be aligned. 
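    // (ROUNDUP_N below computes the same next-multiple-of-alignReq value
    // as the removed byte-at-a-time loop, in a single step.)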
u32 alignReq = state_alignment(*nfa); assert(alignReq); - while (*scratchStateSize % alignReq) { - (*scratchStateSize)++; - } + *scratchStateSize = ROUNDUP_N(*scratchStateSize, alignReq); nfa_info.fullStateOffset = *scratchStateSize; *scratchStateSize += nfa->scratchStateSize; } From 2de6706df24457ba759c8e97daf540784f8347b7 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 10 Feb 2017 15:37:35 +0000 Subject: [PATCH 125/326] Adding support for compiling approximate matching patterns Adds new "edit_distance" extparam --- CMakeLists.txt | 2 + src/compiler/compiler.cpp | 11 +- src/compiler/compiler.h | 3 +- src/grey.cpp | 4 + src/grey.h | 2 + src/hs.cpp | 9 +- src/hs_compile.h | 12 +- src/nfagraph/ng.cpp | 19 +- src/nfagraph/ng.h | 5 +- src/nfagraph/ng_builder.cpp | 5 +- src/nfagraph/ng_expr_info.cpp | 2 +- src/nfagraph/ng_fuzzy.cpp | 677 ++++++++++++++++++++++++++++++++ src/nfagraph/ng_fuzzy.h | 49 +++ src/parser/shortcut_literal.cpp | 5 +- unit/hyperscan/expr_info.cpp | 13 +- util/ExpressionParser.rl | 14 +- 16 files changed, 804 insertions(+), 28 deletions(-) create mode 100644 src/nfagraph/ng_fuzzy.cpp create mode 100644 src/nfagraph/ng_fuzzy.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 85d97b9b..78bf207d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -782,6 +782,8 @@ SET (hs_SRCS src/nfagraph/ng_extparam.h src/nfagraph/ng_fixed_width.cpp src/nfagraph/ng_fixed_width.h + src/nfagraph/ng_fuzzy.cpp + src/nfagraph/ng_fuzzy.h src/nfagraph/ng_haig.cpp src/nfagraph/ng_haig.h src/nfagraph/ng_holder.cpp diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 4a4afc64..d59c5cc6 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -79,7 +79,8 @@ static void validateExt(const hs_expr_ext &ext) { static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET | - HS_EXT_FLAG_MIN_LENGTH; + HS_EXT_FLAG_MIN_LENGTH | + HS_EXT_FLAG_EDIT_DISTANCE; if (ext.flags & ~ALL_EXT_FLAGS) { throw CompileError("Invalid hs_expr_ext flag set."); } @@ -111,7 +112,8 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, id(actionId), min_offset(0), max_offset(MAX_OFFSET), - min_length(0) { + min_length(0), + edit_distance(0) { ParseMode mode(flags); component = parse(expression, mode); @@ -163,6 +165,9 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) { min_length = ext->min_length; } + if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) { + edit_distance = ext->edit_distance; + } } // These are validated in validateExt, so an error will already have been diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h index 1d7d6536..48987fc3 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -78,6 +78,7 @@ public: u64a min_offset; //!< 0 if not used u64a max_offset; //!< MAX_OFFSET if not used u64a min_length; //!< 0 if not used + u32 edit_distance; //!< 0 if not used }; /** diff --git a/src/grey.cpp b/src/grey.cpp 
index cd19e863..05473abb 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -61,6 +61,7 @@ Grey::Grey(void) : allowSmallLiteralSet(true), allowCastle(true), allowDecoratedLiteral(true), + allowApproximateMatching(true), allowNoodle(true), fdrAllowTeddy(true), fdrAllowFlood(true), @@ -98,6 +99,7 @@ Grey::Grey(void) : minRoseLiteralLength(3), minRoseNetflowLiteralLength(2), maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */ + maxEditDistance(16), minExtBoundedRepeatSize(32), goughCopyPropagate(true), goughRegisterAllocate(true), @@ -226,6 +228,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowCastle); G_UPDATE(allowDecoratedLiteral); G_UPDATE(allowNoodle); + G_UPDATE(allowApproximateMatching); G_UPDATE(fdrAllowTeddy); G_UPDATE(fdrAllowFlood); G_UPDATE(violetAvoidSuffixes); @@ -262,6 +265,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(minRoseLiteralLength); G_UPDATE(minRoseNetflowLiteralLength); G_UPDATE(maxRoseNetflowEdges); + G_UPDATE(maxEditDistance); G_UPDATE(minExtBoundedRepeatSize); G_UPDATE(goughCopyPropagate); G_UPDATE(goughRegisterAllocate); diff --git a/src/grey.h b/src/grey.h index dcbc2e7d..c2d5ac92 100644 --- a/src/grey.h +++ b/src/grey.h @@ -61,6 +61,7 @@ struct Grey { bool allowSmallLiteralSet; bool allowCastle; bool allowDecoratedLiteral; + bool allowApproximateMatching; bool allowNoodle; bool fdrAllowTeddy; @@ -107,6 +108,7 @@ struct Grey { u32 minRoseLiteralLength; u32 minRoseNetflowLiteralLength; u32 maxRoseNetflowEdges; + u32 maxEditDistance; u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */ diff --git a/src/hs.cpp b/src/hs.cpp index f64e867a..6cd3a3ee 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,7 @@ #include "nfagraph/ng.h" #include "nfagraph/ng_expr_info.h" #include "nfagraph/ng_extparam.h" +#include "nfagraph/ng_fuzzy.h" #include "parser/parse_error.h" #include "parser/Parser.h" #include "parser/prefilter.h" @@ -379,6 +380,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, throw ParseError("Internal error."); } + // validate graph's suitability for fuzzing + validate_fuzzy_compile(*g, g->edit_distance, g->utf8, cc.grey); + + // fuzz graph - this must happen before any transformations are made + make_fuzzy(*g, g->edit_distance, cc.grey); + handleExtendedParams(rm, *g, cc); fillExpressionInfo(rm, *g, &local_info); } diff --git a/src/hs_compile.h b/src/hs_compile.h index c5212cbe..1e2e0219 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -241,6 +241,13 @@ typedef struct hs_expr_ext { * @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field. */ unsigned long long min_length; + + /** + * Allow patterns to approximately match within this edit distance. To use + * this parameter, set the @ref HS_EXT_FLAG_EDIT_DISTANCE flag in the + * hs_expr_ext::flags field. + */ + unsigned edit_distance; } hs_expr_ext_t; /** @@ -261,6 +268,9 @@ typedef struct hs_expr_ext { /** Flag indicating that the hs_expr_ext::min_length field is used. 
*/ #define HS_EXT_FLAG_MIN_LENGTH 4ULL +/** Flag indicating that the hs_expr_ext::edit_distance field is used. */ +#define HS_EXT_FLAG_EDIT_DISTANCE 8ULL + /** @} */ /** diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index e1f29318..9453aae9 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -41,6 +41,7 @@ #include "ng_equivalence.h" #include "ng_extparam.h" #include "ng_fixed_width.h" +#include "ng_fuzzy.h" #include "ng_haig.h" #include "ng_literal_component.h" #include "ng_literal_decorated.h" @@ -328,11 +329,17 @@ bool NG::addGraph(NGWrapper &w) { /* ensure utf8 starts at cp boundary */ ensureCodePointStart(rm, w); - resolveAsserts(rm, w); + // validate graph's suitability for fuzzing before resolving asserts + validate_fuzzy_compile(w, w.edit_distance, w.utf8, cc.grey); + + resolveAsserts(rm, w); dumpDotWrapper(w, "02_post_assert_resolve", cc.grey); assert(allMatchStatesHaveReports(w)); + make_fuzzy(w, w.edit_distance, cc.grey); + dumpDotWrapper(w, "02a_post_fuzz", cc.grey); + pruneUseless(w); pruneEmptyVertices(w); @@ -577,20 +584,22 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, NGWrapper::NGWrapper(unsigned int ei, bool highlander_in, bool utf8_in, bool prefilter_in, som_type som_in, ReportID r, - u64a min_offset_in, u64a max_offset_in, u64a min_length_in) + u64a min_offset_in, u64a max_offset_in, u64a min_length_in, + u32 edit_distance_in) : expressionIndex(ei), reportId(r), highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in), som(som_in), min_offset(min_offset_in), max_offset(max_offset_in), - min_length(min_length_in) { + min_length(min_length_in), edit_distance(edit_distance_in) { // All special nodes/edges are added in NGHolder's constructor. DEBUG_PRINTF("built %p: expr=%u report=%u%s%s%s%s " - "min_offset=%llu max_offset=%llu min_length=%llu\n", + "min_offset=%llu max_offset=%llu min_length=%llu " + "edit_distance=%u\n", this, expressionIndex, reportId, highlander ? " highlander" : "", utf8 ? " utf8" : "", prefilter ? " prefilter" : "", (som != SOM_NONE) ? 
" som" : "", - min_offset, max_offset, min_length); + min_offset, max_offset, min_length, edit_distance); } NGWrapper::~NGWrapper() {} diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index 4aa6a7dc..d6e5d3c0 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,7 +62,7 @@ class NGWrapper : public NGHolder { public: NGWrapper(unsigned int expressionIndex, bool highlander, bool utf8, bool prefilter, const som_type som, ReportID rid, u64a min_offset, - u64a max_offset, u64a min_length); + u64a max_offset, u64a min_length, u32 edit_distance); ~NGWrapper() override; @@ -80,6 +80,7 @@ public: u64a min_offset; /**< extparam min_offset value */ u64a max_offset; /**< extparam max_offset value */ u64a min_length; /**< extparam min_length value */ + u32 edit_distance; /**< extparam edit_distance value */ }; class RoseBuild; diff --git a/src/nfagraph/ng_builder.cpp b/src/nfagraph/ng_builder.cpp index 4ca0b37e..385e114f 100644 --- a/src/nfagraph/ng_builder.cpp +++ b/src/nfagraph/ng_builder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -112,7 +112,8 @@ NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in, : rm(rm_in), grey(grey_in), graph(ue2::make_unique( expr.index, expr.highlander, expr.utf8, expr.prefilter, expr.som, - expr.id, expr.min_offset, expr.max_offset, expr.min_length)), + expr.id, expr.min_offset, expr.max_offset, expr.min_length, + expr.edit_distance)), vertIdx(N_SPECIALS) { // Reserve space for a reasonably-sized NFA diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index b43c7fd1..7419609b 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/nfagraph/ng_fuzzy.cpp b/src/nfagraph/ng_fuzzy.cpp new file mode 100644 index 00000000..fecb7065 --- /dev/null +++ b/src/nfagraph/ng_fuzzy.cpp @@ -0,0 +1,677 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Graph fuzzer for approximate matching
+ */
+
+#include "ng_fuzzy.h"
+
+#include "ng.h"
+#include "ng_depth.h"
+#include "ng_util.h"
+
+#include <map>
+#include <vector>
+using namespace std;
+
+namespace ue2 {
+
+// returns all successors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g,
+                                                    NFAVertex src, u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    assert(depth > 0);
+
+    // populate current set of successors
+    for (auto v : adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (unsigned d = 1; d < depth; d++) {
+        // collect all successors for all current level vertices
+        for (auto v : cur) {
+            // don't go past special nodes
+            if (is_special(v, g)) {
+                continue;
+            }
+
+            for (auto succ : adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == succ) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
+                next.insert(succ);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+// returns all predecessors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g,
+                                                      NFAVertex src,
+                                                      u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    assert(depth > 0);
+
+    // populate current set of predecessors
+    for (auto v : inv_adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (unsigned d = 1; d < depth; d++) {
+        // collect all predecessors for all current level vertices
+        for (auto v : cur) {
+            for (auto pred : inv_adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == pred) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
+                next.insert(pred);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+/*
+ * This struct produces a fuzzed graph; that is, a graph that is able to match
+ * the original pattern, as well as input data within a certain edit distance.
+ * Construct the struct, then call fuzz_graph() to transform the graph.
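+ *
+ * A minimal illustration (informal names): for the pattern /abc/ at edit
+ * distance 1, vertex b gains a level-1 shadow b' and a dot helper h. The
+ * wiring then matches "axc" via a -> h -> c' (replace), "abxc" via
+ * b -> h -> c' (insert), and "ac" via the remove edge a -> c', where c'
+ * is the level-1 shadow of c and reaches accept just as c does.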
+ *
+ * Terminology used:
+ * - Shadow vertices: vertices mirroring the original graph at various edit
+ *   distances
+ * - Shadow graph level: edit distance of a particular shadow graph
+ * - Helpers: dot vertices assigned to shadow vertices, used for insert/replace
+ */
+struct ShadowGraph {
+    NGHolder &g;
+    u32 edit_distance;
+    map<pair<NFAVertex, u32>, NFAVertex> shadow_map;
+    map<pair<NFAVertex, u32>, NFAVertex> helper_map;
+    map<NFAVertex, NFAVertex> clones;
+    // edge creation is deferred
+    vector<pair<NFAVertex, NFAVertex>> edges_to_be_added;
+    flat_set<NFAVertex> orig;
+
+    ShadowGraph(NGHolder &g_in, u32 ed_in) : g(g_in), edit_distance(ed_in) {}
+
+    void fuzz_graph() {
+        if (edit_distance == 0) {
+            return;
+        }
+
+        // step 1: prepare the vertices, helpers and shadows according to
+        // the original graph
+        prepare_graph();
+
+        // step 2: add shadow and helper nodes
+        build_shadow_graph();
+
+        // step 3: set up reports for newly created vertices (and make clones
+        // if necessary)
+        create_reports();
+
+        // step 4: wire up shadow graph and helpers for insert/replace/remove
+        connect_shadow_graph();
+
+        // step 5: commit all the edge wirings
+        DEBUG_PRINTF("Committing edge wirings\n");
+        for (const auto &p : edges_to_be_added) {
+            add_edge_if_not_present(p.first, p.second, g);
+        }
+
+        DEBUG_PRINTF("Done!\n");
+    }
+
+private:
+    const NFAVertex& get_clone(const NFAVertex &v) {
+        return contains(clones, v) ? clones[v] : v;
+    }
+
+    void connect_to_clones(const NFAVertex &u, const NFAVertex &v) {
+        const NFAVertex &clone_u = get_clone(u);
+        const NFAVertex &clone_v = get_clone(v);
+
+        edges_to_be_added.emplace_back(u, v);
+        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[u].index, g[v].index);
+
+        // do not connect clones to accepts, we do it during cloning
+        if (is_any_accept(clone_v, g)) {
+            return;
+        }
+        edges_to_be_added.emplace_back(clone_u, clone_v);
+        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[clone_u].index,
+                     g[clone_v].index);
+    }
+
+    void prepare_graph() {
+        DEBUG_PRINTF("Building shadow graphs\n");
+
+        for (auto v : vertices_range(g)) {
+            // all level 0 vertices are their own helpers and their own shadows
+            helper_map[make_pair(v, 0)] = v;
+            shadow_map[make_pair(v, 0)] = v;
+
+            // find special nodes
+            if (is_any_accept(v, g)) {
+                DEBUG_PRINTF("Node %zu is a special node\n", g[v].index);
+                for (unsigned edit = 1; edit <= edit_distance; edit++) {
+                    // all accepts are their own shadows and helpers at all
+                    // levels
+                    shadow_map[make_pair(v, edit)] = v;
+                    helper_map[make_pair(v, edit)] = v;
+                }
+                continue;
+            }
+            DEBUG_PRINTF("Node %zu is to be shadowed\n", g[v].index);
+            orig.insert(v);
+        }
+    }
+
+    void build_shadow_graph() {
+        for (auto v : orig) {
+            DEBUG_PRINTF("Adding shadow/helper nodes for node %zu\n",
+                         g[v].index);
+            for (unsigned dist = 1; dist <= edit_distance; dist++) {
+                auto shadow_v = v;
+
+                // start and startDs cannot have shadows but do have helpers
+                if (!is_any_start(v, g)) {
+                    shadow_v = clone_vertex(g, v);
+                    DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n",
+                                 g[shadow_v].index, dist);
+                }
+                shadow_map[make_pair(v, dist)] = shadow_v;
+
+                // if there's nowhere to go from this vertex, no helper needed
+                if (proper_out_degree(v, g) < 1) {
+                    helper_map[make_pair(v, dist)] = shadow_v;
+                    continue;
+                }
+
+                auto helper_v = clone_vertex(g, v);
+                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n",
+                             g[helper_v].index, dist);
+
+                // this is a helper, so make it a dot
+                g[helper_v].char_reach = CharReach::dot();
+                // do not copy virtual start's assert flags
+                if (is_virtual_start(v, g)) {
+                    g[helper_v].assert_flags = 0;
+                }
+                helper_map[make_pair(v, dist)] = helper_v;
+            }
+ } + } + + // wire up successors according to the original graph, wire helpers + // to shadow successors (insert/replace) + void connect_succs(NFAVertex v, u32 dist) { + DEBUG_PRINTF("Wiring up successors for node %zu shadow level %u\n", + g[v].index, dist); + const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; + const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; + + // multiple insert + if (dist > 1) { + const auto &prev_level_helper = helper_map[make_pair(v, dist - 1)]; + connect_to_clones(prev_level_helper, cur_shadow_helper); + } + + for (auto orig_dst : adjacent_vertices_range(v, g)) { + const auto &shadow_dst = shadow_map[make_pair(orig_dst, dist)]; + + connect_to_clones(cur_shadow_v, shadow_dst); + + // ignore startDs for insert/replace + if (orig_dst == g.startDs) { + continue; + } + + connect_to_clones(cur_shadow_helper, shadow_dst); + } + } + + // wire up predecessors according to the original graph, wire + // predecessors to helpers (replace), wire predecessor helpers to + // helpers (multiple replace) + void connect_preds(NFAVertex v, u32 dist) { + DEBUG_PRINTF("Wiring up predecessors for node %zu shadow level %u\n", + g[v].index, dist); + const auto &cur_shadow_v = shadow_map[make_pair(v, dist)]; + const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; + + auto orig_src_vertices = inv_adjacent_vertices_range(v, g); + for (auto orig_src : orig_src_vertices) { + // ignore edges from start to startDs + if (v == g.startDs && orig_src == g.start) { + continue; + } + // ignore self-loops for replace + if (orig_src != v) { + // do not wire a replace node for start vertices if we + // have a virtual start + if (is_virtual_start(v, g) && is_any_start(orig_src, g)) { + continue; + } + + if (dist) { + const auto &prev_level_src = + shadow_map[make_pair(orig_src, dist - 1)]; + const auto &prev_level_helper = + helper_map[make_pair(orig_src, dist - 1)]; + + connect_to_clones(prev_level_src, cur_shadow_helper); + connect_to_clones(prev_level_helper, cur_shadow_helper); + } + } + // wire predecessor according to original graph + const auto &shadow_src = shadow_map[make_pair(orig_src, dist)]; + + connect_to_clones(shadow_src, cur_shadow_v); + } + } + + // wire up previous level helper to current shadow (insert) + void connect_helpers(NFAVertex v, u32 dist) { + DEBUG_PRINTF("Wiring up helpers for node %zu shadow level %u\n", + g[v].index, dist); + const auto &cur_shadow_helper = helper_map[make_pair(v, dist)]; + auto prev_level_v = shadow_map[make_pair(v, dist - 1)]; + + connect_to_clones(prev_level_v, cur_shadow_helper); + } + + /* + * wiring edges for removal is a special case. + * + * when wiring edges for removal, as well as wiring up immediate + * predecessors to immediate successors, we also need to wire up more + * distant successors to their respective shadow graph levels. + * + * for example, consider graph start->a->b->c->d->accept. + * + * at edit distance 1, we need remove edges start->b, a->c, b->d, and + * c->accept, all going from original graph (level 0) to shadow graph + * level 1. + * + * at edit distance 2, we also need edges start->c, a->d and b->accept, + * all going from level 0 to shadow graph level 2. + * + * this is propagated to all shadow levels; that is, given edit + * distance 3, we will have edges from shadow levels 0->1, 0->2, + * 0->3, 1->2, 1->3, and 2->3. + * + * therefore, we wire them in steps: first wire with step 1 (0->1, 1->2, + * 2->3) at depth 1, then wire with step 2 (0->2, 1->3) at depth 2, etc. 
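+ * and finally with step 3 (0->3) at depth 3. in other words, removing k
+ * characters is a single edge that skips k vertices while descending k
+ * shadow levels.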
+ * + * we also have to wire helpers to their removal successors, to + * accommodate for a replace followed by a remove, on all shadow levels. + * + * and finally, we also have to wire source shadows into removal + * successor helpers on a level above, to accommodate for a remove + * followed by a replace. + */ + void connect_removals(NFAVertex v) { + DEBUG_PRINTF("Wiring up remove edges for node %zu\n", g[v].index); + + // vertices returned by this function don't include self-loops + auto dst_vertices_by_depth = + gatherSuccessorsByDepth(g, v, edit_distance); + auto orig_src_vertices = inv_adjacent_vertices_range(v, g); + for (auto orig_src : orig_src_vertices) { + // ignore self-loops + if (orig_src == v) { + continue; + } + for (unsigned step = 1; step <= edit_distance; step++) { + for (unsigned dist = step; dist <= edit_distance; dist++) { + auto &dst_vertices = dst_vertices_by_depth[step - 1]; + for (auto &orig_dst : dst_vertices) { + const auto &shadow_src = + shadow_map[make_pair(orig_src, dist - step)]; + const auto &shadow_helper = + helper_map[make_pair(orig_src, dist - step)]; + const auto &shadow_dst = + shadow_map[make_pair(orig_dst, dist)]; + + // removal + connect_to_clones(shadow_src, shadow_dst); + + // removal from helper vertex + connect_to_clones(shadow_helper, shadow_dst); + + // removal into helper, requires additional edit + if ((dist + 1) <= edit_distance) { + const auto &next_level_helper = + helper_map[make_pair(orig_dst, dist + 1)]; + + connect_to_clones(shadow_src, next_level_helper); + } + } + } + } + } + } + + void connect_shadow_graph() { + DEBUG_PRINTF("Wiring up the graph\n"); + + for (auto v : orig) { + + DEBUG_PRINTF("Wiring up edges for node %zu\n", g[v].index); + + for (unsigned dist = 0; dist <= edit_distance; dist++) { + + // handle insert/replace + connect_succs(v, dist); + + // handle replace/multiple insert + connect_preds(v, dist); + + // handle helpers + if (dist > 0) { + connect_helpers(v, dist); + } + } + + // handle removals + connect_removals(v); + } + } + + void connect_to_targets(NFAVertex src, const flat_set &targets) { + for (auto dst : targets) { + DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[src].index, + g[dst].index); + edges_to_be_added.emplace_back(src, dst); + } + } + + // create a clone of the vertex, but overwrite its report set + void create_clone(NFAVertex v, const flat_set &reports, + unsigned max_edit_distance, + const flat_set &targets) { + // some vertices may have the same reports, but different successors; + // therefore, we may need to connect them multiple times, but still only + // clone once + bool needs_cloning = !contains(clones, v); + + DEBUG_PRINTF("Cloning node %zu\n", g[v].index); + // go through all shadows and helpers, including + // original vertex + for (unsigned d = 0; d < max_edit_distance; d++) { + auto shadow_v = shadow_map[make_pair(v, d)]; + auto helper_v = helper_map[make_pair(v, d)]; + + NFAVertex new_shadow_v, new_helper_v; + + // make sure we don't clone the same vertex twice + if (needs_cloning) { + new_shadow_v = clone_vertex(g, shadow_v); + DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n", + g[new_shadow_v].index, d); + clones[shadow_v] = new_shadow_v; + } else { + new_shadow_v = clones[shadow_v]; + } + g[new_shadow_v].reports = reports; + + connect_to_targets(new_shadow_v, targets); + + if (shadow_v == helper_v) { + continue; + } + if (needs_cloning) { + new_helper_v = clone_vertex(g, helper_v); + DEBUG_PRINTF("New helper node ID: %zu (level %u)\n", + g[new_helper_v].index, d); + 
clones[helper_v] = new_helper_v; + } else { + new_helper_v = clones[helper_v]; + } + g[new_helper_v].reports = reports; + + connect_to_targets(new_helper_v, targets); + } + } + + void write_reports(NFAVertex v, const flat_set &reports, + unsigned max_edit_distance, + const flat_set &targets) { + // we're overwriting reports, but we're not losing any + // information as we already cached all the different report + // sets, so vertices having different reports will be cloned and set up + // with the correct report set + + // go through all shadows and helpers, including original + // vertex + for (unsigned d = 0; d < max_edit_distance; d++) { + auto shadow_v = shadow_map[make_pair(v, d)]; + auto helper_v = helper_map[make_pair(v, d)]; + DEBUG_PRINTF("Setting up reports for shadow node: %zu " + "(level %u)\n", + g[shadow_v].index, d); + DEBUG_PRINTF("Setting up reports for helper node: %zu " + "(level %u)\n", + g[helper_v].index, d); + g[shadow_v].reports = reports; + g[helper_v].reports = reports; + + connect_to_targets(shadow_v, targets); + connect_to_targets(helper_v, targets); + } + } + + /* + * we may have multiple report sets per graph. that means, whenever we + * construct additional paths through the graph (alternations, removals), we + * have to account for the fact that some vertices are predecessors to + * vertices with different report sets. + * + * whenever that happens, we have to clone the paths for both report sets, + * and set up these new vertices with their respective report sets as well. + * + * in order to do that, we first have to get all the predecessors for accept + * and acceptEod vertices. then, go through them one by one, and take note + * of the report lists. the first report set we find, wins, the rest we + * clone. + * + * we also have to do this in two passes, because there may be vertices that + * are predecessors to vertices with different report sets, so to avoid + * overwriting reports we will be caching reports info instead. 
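+ *
+ * for example, if some vertex precedes both a vertex reporting {R1} and
+ * a vertex reporting {R2}, its shadows and helpers are written with the
+ * first report set seen and cloned for the second, so every path through
+ * the fuzzed graph carries exactly one report set.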
+ */ + void create_reports() { + map, flat_set> reports_to_vertices; + flat_set accepts{g.accept, g.acceptEod}; + + // gather reports info from all vertices connected to accept + for (auto accept : accepts) { + for (auto src : inv_adjacent_vertices_range(accept, g)) { + // skip special vertices + if (is_special(src, g)) { + continue; + } + reports_to_vertices[g[src].reports].insert(src); + } + } + + // we expect to see at most two report sets + assert(reports_to_vertices.size() > 0 && + reports_to_vertices.size() <= 2); + + // set up all reports + bool clone = false; + for (auto &pair : reports_to_vertices) { + const auto &reports = pair.first; + const auto &vertices = pair.second; + + for (auto src : vertices) { + // get all predecessors up to edit distance + auto src_vertices_by_depth = + gatherPredecessorsByDepth(g, src, edit_distance); + + // find which accepts source vertex connects to + flat_set targets; + for (const auto &accept : accepts) { + NFAEdge e = edge(src, accept, g); + if (e) { + targets.insert(accept); + } + } + assert(targets.size()); + + for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) { + const auto &preds = src_vertices_by_depth[d]; + for (auto v : preds) { + // only clone a node if it already contains reports + if (clone && !g[v].reports.empty()) { + create_clone(v, reports, edit_distance - d, + targets); + } else { + write_reports(v, reports, edit_distance - d, + targets); + } + } + } + } + // clone vertices only if it's not our first report set + clone = true; + } + } +}; + +// check if we will edit our way into a vacuous pattern +static +bool will_turn_vacuous(const NGHolder &g, u32 edit_distance) { + vector depths; + + calcDepths(g, depths); + + depth min_depth = depth::infinity(); + auto idx = g[g.start].index; + + // check distance from start to accept/acceptEod + if (depths[idx].toAccept.min.is_finite()) { + min_depth = min(depths[idx].toAccept.min, min_depth); + } + if (depths[idx].toAcceptEod.min.is_finite()) { + min_depth = min(depths[idx].toAcceptEod.min, min_depth); + } + + idx = g[g.startDs].index; + + // check distance from startDs to accept/acceptEod + if (depths[idx].toAccept.min.is_finite()) { + min_depth = min(depths[idx].toAccept.min, min_depth); + } + if (depths[idx].toAcceptEod.min.is_finite()) { + min_depth = min(depths[idx].toAcceptEod.min, min_depth); + } + + assert(min_depth.is_finite()); + + // now, check if we can edit our way into a vacuous pattern + if (min_depth <= (u64a) edit_distance + 1) { + DEBUG_PRINTF("Pattern will turn vacuous if approximately matched\n"); + return true; + } + return false; +} + +void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool utf8, + const Grey &grey) { + if (edit_distance == 0) { + return; + } + if (!grey.allowApproximateMatching) { + throw CompileError("Approximate matching is disabled."); + } + if (edit_distance > grey.maxEditDistance) { + throw CompileError("Edit distance is too big."); + } + if (utf8) { + throw CompileError("UTF-8 is disallowed for approximate matching."); + } + // graph isn't fuzzable if there are edge assertions anywhere in the graph + for (auto e : edges_range(g)) { + if (g[e].assert_flags) { + throw CompileError("Zero-width assertions are disallowed for " + "approximate matching."); + } + } + if (will_turn_vacuous(g, edit_distance)) { + throw CompileError("Approximate matching patterns that reduce to " + "vacuous patterns are disallowed."); + } +} + +void make_fuzzy(NGHolder &g, u32 edit_distance, UNUSED const Grey &grey) { + if (edit_distance == 0) { + 
return; + } + assert(grey.allowApproximateMatching); + assert(grey.maxEditDistance >= edit_distance); + ShadowGraph sg(g, edit_distance); + sg.fuzz_graph(); +} +} diff --git a/src/nfagraph/ng_fuzzy.h b/src/nfagraph/ng_fuzzy.h new file mode 100644 index 00000000..a2c82127 --- /dev/null +++ b/src/nfagraph/ng_fuzzy.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file + * \brief Graph fuzzer for approximate matching + */ + +#ifndef NG_FUZZY_H +#define NG_FUZZY_H + +#include "ue2common.h" + +namespace ue2 { +struct Grey; +class NGHolder; +class ReportManager; + +void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool utf8, + const Grey &grey); + +void make_fuzzy(NGHolder &g, u32 edit_distance, const Grey &grey); +} + +#endif // NG_FUZZY_H diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index 3f58d752..a7aa5d06 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -167,7 +167,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { } // XXX: don't shortcut literals with extended params (yet) - if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length) { + if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length || + expr.edit_distance) { DEBUG_PRINTF("extended params not allowed\n"); return false; } diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index 984104c5..aa242798 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -124,7 +124,7 @@ TEST_P(ExprInfop, check_ext_null) { free(info); } -static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0 }; +static const hs_expr_ext NO_EXT_PARAM = { 0, 0, 0, 0, 0 }; static const expected_info ei_test[] = { {"abc", NO_EXT_PARAM, 3, 3, 0, 0, 0}, @@ -167,10 +167,11 @@ static const expected_info ei_test[] = { {"(foo|bar)\\z", NO_EXT_PARAM, 3, 3, 0, 1, 1}, // Some cases with extended parameters. 
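    // (A sketch of how a caller would request approximate matching with
    // the new field, via the existing hs_compile_ext_multi() entry point;
    // the pattern and distance here are arbitrary:
    //
    //     const char *expr = "abc.*def";
    //     unsigned flags = 0;
    //     hs_expr_ext_t ext = {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2};
    //     const hs_expr_ext_t *extp = &ext;
    //     hs_database_t *db = nullptr;
    //     hs_compile_error_t *compile_err = nullptr;
    //     hs_error_t err = hs_compile_ext_multi(&expr, &flags, nullptr,
    //                                           &extp, 1, HS_MODE_BLOCK,
    //                                           nullptr, &db, &compile_err);
    //
    // The tools' expression files accept the analogous suffix form,
    // /abc.*def/{edit_distance=2}, per the ExpressionParser.rl change
    // below.)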
- {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0}, 6, 10, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100}, 100, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5}, 6, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 0, UINT_MAX, 0, 0, 0}, }; INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test)); diff --git a/util/ExpressionParser.rl b/util/ExpressionParser.rl index 98ed8daa..073f5300 100644 --- a/util/ExpressionParser.rl +++ b/util/ExpressionParser.rl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,7 +48,8 @@ enum ParamKey { PARAM_NONE, PARAM_MIN_OFFSET, PARAM_MAX_OFFSET, - PARAM_MIN_LENGTH + PARAM_MIN_LENGTH, + PARAM_EDIT_DISTANCE }; %%{ @@ -92,6 +93,10 @@ enum ParamKey { ext->flags |= HS_EXT_FLAG_MIN_LENGTH; ext->min_length = num; break; + case PARAM_EDIT_DISTANCE: + ext->flags |= HS_EXT_FLAG_EDIT_DISTANCE; + ext->edit_distance = num; + break; case PARAM_NONE: default: // No key specified, syntax invalid. @@ -151,8 +156,9 @@ bool readExpression(const std::string &input, std::string &expr, %%{ single_flag = [ismW8HPLVO]; param = ('min_offset' @{ key = PARAM_MIN_OFFSET; } | - 'max_offset' @{ key = PARAM_MAX_OFFSET; } | - 'min_length' @{ key = PARAM_MIN_LENGTH; } ); + 'max_offset' @{ key = PARAM_MAX_OFFSET; } | + 'min_length' @{ key = PARAM_MIN_LENGTH; } | + 'edit_distance' @{ key = PARAM_EDIT_DISTANCE; }); value = (digit @accumulateNum)+ >{num = 0;}; param_spec = (' '* param '=' value ' '*) >{ key = PARAM_NONE; } From 4c2b7cc04fefd7198915868239f3b1bd5dac1c14 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 10 Feb 2017 15:42:36 +0000 Subject: [PATCH 126/326] Add support for approximate matching in ue2collider --- util/ng_corpus_generator.cpp | 23 +- util/ng_corpus_generator.h | 8 +- util/ng_find_matches.cpp | 934 +++++++++++++++++++++++++++++++---- util/ng_find_matches.h | 5 +- 4 files changed, 860 insertions(+), 110 deletions(-) diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index ca7c413a..6d0de539 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -218,7 +218,7 @@ namespace { /** \brief Concrete implementation */ class CorpusGeneratorImpl : public CorpusGenerator { public: - CorpusGeneratorImpl(const NGHolder &graph_in, CorpusProperties &props); + CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props); ~CorpusGeneratorImpl() {} void generateCorpus(vector &data); @@ -244,10 +244,13 @@ private: CorpusProperties &cProps; }; -CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in, +CorpusGeneratorImpl::CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props) : graph(graph_in), cProps(props) 
{
-    // empty
+    // if this pattern is to be matched approximately
+    if (graph_in.edit_distance && !props.editDistance) {
+        props.editDistance = props.rand(0, graph_in.edit_distance + 1);
+    }
 }
 
 void CorpusGeneratorImpl::generateCorpus(vector<string> &data) {
@@ -388,7 +391,7 @@ hit_limit:
 /** \brief Concrete implementation for UTF-8 */
 class CorpusGeneratorUtf8 : public CorpusGenerator {
 public:
-    CorpusGeneratorUtf8(const NGHolder &graph_in, CorpusProperties &props);
+    CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusProperties &props);
     ~CorpusGeneratorUtf8() {}
 
     void generateCorpus(vector<string> &data);
@@ -407,17 +410,21 @@ private:
     void addRandom(const min_max &mm, vector<unichar> *out);
 
     /** \brief The NFA graph we operate over. */
-    const NGHolder &graph;
+    const NGWrapper &graph;
 
     /** \brief Reference to our corpus generator properties object (stores some
     * state) */
     CorpusProperties &cProps;
 };
 
-CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in,
+CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGWrapper &graph_in,
                                          CorpusProperties &props) :
     graph(graph_in), cProps(props) {
-    // empty
+    // we do not support UTF-8 for approximate matching
+    if (graph.edit_distance) {
+        throw CorpusGenerationFailure("UTF-8 for edited patterns is not "
+                                      "supported.");
+    }
 }
 
 void CorpusGeneratorUtf8::generateCorpus(vector<string> &data) {
diff --git a/util/ng_corpus_generator.h b/util/ng_corpus_generator.h
index a7445ab6..a02721bd 100644
--- a/util/ng_corpus_generator.h
+++ b/util/ng_corpus_generator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -45,6 +45,12 @@ class NGWrapper;
 
 } // namespace ue2
 
+struct CorpusGenerationFailure {
+    explicit CorpusGenerationFailure(std::string s) :
+        message(std::move(s)) {}
+    std::string message;
+};
+
 /** \brief Abstract interface to corpus generator tool.
 */
class CorpusGenerator {
public:
diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 2b337365..b3c81574 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -47,55 +47,732 @@
 using namespace std;
 using namespace ue2;
 
+using MatchSet = set<pair<size_t, size_t>>;
+using StateBitSet = boost::dynamic_bitset<>;
+
 namespace {
 
+// returns all successors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>>
+gatherSuccessorsByDepth(const NGHolder &g, const NFAVertex &src, u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    // populate current set of successors
+    for (auto v : adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (u32 d = 1; d < depth; d++) {
+        // collect all successors for all current level vertices
+        for (auto v : cur) {
+            // don't go past special nodes
+            if (is_special(v, g)) {
+                continue;
+            }
+
+            for (auto succ : adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == succ) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
+                next.insert(succ);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+// returns all predecessors up to a given depth in a vector of sets, indexed by
+// zero-based depth from source vertex
+static
+vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g,
+                                                      NFAVertex src, u32 depth) {
+    vector<flat_set<NFAVertex>> result(depth);
+    flat_set<NFAVertex> cur, next;
+
+    assert(depth > 0);
+
+    // populate current set of predecessors
+    for (auto v : inv_adjacent_vertices_range(src, g)) {
+        // ignore self-loops
+        if (src == v) {
+            continue;
+        }
+        DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
+        cur.insert(v);
+    }
+    result[0] = cur;
+
+    for (u32 d = 1; d < depth; d++) {
+        // collect all predecessors for all current level vertices
+        for (auto v : cur) {
+            for (auto pred : inv_adjacent_vertices_range(v, g)) {
+                // ignore self-loops
+                if (v == pred) {
+                    continue;
+                }
+                DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
+                next.insert(pred);
+            }
+        }
+        result[d] = next;
+        next.swap(cur);
+        next.clear();
+    }
+
+    return result;
+}
+
+// this is a per-vertex, per-shadow level state transition table
+struct GraphCache {
+    GraphCache(u32 dist_in, const NGHolder &g) :
+        size(num_vertices(g)), edit_distance(dist_in)
+    {
+        auto dist_max = edit_distance + 1;
+
+        allocateStateTransitionTable(dist_max);
+        populateTransitionCache(g, dist_max);
+        populateAcceptCache(g, dist_max);
+    }
+
+    void allocateStateTransitionTable(u32 dist_max) {
+        // resize level 1 - per vertex
+        shadow_transitions.resize(size);
+        helper_transitions.resize(size);
+
+        // resize level 2 - per shadow level
+        for (u32 i = 0; i < size; i++) {
+            shadow_transitions[i].resize(dist_max);
+            helper_transitions[i].resize(dist_max);
+
+            // resize level 3 - per vertex
+            for (u32 d = 0; d < dist_max; d++) {
+                shadow_transitions[i][d].resize(size);
+                helper_transitions[i][d].resize(size);
+            }
+        }
+
+        // accept states are
indexed by edit distance + accept_states.resize(dist_max); + accept_eod_states.resize(dist_max); + + // vertex report maps are indexed by edit distance + vertex_reports_by_level.resize(dist_max); + vertex_eod_reports_by_level.resize(dist_max); + } + + /* + * certain transitions to helpers are disallowed: + * 1. transitions from accept/acceptEod + * 2. transitions to accept/acceptEod + * 3. from start to startDs + * 4. to a virtual/multiline start + * + * everything else is allowed. + */ + bool canTransitionToHelper(NFAVertex u, NFAVertex v, const NGHolder &g) const { + if (is_any_accept(u, g)) { + return false; + } + if (is_any_accept(v, g)) { + return false; + } + if (u == g.start && v == g.startDs) { + return false; + } + if (is_virtual_start(v, g)) { + return false; + } + return true; + } + + void populateTransitionCache(const NGHolder &g, u32 dist_max) { + // populate mapping of vertex index to vertex + vector idx_to_v(size); + for (auto v : vertices_range(g)) { + idx_to_v[g[v].index] = v; + } + + for (u32 i = 0; i < size; i++) { + auto cur_v = idx_to_v[i]; + + // set up transition tables + auto succs = gatherSuccessorsByDepth(g, cur_v, dist_max); + + assert(succs.size() == dist_max); + + for (u32 d = 0; d < dist_max; d++) { + auto &v_shadows = shadow_transitions[i][d]; + auto cur_v_bit = i; + + // enable transition to next level helper (this handles insertion) + if (d < edit_distance && !is_any_accept(cur_v, g)) { + auto &next_v_helpers = helper_transitions[i][d + 1]; + + next_v_helpers.set(cur_v_bit); + } + + // if vertex has a self-loop, we can also transition to it, + // but only if we're at shadow level 0 + if (edge(cur_v, cur_v, g).second && d == 0) { + v_shadows.set(cur_v_bit); + } + + // populate state transition tables + for (auto v : succs[d]) { + auto v_bit = g[v].index; + + // we cannot transition to startDs on any level other than + // level 0 + if (v != g.startDs || d == 0) { + // this handles direct transitions as well as removals + v_shadows.set(v_bit); + } + + // we can also transition to next-level helper (handles + // replace), provided we meet the criteria + if (d < edit_distance && canTransitionToHelper(cur_v, v, g)) { + auto &next_v_helpers = helper_transitions[i][d + 1]; + + next_v_helpers.set(v_bit); + } + } + } + } + } + + void populateAcceptCache(const NGHolder &g, u32 dist_max) { + // set up accept states masks + StateBitSet accept(size); + accept.set(g[g.accept].index); + StateBitSet accept_eod(size); + accept_eod.set(g[g.acceptEod].index); + + // gather accept and acceptEod states + for (u32 base_dist = 0; base_dist < dist_max; base_dist++) { + auto &states = accept_states[base_dist]; + auto &eod_states = accept_eod_states[base_dist]; + + states.resize(size); + eod_states.resize(size); + + // inspect each vertex + for (u32 i = 0; i < size; i++) { + // inspect all shadow levels from base_dist to dist_max + for (u32 d = 0; d < dist_max - base_dist; d++) { + auto &shadows = shadow_transitions[i][d]; + + // if this state transitions to accept, set its bit + if ((shadows & accept).any()) { + states.set(i); + } + if ((shadows & accept_eod).any()) { + eod_states.set(i); + } + } + } + } + + // populate accepts cache + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + const auto &rs = g[v].reports; + + for (u32 d = 0; d <= edit_distance; d++) { + // add self to report list at all levels + vertex_reports_by_level[d][v].insert(rs.begin(), rs.end()); + } + if (edit_distance == 0) { + // if edit distance is 0, no predecessors will have reports + continue; + } 
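+
+            // a predecessor at zero-based depth pd sits pd + 1 vertices
+            // upstream of this report vertex, so accepting from it costs an
+            // extra pd + 1 removals; it may therefore only fire this report
+            // from shadow levels d where d + pd + 1 <= edit_distance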
+ + auto preds_by_depth = gatherPredecessorsByDepth(g, v, edit_distance); + for (u32 pd = 0; pd < preds_by_depth.size(); pd++) { + const auto &preds = preds_by_depth[pd]; + // for each predecessor, add reports up to maximum edit distance + // for current depth from source vertex + for (auto pred : preds) { + for (u32 d = 0; d < edit_distance - pd; d++) { + vertex_reports_by_level[d][pred].insert(rs.begin(), rs.end()); + } + } + } + } + for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { + const auto &rs = g[v].reports; + + if (v == g.accept) { + continue; + } + + for (u32 d = 0; d <= edit_distance; d++) { + // add self to report list at all levels + vertex_eod_reports_by_level[d][v].insert(rs.begin(), rs.end()); + } + if (edit_distance == 0) { + // if edit distance is 0, no predecessors will have reports + continue; + } + + auto preds_by_depth = gatherPredecessorsByDepth(g, v, edit_distance); + for (u32 pd = 0; pd < preds_by_depth.size(); pd++) { + const auto &preds = preds_by_depth[pd]; + // for each predecessor, add reports up to maximum edit distance + // for current depth from source vertex + for (auto pred : preds) { + for (u32 d = 0; d < edit_distance - pd; d++) { + vertex_eod_reports_by_level[d][pred].insert(rs.begin(), rs.end()); + } + } + } + } + } + +#ifdef DEBUG + void dumpStateTransitionTable(const NGHolder &g) { + StateBitSet accept(size); + accept.set(g[g.accept].index); + StateBitSet accept_eod(size); + accept_eod.set(g[g.acceptEod].index); + + DEBUG_PRINTF("Dumping state transition tables\n"); + DEBUG_PRINTF("Shadows:\n"); + for (u32 i = 0; i < num_vertices(g); i++) { + DEBUG_PRINTF("%-7s %3u:", "Vertex", i); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", j); + } + printf("\n"); + for (u32 d = 0; d <= edit_distance; d++) { + DEBUG_PRINTF("%-7s %3u:", "Level", d); + const auto &s = getShadowTransitions(i, d); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", s.test(j)); + } + printf("\n"); + } + DEBUG_PRINTF("\n"); + } + + DEBUG_PRINTF("Helpers:\n"); + for (u32 i = 0; i < num_vertices(g); i++) { + DEBUG_PRINTF("%-7s %3u:", "Vertex", i); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", j); + } + printf("\n"); + for (u32 d = 0; d <= edit_distance; d++) { + DEBUG_PRINTF("%-7s %3u:", "Level", d); + const auto &s = getHelperTransitions(i, d); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", s.test(j)); + } + printf("\n"); + } + DEBUG_PRINTF("\n"); + } + + DEBUG_PRINTF("Accept transitions:\n"); + DEBUG_PRINTF("%-12s", "Vertex idx:"); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", j); + } + printf("\n"); + for (u32 d = 0; d <= edit_distance; d++) { + DEBUG_PRINTF("%-7s %3u:", "Level", d); + const auto &s = getAcceptTransitions(d); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", s.test(j)); + } + printf("\n"); + } + DEBUG_PRINTF("\n"); + + DEBUG_PRINTF("Accept EOD transitions:\n"); + DEBUG_PRINTF("%-12s", "Vertex idx:"); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", j); + } + printf("\n"); + for (u32 d = 0; d <= edit_distance; d++) { + DEBUG_PRINTF("%-7s %3u:", "Level", d); + const auto &s = getAcceptEodTransitions(d); + for (u32 j = 0; j < num_vertices(g); j++) { + printf("%3i", s.test(j)); + } + printf("\n"); + } + DEBUG_PRINTF("\n"); + + DEBUG_PRINTF("%-12s ", "Accepts:"); + for (u32 i = 0; i < num_vertices(g); i++) { + printf("%3i", accept.test(i)); + } + printf("\n"); + + DEBUG_PRINTF("%-12s ", "EOD Accepts:"); + for (u32 i = 0; i < num_vertices(g); i++) { + printf("%3i", 
accept_eod.test(i));
+        }
+        printf("\n");
+
+        DEBUG_PRINTF("Reports\n");
+        for (auto v : vertices_range(g)) {
+            for (u32 d = 0; d <= edit_distance; d++) {
+                const auto &r = vertex_reports_by_level[d][v];
+                const auto &e = vertex_eod_reports_by_level[d][v];
+                DEBUG_PRINTF("%-7s %3zu %-8s %3zu %-8s %3zu\n",
+                             "Vertex", g[v].index, "rs:", r.size(), "eod:", e.size());
+            }
+        }
+        printf("\n");
+    }
+#endif
+
+    const StateBitSet& getShadowTransitions(u32 idx, u32 level) const {
+        assert(idx < size);
+        assert(level <= edit_distance);
+        return shadow_transitions[idx][level];
+    }
+    const StateBitSet& getHelperTransitions(u32 idx, u32 level) const {
+        assert(idx < size);
+        assert(level <= edit_distance);
+        return helper_transitions[idx][level];
+    }
+    const StateBitSet& getAcceptTransitions(u32 level) const {
+        assert(level <= edit_distance);
+        return accept_states[level];
+    }
+    const StateBitSet& getAcceptEodTransitions(u32 level) const {
+        assert(level <= edit_distance);
+        return accept_eod_states[level];
+    }
+
+    /*
+     * the bitsets are indexed by vertex and shadow level. the bitset's length is
+     * equal to the total number of vertices in the graph.
+     *
+     * for convenience, helper functions are provided.
+     */
+    vector<vector<StateBitSet>> shadow_transitions;
+    vector<vector<StateBitSet>> helper_transitions;
+
+    // accept states masks, indexed by shadow level
+    vector<StateBitSet> accept_states;
+    vector<StateBitSet> accept_eod_states;
+
+    // map of all reports associated with any vertex, indexed by shadow level
+    vector<map<NFAVertex, flat_set<ReportID>>> vertex_reports_by_level;
+    vector<map<NFAVertex, flat_set<ReportID>>> vertex_eod_reports_by_level;
+
+    u32 size;
+    u32 edit_distance;
+};
+
+/*
+ * SOM workflow is expected to be the following:
+ * - Caller calls getActiveStates, which reports SOM for each active state
+ * - Caller calls getSuccessors on each of the active states, which *doesn't*
+ *   report SOM
+ * - Caller decides if the successor state should be activated, and calls
+ *   activateState with SOM set to that of previous active state (not successor!)
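+ *   (start vertices are the exception: they seed the successor's SOM with the
+ *   current offset instead of inheriting one)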
+ * - activateState then resolves any conflicts between SOMs that may arise from + * multiple active states progressing to the same successor + */ +struct StateSet { + struct State { + enum node_type { + NODE_SHADOW = 0, + NODE_HELPER + }; + State(size_t idx_in, u32 level_in, size_t som_in, node_type type_in) : + idx(idx_in), level(level_in), som(som_in), type(type_in) {} + size_t idx; + u32 level; + size_t som; + node_type type; + }; + + StateSet(size_t sz, u32 dist_in) : + shadows(dist_in + 1), helpers(dist_in + 1), + shadows_som(dist_in + 1), helpers_som(dist_in + 1), + edit_distance(dist_in) { + for (u32 dist = 0; dist <= dist_in; dist++) { + shadows[dist].resize(sz, false); + helpers[dist].resize(sz, false); + shadows_som[dist].resize(sz, 0); + helpers_som[dist].resize(sz, 0); + } + } + + void reset() { + for (u32 dist = 0; dist <= edit_distance; dist++) { + shadows[dist].reset(); + helpers[dist].reset(); + fill(shadows_som[dist].begin(), shadows_som[dist].end(), 0); + fill(helpers_som[dist].begin(), helpers_som[dist].end(), 0); + } + } + + bool empty() const { + for (u32 dist = 0; dist <= edit_distance; dist++) { + if (shadows[dist].any()) { + return false; + } + if (helpers[dist].any()) { + return false; + } + } + return true; + } + + size_t count() const { + size_t result = 0; + + for (u32 dist = 0; dist <= edit_distance; dist++) { + result += shadows[dist].count(); + result += helpers[dist].count(); + } + + return result; + } + + bool setActive(const State &s) { + switch (s.type) { + case State::NODE_HELPER: + return helpers[s.level].test_set(s.idx); + case State::NODE_SHADOW: + return shadows[s.level].test_set(s.idx); + } + assert(0); + return false; + } + + size_t getCachedSom(const State &s) const { + switch (s.type) { + case State::NODE_HELPER: + return helpers_som[s.level][s.idx]; + case State::NODE_SHADOW: + return shadows_som[s.level][s.idx]; + } + assert(0); + return 0; + } + + void setCachedSom(const State &s, const size_t som_val) { + switch (s.type) { + case State::NODE_HELPER: + helpers_som[s.level][s.idx] = som_val; + break; + case State::NODE_SHADOW: + shadows_som[s.level][s.idx] = som_val; + break; + default: + assert(0); + } + } + +#ifdef DEBUG + void dumpActiveStates() const { + const auto states = getActiveStates(); + + DEBUG_PRINTF("Dumping active states\n"); + + for (const auto &state : states) { + DEBUG_PRINTF("type: %s idx: %zu level: %u som: %zu\n", + state.type == State::NODE_HELPER ? 
"HELPER" : "SHADOW", + state.idx, state.level, state.som); + } + } +#endif + + flat_set getActiveStates() const { + flat_set result; + + for (u32 dist = 0; dist <= edit_distance; dist++) { + // get all shadow vertices (including original graph) + const auto &cur_shadow_vertices = shadows[dist]; + for (size_t id = cur_shadow_vertices.find_first(); + id != cur_shadow_vertices.npos; + id = cur_shadow_vertices.find_next(id)) { + result.emplace(id, dist, shadows_som[dist][id], + State::NODE_SHADOW); + } + + // the rest is only valid for edited graphs + if (dist == 0) { + continue; + } + + // get all helper vertices + const auto &cur_helper_vertices = helpers[dist]; + for (size_t id = cur_helper_vertices.find_first(); + id != cur_helper_vertices.npos; + id = cur_helper_vertices.find_next(id)) { + result.emplace(id, dist, helpers_som[dist][id], + State::NODE_HELPER); + } + } + + return result; + } + + // does not return SOM + flat_set getSuccessors(const State &state, const GraphCache &gc) const { + flat_set result; + + // maximum shadow depth that we can go from current level + u32 max_depth = edit_distance - state.level + 1; + + for (u32 d = 0; d < max_depth; d++) { + const auto &shadow_succ = gc.getShadowTransitions(state.idx, d); + for (size_t id = shadow_succ.find_first(); + id != shadow_succ.npos; + id = shadow_succ.find_next(id)) { + auto new_level = state.level + d; + result.emplace(id, new_level, 0, State::NODE_SHADOW); + } + + const auto &helper_succ = gc.getHelperTransitions(state.idx, d); + for (size_t id = helper_succ.find_first(); + id != helper_succ.npos; + id = helper_succ.find_next(id)) { + auto new_level = state.level + d; + result.emplace(id, new_level, 0, State::NODE_HELPER); + } + } + + return result; + } + + flat_set getAcceptStates(const GraphCache &gc) const { + flat_set result; + + for (u32 dist = 0; dist <= edit_distance; dist++) { + // get all shadow vertices (including original graph) + auto cur_shadow_vertices = shadows[dist]; + cur_shadow_vertices &= gc.getAcceptTransitions(dist); + for (size_t id = cur_shadow_vertices.find_first(); + id != cur_shadow_vertices.npos; + id = cur_shadow_vertices.find_next(id)) { + result.emplace(id, dist, shadows_som[dist][id], + State::NODE_SHADOW); + } + auto cur_helper_vertices = helpers[dist]; + cur_helper_vertices &= gc.getAcceptTransitions(dist); + for (size_t id = cur_helper_vertices.find_first(); + id != cur_helper_vertices.npos; + id = cur_helper_vertices.find_next(id)) { + result.emplace(id, dist, helpers_som[dist][id], + State::NODE_HELPER); + } + } + + return result; + } + + flat_set getAcceptEodStates(const GraphCache &gc) const { + flat_set result; + + for (u32 dist = 0; dist <= edit_distance; dist++) { + // get all shadow vertices (including original graph) + auto cur_shadow_vertices = shadows[dist]; + cur_shadow_vertices &= gc.getAcceptEodTransitions(dist); + for (size_t id = cur_shadow_vertices.find_first(); + id != cur_shadow_vertices.npos; + id = cur_shadow_vertices.find_next(id)) { + result.emplace(id, dist, shadows_som[dist][id], + State::NODE_SHADOW); + } + auto cur_helper_vertices = helpers[dist]; + cur_helper_vertices &= gc.getAcceptEodTransitions(dist); + for (size_t id = cur_helper_vertices.find_first(); + id != cur_helper_vertices.npos; + id = cur_helper_vertices.find_next(id)) { + result.emplace(id, dist, helpers_som[dist][id], + State::NODE_HELPER); + } + } + + return result; + } + + // the caller must specify SOM at current offset, and must not attempt to + // resolve SOM inheritance conflicts + void 
activateState(const State &state) { + size_t cur_som = state.som; + if (setActive(state)) { + size_t cached_som = getCachedSom(state); + cur_som = min(cur_som, cached_som); + } + setCachedSom(state, cur_som); + } + + vector shadows; + vector helpers; + vector> shadows_som; + vector> helpers_som; + u32 edit_distance; +}; + +// for flat_set +bool operator<(const StateSet::State &a, const StateSet::State &b) { + ORDER_CHECK(idx); + ORDER_CHECK(level); + ORDER_CHECK(type); + ORDER_CHECK(som); + return false; +} struct fmstate { const size_t num_states; // number of vertices in graph StateSet states; // currently active states StateSet next; // states on after this iteration + GraphCache &gc; vector vertices; // mapping from index to vertex size_t offset = 0; unsigned char cur = 0; unsigned char prev = 0; - const bool som; const bool utf8; const bool allowStartDs; const ReportManager &rm; - boost::dynamic_bitset<> accept; // states leading to accept - boost::dynamic_bitset<> accept_with_eod; // states leading to accept or eod - - fmstate(const NGHolder &g, bool som_in, bool utf8_in, bool aSD_in, - const ReportManager &rm_in) - : num_states(num_vertices(g)), states(num_states), next(num_states), - vertices(num_vertices(g), NGHolder::null_vertex()), som(som_in), - utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in), accept(num_states), - accept_with_eod(num_states) { + fmstate(const NGHolder &g, GraphCache &gc_in, bool utf8_in, bool aSD_in, + const u32 edit_distance, const ReportManager &rm_in) + : num_states(num_vertices(g)), + states(num_states, edit_distance), + next(num_states, edit_distance), + gc(gc_in), vertices(num_vertices(g), NGHolder::null_vertex()), + utf8(utf8_in), allowStartDs(aSD_in), rm(rm_in) { // init states - states.s.set(g[g.start].index); + states.activateState( + StateSet::State {g[g.start].index, 0, 0, + StateSet::State::NODE_SHADOW}); if (allowStartDs) { - states.s.set(g[g.startDs].index); + states.activateState( + StateSet::State {g[g.startDs].index, 0, 0, + StateSet::State::NODE_SHADOW}); } // fill vertex mapping - for (const auto &v : vertices_range(g)) { + for (auto v : vertices_range(g)) { vertices[g[v].index] = v; } - // init accept states - for (const auto &u : inv_adjacent_vertices_range(g.accept, g)) { - accept.set(g[u].index); - } - accept_with_eod = accept; - for (const auto &u : inv_adjacent_vertices_range(g.acceptEod, g)) { - accept_with_eod.set(g[u].index); - } } }; @@ -140,8 +817,7 @@ bool isUtf8CodePoint(const char c) { } static -bool canReach(const NGHolder &g, const NFAEdge &e, - struct fmstate &state) { +bool canReach(const NGHolder &g, const NFAEdge &e, struct fmstate &state) { auto flags = g[e].assert_flags; if (!flags) { return true; @@ -177,36 +853,52 @@ bool canReach(const NGHolder &g, const NFAEdge &e, static void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state, bool allowEodMatches) { - auto acc_states = state.states.s; - acc_states &= allowEodMatches ? 
state.accept_with_eod : state.accept; + flat_set accepts {g.accept, g.acceptEod}; - for (size_t i = acc_states.find_first(); i != acc_states.npos; - i = acc_states.find_next(i)) { - const NFAVertex u = state.vertices[i]; - const size_t &som_offset = state.states.som[i]; - - // we can't accept anything from startDs in between UTF-8 codepoints - if (state.utf8 && u == g.startDs && !isUtf8CodePoint(state.cur)) { + for (auto v : accepts) { + bool eod = v == g.acceptEod; + if (eod && !allowEodMatches) { continue; } - for (const auto &e : out_edges_range(u, g)) { - NFAVertex v = target(e, g); - if (v == g.accept || (v == g.acceptEod && allowEodMatches)) { - // check edge assertions if we are allowed to reach accept - if (!canReach(g, e, state)) { - continue; - } - DEBUG_PRINTF("match found at %zu\n", state.offset); + auto active_states = eod ? state.states.getAcceptEodStates(state.gc) : + state.states.getAcceptStates(state.gc); - assert(!g[u].reports.empty()); - for (const auto &report_id : g[u].reports) { - const Report &ri = state.rm.getReport(report_id); + DEBUG_PRINTF("Number of active states: %zu\n", active_states.size()); - DEBUG_PRINTF("report %u has offset adjustment %d\n", - report_id, ri.offsetAdjust); - matches.emplace(som_offset, state.offset + ri.offsetAdjust); - } + for (const auto &cur : active_states) { + auto u = state.vertices[cur.idx]; + + // we can't accept anything from startDs in between UTF-8 codepoints + if (state.utf8 && u == g.startDs && !isUtf8CodePoint(state.cur)) { + continue; + } + + const auto &reports = + eod ? + state.gc.vertex_eod_reports_by_level[cur.level][u] : + state.gc.vertex_reports_by_level[cur.level][u]; + + NFAEdge e = edge(u, v, g); + + // we assume edge assertions only exist at level 0 + if (e && !canReach(g, e, state)) { + continue; + } + + DEBUG_PRINTF("%smatch found at %zu\n", + eod ? "eod " : "", state.offset); + + assert(!reports.empty()); + for (const auto &report_id : reports) { + const Report &ri = state.rm.getReport(report_id); + + DEBUG_PRINTF("report %u has offset adjustment %d\n", + report_id, ri.offsetAdjust); + DEBUG_PRINTF("match from (i:%zu,l:%u,t:%u): (%zu,%zu)\n", + cur.idx, cur.level, cur.type, cur.som, + state.offset + ri.offsetAdjust); + matches.emplace(cur.som, state.offset + ri.offsetAdjust); } } } @@ -214,20 +906,18 @@ void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state, static void step(const NGHolder &g, struct fmstate &state) { - state.next.s.reset(); + state.next.reset(); - for (size_t i = state.states.s.find_first(); i != state.states.s.npos; - i = state.states.s.find_next(i)) { - const NFAVertex &u = state.vertices[i]; - const size_t &u_som_offset = state.states.som[i]; + const auto active = state.states.getActiveStates(); - for (const auto &e : out_edges_range(u, g)) { - NFAVertex v = target(e, g); - if (v == g.acceptEod) { - // can't know the future: we don't know if we're at EOD. 
- continue; - } - if (v == g.accept) { + for (const auto &cur : active) { + auto u = state.vertices[cur.idx]; + auto succ_list = state.states.getSuccessors(cur, state.gc); + + for (auto succ : succ_list) { + auto v = state.vertices[succ.idx]; + + if (is_any_accept(v, g)) { continue; } @@ -235,37 +925,70 @@ void step(const NGHolder &g, struct fmstate &state) { continue; } - const CharReach &cr = g[v].char_reach; - const size_t v_idx = g[v].index; + // GraphCache doesn't differentiate between successors for shadows + // and helpers, and StateSet does not know anything about the graph, + // so the only place we can do it is here. we can't self-loop on a + // startDs if we're startDs's helper, so disallow it. + if (u == g.startDs && v == g.startDs && + succ.level != 0 && succ.level == cur.level) { + continue; + } - // check reachability and edge assertions - if (cr.test(state.cur) && canReach(g, e, state)) { - // if we aren't in SOM mode, just set every SOM to 0 - if (!state.som) { - state.next.s.set(v_idx); - state.next.som[v_idx] = 0; - continue; + // for the reasons outlined above, also putting this here. + // disallow transitions from start to startDs on levels other than zero + if (u == g.start && v == g.startDs && + cur.level != 0 && succ.level != 0) { + continue; + } + + bool can_reach = false; + + if (succ.type == StateSet::State::NODE_HELPER) { + can_reach = true; + } else { + // we assume edge assertions only exist on level 0 + const CharReach &cr = g[v].char_reach; + NFAEdge e = edge(u, v, g); + + if (cr.test(state.cur) && + (!e || canReach(g, e, state))) { + can_reach = true; } + } - // if this is first vertex since start, use current offset as SOM + // check edge assertions if we are allowed to reach accept + DEBUG_PRINTF("reaching %zu->%zu ('%c'->'%c'): %s\n", + g[u].index, g[v].index, + ourisprint(state.prev) ? state.prev : '?', + ourisprint(state.cur) ? state.cur : '?', + can_reach ? "yes" : "no"); + + if (can_reach) { + // we should use current offset as SOM if: + // - we're at level 0 and we're a start vertex + // - we're a fake start shadow size_t next_som; - if (u == g.start || u == g.startDs || is_virtual_start(u, g)) { + bool reset = is_any_start(u, g) && cur.level == 0; + reset |= is_virtual_start(u, g) && + cur.type == StateSet::State::NODE_SHADOW; + + if (reset) { next_som = state.offset; } else { // else, inherit SOM from predecessor - next_som = u_som_offset; + next_som = cur.som; } + succ.som = next_som; - // check if the vertex is already active - // if this vertex is not yet active, use current SOM - if (!state.next.s.test(v_idx)) { - state.next.s.set(v_idx); - state.next.som[v_idx] = next_som; - } else { - // else, work out leftmost SOM - state.next.som[v_idx] = - min(next_som, state.next.som[v_idx]); - } + DEBUG_PRINTF("src: idx %zu level: %u som: %zu type: %s\n", + cur.idx, cur.level, cur.som, + cur.type == StateSet::State::NODE_HELPER ? "H" : "S"); + DEBUG_PRINTF("dst: idx %zu level: %u som: %zu type: %s\n", + succ.idx, succ.level, succ.som, + succ.type == StateSet::State::NODE_HELPER ? "H" : "S"); + + // activate successor (SOM will be handled by activateState) + state.next.activateState(succ); } } } @@ -312,15 +1035,28 @@ void filterMatches(MatchSet &matches) { * Fills \a matches with offsets into the data stream where a match is found. 
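+ *
+ * When edit_distance is non-zero, offsets are also reported for corpora that
+ * lie within the given Levenshtein distance of the pattern's exact matches.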
*/ void findMatches(const NGHolder &g, const ReportManager &rm, - const string &input, MatchSet &matches, const bool notEod, - const bool som, const bool utf8) { + const string &input, MatchSet &matches, + const u32 edit_distance, const bool notEod, const bool utf8) { assert(hasCorrectlyNumberedVertices(g)); + // cannot match fuzzy utf8 patterns, this should've been filtered out at + // compile time, so make it an assert + assert(!edit_distance || !utf8); + + DEBUG_PRINTF("Finding matches\n"); + + GraphCache gc(edit_distance, g); +#ifdef DEBUG + gc.dumpStateTransitionTable(g); +#endif const bool allowStartDs = (proper_out_degree(g.startDs, g) > 0); - struct fmstate state(g, som, utf8, allowStartDs, rm); + struct fmstate state(g, gc, utf8, allowStartDs, edit_distance, rm); for (auto it = input.begin(), ite = input.end(); it != ite; ++it) { +#ifdef DEBUG + state.states.dumpActiveStates(); +#endif state.offset = distance(input.begin(), it); state.cur = *it; @@ -328,26 +1064,26 @@ void findMatches(const NGHolder &g, const ReportManager &rm, getMatches(g, matches, state, false); - DEBUG_PRINTF("index %zu, %zu states on\n", state.offset, - state.next.s.count()); - if (state.next.s.empty()) { - if (state.som) { - filterMatches(matches); - } + DEBUG_PRINTF("offset %zu, %zu states on\n", state.offset, + state.next.count()); + if (state.next.empty()) { + filterMatches(matches); return; } state.states = state.next; state.prev = state.cur; } +#ifdef DEBUG + state.states.dumpActiveStates(); +#endif state.offset = input.size(); state.cur = 0; // do additional step to get matches after stream end, this time count eod // matches also (or not, if we're in notEod mode) + DEBUG_PRINTF("Looking for EOD matches\n"); getMatches(g, matches, state, !notEod); - if (state.som) { - filterMatches(matches); - } + filterMatches(matches); } diff --git a/util/ng_find_matches.h b/util/ng_find_matches.h index e9e47010..39ee3f68 100644 --- a/util/ng_find_matches.h +++ b/util/ng_find_matches.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,6 +51,7 @@ struct BoundaryReports; void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm, const std::string &input, std::set> &matches, - const bool notEod, const bool som, const bool utf8); + const unsigned int max_edit_distance, const bool notEod, + const bool utf8); #endif // NG_FIND_MATCHES_H From 9f72dede5cd81bec450e1189633f4e5d3a49b72a Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 10 Feb 2017 15:44:16 +0000 Subject: [PATCH 127/326] Add support for approximate matching in NFA matcher unit tests --- unit/hyperscan/bad_patterns.txt | 5 +++++ unit/hyperscan/expr_info.cpp | 29 ++++++++++++++++++++++++- unit/internal/nfagraph_find_matches.cpp | 15 ++++++++++--- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index d970761a..09a2f7e1 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -136,3 +136,8 @@ 139:/foo(*UTF8)bar/ #(*UTF8) must be at start of expression, encountered at index 5. 140:/(?i)(*UTF8)foobar/ #(*UTF8) must be at start of expression, encountered at index 6. 141:/(*@&/ #Unknown control verb at index 2. +142:/abcd/si{edit_distance=4} #Approximate matching patterns that reduce to vacuous patterns are disallowed. 
+143:/foobar|hatstand/sL{edit_distance=6} #Approximate matching patterns that reduce to vacuous patterns are disallowed. +144:/abc\b/{edit_distance=1} #Zero-width assertions are disallowed for approximate matching. +145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching. +146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching. diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index aa242798..e6ffa9ea 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -168,10 +168,37 @@ static const expected_info ei_test[] = { // Some cases with extended parameters. {"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 6, UINT_MAX, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0}, - {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 0, UINT_MAX, 0, 0, 0}, + + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2}, + 10, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, + 4, UINT_MAX, 0, 0, 0}, + {"abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, + 4, 6, 0, 0, 0}, + + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2}, + 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, + 4, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, + 4, 6, 0, 0, 0}, + + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, 7, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2}, + 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, + 4, 8, 0, 0, 0}, + {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, + 4, 6, 0, 0, 0}, }; INSTANTIATE_TEST_CASE_P(ExprInfo, ExprInfop, ValuesIn(ei_test)); diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp index 553d6dc5..99fdb09e 100644 --- a/unit/internal/nfagraph_find_matches.cpp +++ b/unit/internal/nfagraph_find_matches.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,7 +76,7 @@ class MatchesTest: public TestWithParam { static const MatchesTestParams matchesTests[] = { // EOD and anchored patterns - // these should produce no matches + // these should produce no matches { "^foobar", "foolish", {}, 0, false, true}, { "^foobar$", "ze foobar", {}, 0, false, true}, { "^foobar$", "foobar ", {}, 0, false, true}, @@ -212,10 +212,19 @@ TEST_P(MatchesTest, Check) { bool utf8 = (t.flags & HS_FLAG_UTF8) > 0; set> matches; - findMatches(*g, rm, t.input, matches, t.notEod, t.som, utf8); 
+ findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8); set> expected(begin(t.matches), end(t.matches)); + // findMatches returns matches with SOM, so zero them out if not SOM + if (!t.som) { + set> new_matches; + for (auto &m : matches) { + new_matches.emplace(0, m.second); + } + matches.swap(new_matches); + } + ASSERT_EQ(expected, matches) << "Pattern '" << t.pattern << "' against input '" << t.input << "'"; } From ebe849603bb0e5c40fc16ca741f02674b1dd74f5 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 10 Feb 2017 15:45:09 +0000 Subject: [PATCH 128/326] Add support for approximate matching in other tools --- tools/hsbench/common.h | 4 +++- tools/hsbench/engine_hyperscan.cpp | 6 +++++- tools/hsbench/main.cpp | 16 ++++++++++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tools/hsbench/common.h b/tools/hsbench/common.h index a4d60021..efff3f99 100644 --- a/tools/hsbench/common.h +++ b/tools/hsbench/common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,5 +38,7 @@ extern bool saveDatabases; extern bool loadDatabases; extern std::string serializePath; extern unsigned int somPrecisionMode; +extern bool forceEditDistance; +extern unsigned editDistance; #endif // COMMON_H diff --git a/tools/hsbench/engine_hyperscan.cpp b/tools/hsbench/engine_hyperscan.cpp index f5abb9fa..eadc1cc4 100644 --- a/tools/hsbench/engine_hyperscan.cpp +++ b/tools/hsbench/engine_hyperscan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -289,6 +289,10 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode, m.first); return nullptr; } + if (forceEditDistance) { + extparam.flags |= HS_EXT_FLAG_EDIT_DISTANCE; + extparam.edit_distance = editDistance; + } exprs.push_back(expr); ids.push_back(m.first); diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index 4298963b..a99760a2 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -72,6 +72,8 @@ bool saveDatabases = false; bool loadDatabases = false; string serializePath(""); unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE; +bool forceEditDistance = false; +unsigned editDistance = 0; namespace /* anonymous */ { @@ -169,6 +171,8 @@ void usage(const char *error) { " instead.\n"); printf(" -w DIR After compiling, save to files in DIR.\n"); printf(" -d NUMBER Set SOM precision mode (default: 8 (large)).\n"); + printf(" -E DISTANCE Match all patterns within edit distance" + " DISTANCE.\n"); printf("\n"); printf(" --per-scan Display per-scan Mbit/sec results.\n"); printf(" --echo-matches Display all matches that occur during scan.\n"); @@ -191,7 +195,7 @@ struct BenchmarkSigs { static void processArgs(int argc, char *argv[], vector &sigSets, UNUSED Grey &grey) { - const char options[] = "-b:c:Cd:e:G:hi:n:No:p:sT:Vw:z:"; + const char options[] = "-b:c:Cd:e:E:G:hi:n:No:p:sT:Vw:z:"; int in_sigfile = 0; int do_per_scan = 
0; int do_echo_matches = 0; @@ -237,6 +241,14 @@ void processArgs(int argc, char *argv[], vector &sigSets, case 'e': exprPath.assign(optarg); break; + case 'E': + if (!fromString(optarg, editDistance)) { + usage("Couldn't parse argument to -E flag, should be" + " a non-negative integer."); + exit(1); + } + forceEditDistance = true; + break; #ifndef RELEASE_BUILD case 'G': applyGreyOverrides(&grey, string(optarg)); From eed2743d042136611e77adca25437a8092f7adea Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 10 Feb 2017 15:46:29 +0000 Subject: [PATCH 129/326] Add approximate matching documentation --- doc/dev-reference/compilation.rst | 87 +++++++++++++++++++++++++++++-- doc/dev-reference/performance.rst | 10 ++++ 2 files changed, 94 insertions(+), 3 deletions(-) diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index de679422..02b5c3f3 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -171,6 +171,8 @@ The following regex constructs are not supported by Hyperscan: * Callouts and embedded code. * Atomic grouping and possessive quantifiers. +.. _semantics: + ********* Semantics ********* @@ -284,16 +286,24 @@ which provides the following fields: expression should match successfully. * ``min_length``: The minimum match length (from start to end) required to successfully match this expression. +* ``edit_distance``: Match this expression within a given Levenshtein distance. -These parameters allow the set of matches produced by a pattern to be -constrained at compile time, rather than relying on the application to process -unwanted matches at runtime. +These parameters either allow the set of matches produced by a pattern to be +constrained at compile time (rather than relying on the application to process +unwanted matches at runtime), or allow matching a pattern approximately (within +a given edit distance) to produce more matches. For example, the pattern :regexp:`/foo.*bar/` when given a ``min_offset`` of 10 and a ``max_offset`` of 15 will not produce matches when scanned against ``foobar`` or ``foo0123456789bar`` but will produce a match against the data streams ``foo0123bar`` or ``foo0123456bar``. +Similarly, the pattern :regexp:`/foobar/` when given an ``edit_distance`` of 2 +will produce matches when scanned against ``foobar``, ``fooba``, ``fobr``, +``fo_baz``, ``foooobar``, and anything else that lies within edit distance of 2 +(as defined by Levenshtein distance). For more details, see the +:ref:`approximate_matching` section. + ================= Prefiltering Mode ================= @@ -375,3 +385,74 @@ An :c:type:`hs_platform_info_t` structure targeted at the current host can be built with the :c:func:`hs_populate_platform` function. See :ref:`api_constants` for the full list of CPU tuning and feature flags. + +.. _approximate_matching: + +******************** +Approximate matching +******************** + +Hyperscan provides an experimental approximate matching mode, which will match +patterns within a given edit distance. The exact matching behavior is defined as +follows: + +#. **Edit distance** is defined as Levenshtein distance. That is, there are + three possible edit types considered: insertion, removal and substitution. + More formal description can be found on + `Wikipedia `_. + +#. **Approximate matching** will match all *corpora* within a given edit + distance. 
That is, given a pattern, approximate matching will match anything + that can be edited to arrive at a corpus that exactly matches the original + pattern. + +#. **Matching semantics** are exactly the same as described in :ref:`semantics`. + +Here are a few examples of approximate matching: + +* Pattern :regexp:`/foo/` can match ``foo`` when using regular Hyperscan + matching behavior. With approximate matching within edit distance 2, the + pattern will produce matches when scanned against ``foo``, ``foooo``, ``f00``, + ``f``, and anything else that lies within edit distance 2 of matching corpora + for the original pattern (``foo`` in this case). + +* Pattern :regexp:`/foo(bar)+/` with edit distance 1 will match ``foobarbar``, + ``foobarb0r``, ``fooarbar``, ``foobarba``, ``f0obarbar``, ``fobarbar`` and + anything else that lies within edit distance 1 of matching corpora for the + original pattern (``foobarbar`` in this case). + +* Pattern :regexp:`/foob?ar/` with edit distance 2 will match ``fooar``, + ``foo``, ``fabar``, ``oar`` and anything else that lies within edit distance 2 + of matching corpora for the original pattern (``fooar`` in this case). + +Currently, there are trade-offs and limitations that come with approximate +matching support. Here they are, in a nutshell: + +* Reduced pattern support: + + * For many patterns, approximate matching is complex and can result in + Hyperscan failing to compile a pattern with a "Pattern too large" error, + even if the pattern is supported in normal operation. + * Additionally, some patterns cannot be approximately matched because they + reduce to so-called "vacuous" patterns (patterns that match everything). For + example, pattern :regexp:`/foo/` with edit distance 3, if implemented, + would reduce to matching zero-length buffers. Such patterns will result in a + "Pattern cannot be approximately matched" compile error. + * Finally, due to the inherent complexities of defining matching behavior, + approximate matching implements a reduced subset of regular expression + syntax. Approximate matching does not support UTF-8 (and other + multibyte character encodings), and word boundaries (that is, ``\b``, ``\B`` + and other equivalent constructs). Patterns containing unsupported constructs + will result in "Pattern cannot be approximately matched" compile error. + * When using approximate matching in conjunction with SOM, all of the + restrictions of SOM also apply. See :ref:`som` for more + details. +* Increased stream state/byte code size requirements: due to approximate + matching byte code being inherently larger and more complex than exact + matching, the corresponding requirements also increase. +* Performance overhead: similarly, there is generally a performance cost + associated with approximate matching, both due to increased matching + complexity, and due to the fact that it will produce more matches. + +Approximate matching is always disabled by default, and can be enabled on a +per-pattern basis by using an extended parameter described in :ref:`extparam`. diff --git a/doc/dev-reference/performance.rst b/doc/dev-reference/performance.rst index 8cc0b675..23781bd6 100644 --- a/doc/dev-reference/performance.rst +++ b/doc/dev-reference/performance.rst @@ -333,3 +333,13 @@ Similarly, the :c:member:`hs_expr_ext::min_length` extended parameter can be used to specify a lower bound on the length of the matches for a pattern. 
Using this facility may be more lightweight in some circumstances than using the SOM flag and post-confirming match length in the calling application. + +******************** +Approximate matching +******************** + +.. tip:: Approximate matching is an experimental feature. + +There is generally a performance impact associated with approximate matching due +to the reduced specificity of the matches. This impact may vary significantly +depending on the pattern and edit distance. From c50a931bb43601e85807003f6058fa6a901d9b07 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 3 Mar 2017 14:53:41 +1100 Subject: [PATCH 130/326] Use std::move explicitly to avoid ambiguity with boost --- src/nfagraph/ng_limex.cpp | 4 ++-- src/nfagraph/ng_limex_accel.cpp | 4 ++-- src/rose/rose_build_anchored.cpp | 4 ++-- src/rose/rose_build_bytecode.cpp | 2 +- src/rose/rose_build_compile.cpp | 2 +- src/rose/rose_build_merge.cpp | 4 ++-- src/rose/rose_build_role_aliasing.cpp | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index e92790b9..7f157c33 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -354,7 +354,7 @@ void attemptToUseAsStart(const NGHolder &g, NFAVertex u, auto ni_inserter = inserter(new_inter, new_inter.end()); set_intersection(top_inter.begin(), top_inter.end(), v_tops.begin(), v_tops.end(), ni_inserter); - top_inter = move(new_inter); + top_inter = std::move(new_inter); succs.insert(v); } diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index bfba7c71..cd662d9c 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -552,7 +552,7 @@ AccelScheme findBestAccelScheme(vector > paths, if (look_for_double_byte) { DAccelScheme da = findBestDoubleAccelScheme(paths, terminating); if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) { - rv.double_byte = move(da.double_byte); + rv.double_byte = std::move(da.double_byte); rv.double_cr = move(da.double_cr); rv.double_offset = da.double_offset; } diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 7c8c9023..d4e08bb3 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -185,7 +185,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) { assert(id < build.literal_info.size()); new_reports.insert(build.literal_info.at(id).fragment_id); } - ds.reports = move(new_reports); + ds.reports = std::move(new_reports); } } @@ -220,7 +220,7 @@ void remapIdsToPrograms(const RoseBuildImpl &build, raw_dfa &rdfa) { auto &frag = build.fragments.at(fragment_id); new_reports.insert(frag.lit_program_offset); } - ds.reports = move(new_reports); + ds.reports = std::move(new_reports); } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 8dcf1d66..4b35e9e5 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4596,7 +4596,7 @@ map> 
findEdgesByLiteral(const RoseBuildImpl &build) { return tie(g[source(a, g)].index, g[target(a, g)].index) < tie(g[source(b, g)].index, g[target(b, g)].index); }); - lit_edge_map.emplace(m.first, move(edge_list)); + lit_edge_map.emplace(m.first, std::move(edge_list)); } return lit_edge_map; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 7dd55d5f..00586f65 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1032,7 +1032,7 @@ void packInfixTops(NGHolder &h, RoseGraph &g, updated_tops.insert(top_mapping.at(t)); } } - h[e].tops = move(updated_tops); + h[e].tops = std::move(updated_tops); if (h[e].tops.empty()) { DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index); dead.push_back(e); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 54a7390e..685d1523 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1759,7 +1759,7 @@ void replaceTops(NGHolder &h, const map &top_mapping) { DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t); new_tops.insert(top_mapping.at(t)); } - h[e].tops = move(new_tops); + h[e].tops = std::move(new_tops); } } diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index 3ad4566e..f8174d74 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -838,7 +838,7 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g, auto pt_inserter = inserter(pruned_tops, pruned_tops.end()); set_intersection(h[e].tops.begin(), h[e].tops.end(), used_tops.begin(), used_tops.end(), pt_inserter); - h[e].tops = move(pruned_tops); + h[e].tops = std::move(pruned_tops); if (h[e].tops.empty()) { DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index); dead.push_back(e); @@ -1460,7 +1460,7 @@ void splitAndFilterBuckets(vector> &buckets, return; // No new buckets created. } - buckets = move(out); + buckets = std::move(out); removeSingletonBuckets(buckets); } From 5edecbf539fb3b979f45c5832ef8c04c07bc085e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 6 Mar 2017 14:15:56 +1100 Subject: [PATCH 131/326] ng: check can_never_match before validate_fuzzy --- src/nfagraph/ng.cpp | 4 ++++ unit/hyperscan/bad_patterns.txt | 1 + 2 files changed, 5 insertions(+) diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index 9453aae9..2f6d8cd7 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -330,6 +330,10 @@ bool NG::addGraph(NGWrapper &w) { /* ensure utf8 starts at cp boundary */ ensureCodePointStart(rm, w); + if (can_never_match(w)) { + throw CompileError(w.expressionIndex, "Pattern can never match."); + } + // validate graph's suitability for fuzzing before resolving asserts validate_fuzzy_compile(w, w.edit_distance, w.utf8, cc.grey); diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 09a2f7e1..3d6d9db9 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -141,3 +141,4 @@ 144:/abc\b/{edit_distance=1} #Zero-width assertions are disallowed for approximate matching. 145:/abc/8{edit_distance=1} #UTF-8 is disallowed for approximate matching. 146:/(*UTF8)abc/{edit_distance=1} #UTF-8 is disallowed for approximate matching. 
+147:/\b\BMYBt/s{edit_distance=1} #Pattern can never match.

From 96fdca6f544d2086972361df332edc543ba5fab0 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 7 Mar 2017 09:41:51 +1100
Subject: [PATCH 132/326] rose: guard writeNfaInfo from zero NFAs

---
 src/rose/rose_build_bytecode.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 4b35e9e5..bce26a10 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -2695,9 +2695,13 @@ void writeLeftInfo(build_context &bc, RoseEngine &proto,
 static
 void writeNfaInfo(const RoseBuildImpl &build, build_context &bc,
                   RoseEngine &proto, const set<u32> &no_retrigger_queues) {
+    const u32 queue_count = build.qif.allocated_count();
+    if (!queue_count) {
+        return;
+    }
+
     auto ekey_lists = buildSuffixEkeyLists(build, bc, build.qif);
 
-    const u32 queue_count = build.qif.allocated_count();
     vector<NfaInfo> infos(queue_count);
     memset(infos.data(), 0, sizeof(NfaInfo) * queue_count);
 
From d2416736cb586d380ffb9b1ff3b63194247d7e81 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Tue, 7 Mar 2017 09:58:24 +1100
Subject: [PATCH 133/326] Use intrinsic to get correct movq everywhere

The real trick here is that _mm_set_epi64x() (note the 'x') takes a
64-bit value - not a ptr to a 128-bit value like the non-x - so
compilers don't twist themselves in knots with alignment or whatever
confuses them.
---
 src/util/simd_utils.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h
index c6d43f57..484b47c0 100644
--- a/src/util/simd_utils.h
+++ b/src/util/simd_utils.h
@@ -180,9 +180,7 @@ static really_inline u64a movq(const m128 in) {
 
 /* another form of movq */
 static really_inline
 m128 load_m128_from_u64a(const u64a *p) {
-    m128 out;
-    __asm__ ("vmovq\t%1,%0" : "=x"(out) :"m"(*p));
-    return out;
+    return _mm_set_epi64x(0LL, *p);
 }
 
 #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed)
 
From 7ca81ff530650859067d98ffe1645bb28fc0f7e2 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 7 Mar 2017 11:47:10 +1100
Subject: [PATCH 134/326] ng_find_matches: limit how big we're willing to go

Tests which require tracking more than 15K states (including edit
distance states) are very, very slow.
---
 unit/internal/nfagraph_find_matches.cpp |  3 ++-
 util/ng_find_matches.cpp                | 15 ++++++++++++---
 util/ng_find_matches.h                  | 10 +++++++---
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp
index 99fdb09e..92c514d8 100644
--- a/unit/internal/nfagraph_find_matches.cpp
+++ b/unit/internal/nfagraph_find_matches.cpp
@@ -212,7 +212,8 @@ TEST_P(MatchesTest, Check) {
     bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
 
     set<pair<size_t, size_t>> matches;
-    findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
+    bool success = findMatches(*g, rm, t.input, matches, 0, t.notEod, utf8);
+    ASSERT_TRUE(success);
 
     set<pair<size_t, size_t>> expected(begin(t.matches), end(t.matches));
 
diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index b3c81574..0890319d 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -52,6 +52,9 @@ using StateBitSet = boost::dynamic_bitset<>;
 
 namespace {
 
+/** \brief Max number of states (taking edit distance into account).
*/ +static constexpr size_t STATE_COUNT_MAX = 15000; + // returns all successors up to a given depth in a vector of sets, indexed by // zero-based depth from source vertex static @@ -1034,7 +1037,7 @@ void filterMatches(MatchSet &matches) { * * Fills \a matches with offsets into the data stream where a match is found. */ -void findMatches(const NGHolder &g, const ReportManager &rm, +bool findMatches(const NGHolder &g, const ReportManager &rm, const string &input, MatchSet &matches, const u32 edit_distance, const bool notEod, const bool utf8) { assert(hasCorrectlyNumberedVertices(g)); @@ -1042,7 +1045,12 @@ void findMatches(const NGHolder &g, const ReportManager &rm, // compile time, so make it an assert assert(!edit_distance || !utf8); - DEBUG_PRINTF("Finding matches\n"); + const size_t total_states = num_vertices(g) * (3 * edit_distance + 1); + DEBUG_PRINTF("Finding matches (%zu total states)\n", total_states); + if (total_states > STATE_COUNT_MAX) { + DEBUG_PRINTF("too big\n"); + return false; + } GraphCache gc(edit_distance, g); #ifdef DEBUG @@ -1068,7 +1076,7 @@ void findMatches(const NGHolder &g, const ReportManager &rm, state.next.count()); if (state.next.empty()) { filterMatches(matches); - return; + return true; } state.states = state.next; state.prev = state.cur; @@ -1086,4 +1094,5 @@ void findMatches(const NGHolder &g, const ReportManager &rm, getMatches(g, matches, state, !notEod); filterMatches(matches); + return true; } diff --git a/util/ng_find_matches.h b/util/ng_find_matches.h index 39ee3f68..9860c202 100644 --- a/util/ng_find_matches.h +++ b/util/ng_find_matches.h @@ -44,11 +44,15 @@ struct BoundaryReports; } // namespace ue2 -/** \brief Find all matches for a given graph when executed against \a input. +/** + * \brief Find all matches for a given graph when executed against \a input. * - * Fills \a matches with offsets into the data stream where a match is found. + * Fills \a matches with offsets into the data stream where a match is found. + * + * Returns false if this graph is too large to find its matches in reasonable + * time. 
 */
-void findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
+bool findMatches(const ue2::NGHolder &g, const ue2::ReportManager &rm,
                  const std::string &input,
                  std::set<std::pair<size_t, size_t>> &matches,
                  const unsigned int max_edit_distance, const bool notEod,

From ffab97ca8c30c2434f3a524d23595ec2400d37d1 Mon Sep 17 00:00:00 2001
From: "Wang, Xiang W"
Date: Wed, 8 Mar 2017 06:35:23 -0500
Subject: [PATCH 135/326] FDR: delete dead confirm code

---
 src/fdr/fdr.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c
index e15c6c33..edd12733 100644
--- a/src/fdr/fdr.c
+++ b/src/fdr/fdr.c
@@ -355,16 +355,6 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
         if (!(fdrc->groups & *control)) {
             continue;
         }
-        if (!fdrc->mult) {
-            u32 id = fdrc->nBitsOrSoleID;
-            if ((*last_match_id == id) && (fdrc->flags & NoRepeat)) {
-                continue;
-            }
-            *last_match_id = id;
-            *control = a->cb(ptr_main + byte - a->buf, ptr_main + byte - a->buf,
-                             id, a->ctxt);
-            continue;
-        }
         u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1);
         confWithBit(fdrc, a, ptr_main - a->buf + byte, control,
                     last_match_id, confVal);

From 533fcf383dcd71e18c043cda0d61507486f3fe78 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 8 Mar 2017 11:36:24 +1100
Subject: [PATCH 136/326] ng_fuzzy: apply a resource limit to vertex count

---
 src/grey.cpp              |  4 +++-
 src/grey.h                |  3 +++
 src/nfagraph/ng_fuzzy.cpp | 14 ++++++++++++--
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/grey.cpp b/src/grey.cpp
index 05473abb..ea92fdb5 100644
--- a/src/grey.cpp
+++ b/src/grey.cpp
@@ -156,7 +156,8 @@ Grey::Grey(void) :
                    limitEngineSize(1073741824), // 1 GB
                    limitDFASize(1073741824), // 1 GB
                    limitNFASize(1048576), // 1 MB
-                   limitLBRSize(1048576) // 1 MB
+                   limitLBRSize(1048576), // 1 MB
+                   limitApproxMatchingVertices(5000)
 {
     assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
 }
@@ -317,6 +318,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
         G_UPDATE(limitDFASize);
         G_UPDATE(limitNFASize);
         G_UPDATE(limitLBRSize);
+        G_UPDATE(limitApproxMatchingVertices);
 #undef G_UPDATE
 
         if (key == "simple_som") {
diff --git a/src/grey.h b/src/grey.h
index c2d5ac92..5fde7b4b 100644
--- a/src/grey.h
+++ b/src/grey.h
@@ -201,6 +201,9 @@ struct Grey {
     u32 limitDFASize; //!< max size of a DFA (in bytes)
     u32 limitNFASize; //!< max size of an NFA (in bytes)
     u32 limitLBRSize; //!< max size of an LBR engine (in bytes)
+
+    // Approximate matching limits.
+    u32 limitApproxMatchingVertices; //!< max number of vertices per graph
 };
 
 #ifndef RELEASE_BUILD
diff --git a/src/nfagraph/ng_fuzzy.cpp b/src/nfagraph/ng_fuzzy.cpp
index fecb7065..fc468126 100644
--- a/src/nfagraph/ng_fuzzy.cpp
+++ b/src/nfagraph/ng_fuzzy.cpp
@@ -665,13 +665,23 @@ void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool utf8,
     }
 }
 
-void make_fuzzy(NGHolder &g, u32 edit_distance, UNUSED const Grey &grey) {
+void make_fuzzy(NGHolder &g, u32 edit_distance, const Grey &grey) {
     if (edit_distance == 0) {
         return;
     }
+
     assert(grey.allowApproximateMatching);
     assert(grey.maxEditDistance >= edit_distance);
+
     ShadowGraph sg(g, edit_distance);
     sg.fuzz_graph();
+
+    // For safety, enforce limit on actual vertex count.
+ if (num_vertices(g) > grey.limitApproxMatchingVertices) { + DEBUG_PRINTF("built %zu vertices > limit of %u\n", num_vertices(g), + grey.limitApproxMatchingVertices); + throw ResourceLimitError(); + } } -} + +} // namespace ue2 From ce6a10ef58f974cc3e5986afcd4dbe48dfbb21cd Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 7 Mar 2017 16:06:46 +1100 Subject: [PATCH 137/326] hsbench: don't create the greybox in release build --- tools/hsbench/main.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index a99760a2..a37d4839 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -194,7 +194,7 @@ struct BenchmarkSigs { /** Process command-line arguments. Prints usage and exits on error. */ static void processArgs(int argc, char *argv[], vector &sigSets, - UNUSED Grey &grey) { + UNUSED unique_ptr &grey) { const char options[] = "-b:c:Cd:e:E:G:hi:n:No:p:sT:Vw:z:"; int in_sigfile = 0; int do_per_scan = 0; @@ -251,7 +251,7 @@ void processArgs(int argc, char *argv[], vector &sigSets, break; #ifndef RELEASE_BUILD case 'G': - applyGreyOverrides(&grey, string(optarg)); + applyGreyOverrides(grey.get(), string(optarg)); break; #endif case 'h': @@ -735,8 +735,10 @@ void runBenchmark(const EngineHyperscan &db, /** Main driver. */ int main(int argc, char *argv[]) { - Grey grey; - + unique_ptr grey; +#if !defined(RELEASE_BUILD) + grey = make_unique(); +#endif setlocale(LC_ALL, ""); // use the user's locale #ifndef NDEBUG @@ -777,7 +779,7 @@ int main(int argc, char *argv[]) { continue; } - auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name, grey); + auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name, *grey); if (!engine) { printf("Error: expressions failed to compile.\n"); exit(1); From 79308e6791306c187b3ced41e04e28cd2b221036 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 7 Mar 2017 16:08:02 +1100 Subject: [PATCH 138/326] cmake: don't build static libs if we don't have to --- CMakeLists.txt | 106 +++++++++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 78bf207d..868d5e36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,6 +132,12 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) endif() endif() +if (NOT BUILD_SHARED_LIBS) + # build static libs + set(BUILD_STATIC_LIBS ON) + mark_as_advanced(BUILD_STATIC_LIBS) +endif () + #for config if (OPTIMISE) set(HS_OPTIMIZE ON) @@ -1084,11 +1090,14 @@ if (NOT FAT_RUNTIME) if (HAVE_AVX2) set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) endif() + if (BUILD_STATIC_LIBS) + add_library(hs_exec OBJECT ${hs_exec_SRCS}) - add_library(hs_exec OBJECT ${hs_exec_SRCS}) + add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) - add_library(hs_runtime STATIC src/hs_version.c src/hs_valid_platform.c $) - set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $) + endif (BUILD_STATIC_LIBS) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) add_library(hs_exec_shared OBJECT ${hs_exec_SRCS}) @@ -1096,33 +1105,52 @@ if (NOT FAT_RUNTIME) endif() else (FAT_RUNTIME) + set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") - add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) - set_target_properties(hs_exec_core2 PROPERTIES - COMPILE_FLAGS "-march=core2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 
${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) - set_target_properties(hs_exec_corei7 PROPERTIES - COMPILE_FLAGS "-march=corei7" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) - set_target_properties(hs_exec_avx2 PROPERTIES - COMPILE_FLAGS "-march=core-avx2" - RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" - ) - - add_library(hs_exec_common OBJECT - ${hs_exec_common_SRCS} - src/dispatcher.c - ) set_source_files_properties(src/dispatcher.c PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function") + if (BUILD_STATIC_LIBS) + add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_core2 PROPERTIES + COMPILE_FLAGS "-march=core2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) + set_target_properties(hs_exec_corei7 PROPERTIES + COMPILE_FLAGS "-march=corei7" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + set_target_properties(hs_exec_avx2 PROPERTIES + COMPILE_FLAGS "-march=core-avx2" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + + add_library(hs_exec_common OBJECT + ${hs_exec_common_SRCS} + src/dispatcher.c + ) + + # hs_version.c is added explicitly to avoid some build systems that refuse to + # create a lib without any src (I'm looking at you Xcode) + + add_library(hs_runtime STATIC src/hs_version.c + $ $ + $ $) + set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) + + # we want the static lib for testing + add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c + ${hs_SRCS} $ $ + $ $) + + endif (BUILD_STATIC_LIBS) + if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) + # build shared libs add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) set_target_properties(hs_exec_shared_core2 PROPERTIES COMPILE_FLAGS "-march=core2" @@ -1151,16 +1179,8 @@ else (FAT_RUNTIME) endif() # SHARED -# hs_version.c is added explicitly to avoid some build systems that refuse to -# create a lib without any src (I'm looking at you Xcode) - - add_library(hs_runtime STATIC src/hs_version.c - $ $ - $ $) endif (NOT FAT_RUNTIME) - -set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) if (NOT BUILD_SHARED_LIBS) install(TARGETS hs_runtime DESTINATION lib) endif() @@ -1189,16 +1209,9 @@ $) LIBRARY DESTINATION lib) endif() -if (NOT FAT_RUNTIME) - add_library(hs STATIC ${hs_SRCS} src/hs_valid_platform.c $) -else() - # we want the static lib for testing - add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c - ${hs_SRCS} $ $ - $ $) -endif() - -add_dependencies(hs ragel_Parser) +if (BUILD_STATIC_LIBS) + add_dependencies(hs ragel_Parser) +endif () if (NOT BUILD_SHARED_LIBS) install(TARGETS hs DESTINATION lib) @@ -1228,6 +1241,13 @@ install(TARGETS hs_shared LIBRARY DESTINATION lib) endif() +# used by tools and other targets +if (NOT BUILD_STATIC_LIBS) + # use shared lib without having to change all the targets + add_library(hs ALIAS hs_shared) +endif () + + if(NOT WIN32) add_subdirectory(examples) endif() From 7ad21500c470998a8dbca4813425b7edaee7dc1f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 8 Mar 2017 15:41:06 +1100 Subject: [PATCH 139/326] getActiveStates: return a sorted, uniqued vector --- 
 util/ng_find_matches.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 0890319d..8c48081f 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -605,8 +605,8 @@ struct StateSet {
     }
 #endif
 
-    flat_set<State> getActiveStates() const {
-        flat_set<State> result;
+    vector<State> getActiveStates() const {
+        vector<State> result;
 
         for (u32 dist = 0; dist <= edit_distance; dist++) {
             // get all shadow vertices (including original graph)
@@ -614,8 +614,8 @@ struct StateSet {
             for (size_t id = cur_shadow_vertices.find_first();
                  id != cur_shadow_vertices.npos;
                  id = cur_shadow_vertices.find_next(id)) {
-                result.emplace(id, dist, shadows_som[dist][id],
-                               State::NODE_SHADOW);
+                result.emplace_back(id, dist, shadows_som[dist][id],
+                                    State::NODE_SHADOW);
             }
 
             // the rest is only valid for edited graphs
@@ -628,11 +628,12 @@ struct StateSet {
             for (size_t id = cur_helper_vertices.find_first();
                  id != cur_helper_vertices.npos;
                  id = cur_helper_vertices.find_next(id)) {
-                result.emplace(id, dist, helpers_som[dist][id],
-                               State::NODE_HELPER);
+                result.emplace_back(id, dist, helpers_som[dist][id],
+                                    State::NODE_HELPER);
             }
         }
 
+        sort_and_unique(result);
         return result;
     }

From cc1191d94c987ffc4a9b3ecd867104b4f8f25835 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 8 Mar 2017 15:46:25 +1100
Subject: [PATCH 140/326] getSuccessors: reuse a vector for output

---
 util/ng_find_matches.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 8c48081f..9c1cce60 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -638,8 +638,9 @@ struct StateSet {
     }
 
     // does not return SOM
-    flat_set<State> getSuccessors(const State &state, const GraphCache &gc) const {
-        flat_set<State> result;
+    void getSuccessors(const State &state, const GraphCache &gc,
+                       vector<State> &result) const {
+        result.clear();
 
         // maximum shadow depth that we can go from current level
         u32 max_depth = edit_distance - state.level + 1;
@@ -650,7 +651,7 @@ struct StateSet {
                  id != shadow_succ.npos;
                  id = shadow_succ.find_next(id)) {
                 auto new_level = state.level + d;
-                result.emplace(id, new_level, 0, State::NODE_SHADOW);
+                result.emplace_back(id, new_level, 0, State::NODE_SHADOW);
             }
 
             const auto &helper_succ = gc.getHelperTransitions(state.idx, d);
@@ -658,11 +659,11 @@ struct StateSet {
                  id != helper_succ.npos;
                  id = helper_succ.find_next(id)) {
                 auto new_level = state.level + d;
-                result.emplace(id, new_level, 0, State::NODE_HELPER);
+                result.emplace_back(id, new_level, 0, State::NODE_HELPER);
             }
         }
 
-        return result;
+        sort_and_unique(result);
     }
 
     flat_set<State> getAcceptStates(const GraphCache &gc) const {
@@ -919,9 +920,11 @@ void step(const NGHolder &g, struct fmstate &state) {
 
     const auto active = state.states.getActiveStates();
 
+    vector<State> succ_list;
+
     for (const auto &cur : active) {
         auto u = state.vertices[cur.idx];
-        auto succ_list = state.states.getSuccessors(cur, state.gc);
+        state.states.getSuccessors(cur, state.gc, succ_list);
 
         for (auto succ : succ_list) {
             auto v = state.vertices[succ.idx];

From c81c30b1442d2c1dba3e5910e98da509737da00f Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 8 Mar 2017 15:57:29 +1100
Subject: [PATCH 141/326] findMatches: persist working data

---
 util/ng_find_matches.cpp | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 9c1cce60..0bc0cc93 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -512,6 +512,13 @@ struct StateSet {
         node_type type;
     };
 
+    // Temporary working data used for step() which we want to keep around
+    // (rather than reallocating vectors all the time).
+    struct WorkingData {
+        vector<State> active;
+        vector<State> succ_list;
+    };
+
     StateSet(size_t sz, u32 dist_in)
         : shadows(dist_in + 1), helpers(dist_in + 1),
           shadows_som(dist_in + 1), helpers_som(dist_in + 1),
@@ -593,7 +600,8 @@ struct StateSet {
 
 #ifdef DEBUG
     void dumpActiveStates() const {
-        const auto states = getActiveStates();
+        vector<State> states;
+        getActiveStates(states);
 
         DEBUG_PRINTF("Dumping active states\n");
 
@@ -605,8 +613,8 @@ struct StateSet {
     }
 #endif
 
-    vector<State> getActiveStates() const {
-        vector<State> result;
+    void getActiveStates(vector<State> &result) const {
+        result.clear();
 
         for (u32 dist = 0; dist <= edit_distance; dist++) {
             // get all shadow vertices (including original graph)
@@ -634,7 +642,6 @@ struct StateSet {
         }
 
         sort_and_unique(result);
-        return result;
     }
 
     // does not return SOM
@@ -915,18 +922,16 @@ void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state,
 }
 
 static
-void step(const NGHolder &g, struct fmstate &state) {
+void step(const NGHolder &g, fmstate &state, StateSet::WorkingData &wd) {
     state.next.reset();
 
-    const auto active = state.states.getActiveStates();
+    state.states.getActiveStates(wd.active);
 
-    vector<State> succ_list;
-
-    for (const auto &cur : active) {
+    for (const auto &cur : wd.active) {
         auto u = state.vertices[cur.idx];
-        state.states.getSuccessors(cur, state.gc, succ_list);
+        state.states.getSuccessors(cur, state.gc, wd.succ_list);
 
-        for (auto succ : succ_list) {
+        for (auto succ : wd.succ_list) {
             auto v = state.vertices[succ.idx];
 
             if (is_any_accept(v, g)) {
@@ -1070,6 +1075,8 @@ bool findMatches(const NGHolder &g, const ReportManager &rm,
 
     struct fmstate state(g, gc, utf8, allowStartDs, edit_distance, rm);
 
+    StateSet::WorkingData wd;
+
     for (auto it = input.begin(), ite = input.end(); it != ite; ++it) {
 #ifdef DEBUG
         state.states.dumpActiveStates();
@@ -1077,7 +1084,7 @@ bool findMatches(const NGHolder &g, const ReportManager &rm,
         state.offset = distance(input.begin(), it);
         state.cur = *it;
 
-        step(g, state);
+        step(g, state, wd);
 
         getMatches(g, matches, state, false);

From 749e3e64b9e00615559b623dab4741970f69cd7e Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 8 Mar 2017 16:27:41 +1100
Subject: [PATCH 142/326] getMatches: simplify

---
 util/ng_find_matches.cpp | 88 ++++++++++++++++++++--------------------
 1 file changed, 45 insertions(+), 43 deletions(-)

diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 0bc0cc93..13efa05c 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -868,56 +868,58 @@ bool canReach(const NGHolder &g, const NFAEdge &e, struct fmstate &state) {
 }
 
 static
-void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state,
-                bool allowEodMatches) {
-    flat_set<NFAVertex> accepts {g.accept, g.acceptEod};
+void getAcceptMatches(const NGHolder &g, MatchSet &matches,
+                      struct fmstate &state, NFAVertex accept_vertex) {
+    assert(accept_vertex == g.accept || accept_vertex == g.acceptEod);
 
-    for (auto v : accepts) {
-        bool eod = v == g.acceptEod;
-        if (eod && !allowEodMatches) {
+    const bool eod = accept_vertex == g.acceptEod;
+    auto active_states = eod ? state.states.getAcceptEodStates(state.gc)
+                             : state.states.getAcceptStates(state.gc);
+
+    DEBUG_PRINTF("Number of active states: %zu\n", active_states.size());
+
+    for (const auto &cur : active_states) {
+        auto u = state.vertices[cur.idx];
+
+        // we can't accept anything from startDs in between UTF-8 codepoints
+        if (state.utf8 && u == g.startDs && !isUtf8CodePoint(state.cur)) {
             continue;
         }
 
-        auto active_states = eod ? state.states.getAcceptEodStates(state.gc) :
-                             state.states.getAcceptStates(state.gc);
+        const auto &reports =
+            eod ? state.gc.vertex_eod_reports_by_level[cur.level][u]
+                : state.gc.vertex_reports_by_level[cur.level][u];
 
-        DEBUG_PRINTF("Number of active states: %zu\n", active_states.size());
+        NFAEdge e = edge(u, accept_vertex, g);
 
-        for (const auto &cur : active_states) {
-            auto u = state.vertices[cur.idx];
-
-            // we can't accept anything from startDs in between UTF-8 codepoints
-            if (state.utf8 && u == g.startDs && !isUtf8CodePoint(state.cur)) {
-                continue;
-            }
-
-            const auto &reports =
-                eod ?
-                state.gc.vertex_eod_reports_by_level[cur.level][u] :
-                state.gc.vertex_reports_by_level[cur.level][u];
-
-            NFAEdge e = edge(u, v, g);
-
-            // we assume edge assertions only exist at level 0
-            if (e && !canReach(g, e, state)) {
-                continue;
-            }
-
-            DEBUG_PRINTF("%smatch found at %zu\n",
-                         eod ? "eod " : "", state.offset);
-
-            assert(!reports.empty());
-            for (const auto &report_id : reports) {
-                const Report &ri = state.rm.getReport(report_id);
-
-                DEBUG_PRINTF("report %u has offset adjustment %d\n",
-                             report_id, ri.offsetAdjust);
-                DEBUG_PRINTF("match from (i:%zu,l:%u,t:%u): (%zu,%zu)\n",
-                             cur.idx, cur.level, cur.type, cur.som,
-                             state.offset + ri.offsetAdjust);
-                matches.emplace(cur.som, state.offset + ri.offsetAdjust);
-            }
+        // we assume edge assertions only exist at level 0
+        if (e && !canReach(g, e, state)) {
+            continue;
         }
+
+        DEBUG_PRINTF("%smatch found at %zu\n", eod ? "eod " : "", state.offset);
+
+        assert(!reports.empty());
+        for (const auto &report_id : reports) {
+            const Report &ri = state.rm.getReport(report_id);
+
+            DEBUG_PRINTF("report %u has offset adjustment %d\n", report_id,
+                         ri.offsetAdjust);
+            DEBUG_PRINTF("match from (i:%zu,l:%u,t:%u): (%zu,%zu)\n", cur.idx,
+                         cur.level, cur.type, cur.som,
+                         state.offset + ri.offsetAdjust);
+            matches.emplace(cur.som, state.offset + ri.offsetAdjust);
+        }
+    }
+}
+
+static
+void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state,
+                bool allowEodMatches) {
+    getAcceptMatches(g, matches, state, g.accept);
+    if (allowEodMatches) {
+        getAcceptMatches(g, matches, state, g.acceptEod);
     }
 }

From bae8ebc62d25da7d3958866efcc4ff66c189841d Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 9 Mar 2017 10:55:22 +1100
Subject: [PATCH 143/326] ng_find_matches: speed up gather...ByDepth

---
 util/ng_find_matches.cpp | 49 ++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 13efa05c..80a06a15 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -60,8 +60,9 @@ static constexpr size_t STATE_COUNT_MAX = 15000;
 static
 vector<flat_set<NFAVertex>>
 gatherSuccessorsByDepth(const NGHolder &g, const NFAVertex &src, u32 depth) {
+    assert(depth > 0);
+
     vector<flat_set<NFAVertex>> result(depth);
-    flat_set<NFAVertex> cur, next;
 
     // populate current set of successors
     for (auto v : adjacent_vertices_range(src, g)) {
@@ -70,31 +71,28 @@ gatherSuccessorsByDepth(const NGHolder &g, const NFAVertex &src, u32 depth) {
             continue;
         }
         DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
-
-        cur.insert(v);
+        result[0].insert(v);
     }
-    result[0] = cur;
 
     for (u32 d = 1; d < depth; d++) {
         // collect all successors for all current level vertices
-        for (auto v : cur) {
+        const auto &cur = result[d - 1];
+        auto &next = result[d];
+        for (auto u : cur) {
             // don't go past special nodes
-            if (is_special(v, g)) {
+            if (is_special(u, g)) {
                 continue;
             }
 
-            for (auto succ : adjacent_vertices_range(v, g)) {
+            for (auto v : adjacent_vertices_range(u, g)) {
                 // ignore self-loops
-                if (v == succ) {
+                if (u == v) {
                     continue;
                 }
-                DEBUG_PRINTF("Node %zu depth %u\n", g[succ].index, d + 1);
-                next.insert(succ);
+                DEBUG_PRINTF("Node %zu depth %u\n", g[v].index, d + 1);
+                next.insert(v);
             }
         }
-        result[d] = next;
-        next.swap(cur);
-        next.clear();
     }
 
     return result;
@@ -103,13 +101,12 @@ gatherSuccessorsByDepth(const NGHolder &g, const NFAVertex &src, u32 depth) {
 // returns all predecessors up to a given depth in a vector of sets, indexed by
 // zero-based depth from source vertex
 static
-vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g,
-                                                      NFAVertex src, u32 depth) {
-    vector<flat_set<NFAVertex>> result(depth);
-    flat_set<NFAVertex> cur, next;
-
+vector<flat_set<NFAVertex>>
+gatherPredecessorsByDepth(const NGHolder &g, NFAVertex src, u32 depth) {
     assert(depth > 0);
 
+    vector<flat_set<NFAVertex>> result(depth);
+
     // populate current set of successors
     for (auto v : inv_adjacent_vertices_range(src, g)) {
         // ignore self-loops
         if (v == src) {
             continue;
         }
         DEBUG_PRINTF("Node %zu depth 1\n", g[v].index);
-        cur.insert(v);
+        result[0].insert(v);
     }
-    result[0] = cur;
 
     for (u32 d = 1; d < depth; d++) {
         // collect all successors for all current level vertices
+        const auto &cur = result[d - 1];
+        auto &next = result[d];
         for (auto v : cur) {
-            for (auto pred : inv_adjacent_vertices_range(v, g)) {
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
                 // ignore self-loops
-                if (v == pred) {
+                if (v == u) {
                     continue;
                 }
-                DEBUG_PRINTF("Node %zu depth %u\n", g[pred].index, d + 1);
-                next.insert(pred);
+                DEBUG_PRINTF("Node %zu depth %u\n", g[u].index, d + 1);
+                next.insert(u);
             }
         }
-        result[d] = next;
-        next.swap(cur);
-        next.clear();
     }
 
     return result;

From 834aebe8b64801a712bea02cebb761ffbc9c0a6c Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 9 Mar 2017 11:12:16 +1100
Subject: [PATCH 144/326] ng_find_matches: use wd.active for accepts too

---
 util/ng_find_matches.cpp | 49 +++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/util/ng_find_matches.cpp b/util/ng_find_matches.cpp
index 80a06a15..0a1f796f 100644
--- a/util/ng_find_matches.cpp
+++ b/util/ng_find_matches.cpp
@@ -668,8 +668,8 @@ struct StateSet {
         sort_and_unique(result);
     }
 
-    flat_set<State> getAcceptStates(const GraphCache &gc) const {
-        flat_set<State> result;
+    void getAcceptStates(const GraphCache &gc, vector<State> &result) const {
+        result.clear();
 
         for (u32 dist = 0; dist <= edit_distance; dist++) {
             // get all shadow vertices (including original graph)
@@ -678,24 +678,24 @@ struct StateSet {
             for (size_t id = cur_shadow_vertices.find_first();
                  id != cur_shadow_vertices.npos;
                  id = cur_shadow_vertices.find_next(id)) {
-                result.emplace(id, dist, shadows_som[dist][id],
-                               State::NODE_SHADOW);
+                result.emplace_back(id, dist, shadows_som[dist][id],
+                                    State::NODE_SHADOW);
             }
 
             auto cur_helper_vertices = helpers[dist];
             cur_helper_vertices &= gc.getAcceptTransitions(dist);
             for (size_t id = cur_helper_vertices.find_first();
                  id != cur_helper_vertices.npos;
                  id = cur_helper_vertices.find_next(id)) {
-                result.emplace(id, dist, helpers_som[dist][id],
-                               State::NODE_HELPER);
+                result.emplace_back(id, dist, helpers_som[dist][id],
+                                    State::NODE_HELPER);
             }
         }
-        return result;
+        sort_and_unique(result);
     }
 
-    flat_set<State> getAcceptEodStates(const GraphCache &gc) const {
-        flat_set<State> result;
+    void getAcceptEodStates(const GraphCache &gc, vector<State> &result) const {
+        result.clear();
 
        for (u32 dist = 0; dist <= edit_distance; dist++) {
             // get all shadow vertices (including original graph)
@@ -704,20 +704,20 @@ struct StateSet {
             for (size_t id = cur_shadow_vertices.find_first();
                  id != cur_shadow_vertices.npos;
                  id = cur_shadow_vertices.find_next(id)) {
-                result.emplace(id, dist, shadows_som[dist][id],
-                               State::NODE_SHADOW);
+                result.emplace_back(id, dist, shadows_som[dist][id],
+                                    State::NODE_SHADOW);
             }
 
             auto cur_helper_vertices = helpers[dist];
             cur_helper_vertices &= gc.getAcceptEodTransitions(dist);
             for (size_t id = cur_helper_vertices.find_first();
                  id != cur_helper_vertices.npos;
                  id = cur_helper_vertices.find_next(id)) {
-                result.emplace(id, dist, helpers_som[dist][id],
-                               State::NODE_HELPER);
+                result.emplace_back(id, dist, helpers_som[dist][id],
+                                    State::NODE_HELPER);
            }
         }
-        return result;
+        sort_and_unique(result);
     }
 
     // the caller must specify SOM at current offset, and must not attempt to
@@ -864,12 +864,16 @@ bool canReach(const NGHolder &g, const NFAEdge &e, struct fmstate &state) {
 
 static
 void getAcceptMatches(const NGHolder &g, MatchSet &matches,
-                      struct fmstate &state, NFAVertex accept_vertex) {
+                      struct fmstate &state, NFAVertex accept_vertex,
+                      vector<StateSet::State> &active_states) {
     assert(accept_vertex == g.accept || accept_vertex == g.acceptEod);
 
     const bool eod = accept_vertex == g.acceptEod;
-    auto active_states = eod ?
state.states.getAcceptEodStates(state.gc) - : state.states.getAcceptStates(state.gc); + if (eod) { + state.states.getAcceptEodStates(state.gc, active_states); + } else { + state.states.getAcceptStates(state.gc, active_states); + } DEBUG_PRINTF("Number of active states: %zu\n", active_states.size()); @@ -908,13 +912,12 @@ void getAcceptMatches(const NGHolder &g, MatchSet &matches, } } - static void getMatches(const NGHolder &g, MatchSet &matches, struct fmstate &state, - bool allowEodMatches) { - getAcceptMatches(g, matches, state, g.accept); + StateSet::WorkingData &wd, bool allowEodMatches) { + getAcceptMatches(g, matches, state, g.accept, wd.active); if (allowEodMatches) { - getAcceptMatches(g, matches, state, g.acceptEod); + getAcceptMatches(g, matches, state, g.acceptEod, wd.active); } } @@ -1083,7 +1086,7 @@ bool findMatches(const NGHolder &g, const ReportManager &rm, step(g, state, wd); - getMatches(g, matches, state, false); + getMatches(g, matches, state, wd, false); DEBUG_PRINTF("offset %zu, %zu states on\n", state.offset, state.next.count()); @@ -1104,7 +1107,7 @@ bool findMatches(const NGHolder &g, const ReportManager &rm, // matches also (or not, if we're in notEod mode) DEBUG_PRINTF("Looking for EOD matches\n"); - getMatches(g, matches, state, !notEod); + getMatches(g, matches, state, wd, !notEod); filterMatches(matches); return true; From a97ec56aeebefe5e5c069b1e68053a89b47968d8 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Mar 2017 13:12:39 +1100 Subject: [PATCH 145/326] serialize: add vectored mode --- unit/hyperscan/serialize.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/unit/hyperscan/serialize.cpp b/unit/hyperscan/serialize.cpp index 7e0fcb7c..5f668ffd 100644 --- a/unit/hyperscan/serialize.cpp +++ b/unit/hyperscan/serialize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,9 +45,10 @@ namespace { using namespace std; using namespace testing; -static const unsigned validModes[] = { +static constexpr unsigned validModes[] = { HS_MODE_STREAM, - HS_MODE_NOSTREAM + HS_MODE_NOSTREAM, + HS_MODE_VECTORED }; class Serializep : public TestWithParam { @@ -75,6 +76,10 @@ TEST_P(Serializep, DeserializeFromAnyAlignment) { break; case HS_MODE_NOSTREAM: mode_string = "BLOCK"; + break; + case HS_MODE_VECTORED: + mode_string = "VECTORED"; + break; } ASSERT_NE(nullptr, original_info) << "hs_serialized_database_info returned null."; @@ -153,6 +158,10 @@ TEST_P(Serializep, DeserializeAtFromAnyAlignment) { break; case HS_MODE_NOSTREAM: mode_string = "BLOCK"; + break; + case HS_MODE_VECTORED: + mode_string = "VECTORED"; + break; } ASSERT_NE(nullptr, original_info) << "hs_serialized_database_info returned null."; From 1376f3849aef06c5a5a86491552cc272c94303e1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Mar 2017 16:07:28 +1100 Subject: [PATCH 146/326] serialize: parameterize on pattern as well --- unit/hyperscan/serialize.cpp | 93 ++++++++++++++++++++---------------- unit/hyperscan/test_util.cpp | 9 +++- unit/hyperscan/test_util.h | 27 +++++++---- 3 files changed, 76 insertions(+), 53 deletions(-) diff --git a/unit/hyperscan/serialize.cpp b/unit/hyperscan/serialize.cpp index 5f668ffd..4248fa91 100644 --- a/unit/hyperscan/serialize.cpp +++ b/unit/hyperscan/serialize.cpp @@ -45,46 +45,64 @@ namespace { using namespace 
std; using namespace testing; -static constexpr unsigned validModes[] = { - HS_MODE_STREAM, +static const unsigned validModes[] = { HS_MODE_NOSTREAM, + HS_MODE_STREAM | HS_MODE_SOM_HORIZON_LARGE, HS_MODE_VECTORED }; -class Serializep : public TestWithParam { +static const pattern testPatterns[] = { + pattern("hatstand.*teakettle.*badgerbrush", HS_FLAG_CASELESS, 1000), + pattern("hatstand.*teakettle.*badgerbrush", HS_FLAG_DOTALL, 1001), + pattern("hatstand|teakettle|badgerbrush", 0, 1002), + pattern("^hatstand|teakettle|badgerbrush$", 0, 1003), + pattern("foobar.{10,1000}xyzzy", HS_FLAG_DOTALL, 1004), + pattern("foobar.{2,501}roobar", 0, 1005), + pattern("abc.*def.*ghi", HS_FLAG_SOM_LEFTMOST, 1006), + pattern("(\\p{L}){4}", HS_FLAG_UTF8|HS_FLAG_UCP, 1007), + pattern("\\.(exe|pdf|gif|jpg|png|wav|riff|mp4)\\z", 0, 1008) }; +class SerializeP : public TestWithParam> {}; + +static +const char *getModeString(unsigned mode) { + if (mode & HS_MODE_STREAM) { + return "STREAM"; + } + if (mode & HS_MODE_BLOCK) { + return "BLOCK"; + } + if (mode & HS_MODE_VECTORED) { + return "VECTORED"; + } + return "UNKNOWN"; +} + // Check that we can deserialize from a char array at any alignment and the info // is consistent -TEST_P(Serializep, DeserializeFromAnyAlignment) { - const unsigned mode = GetParam(); +TEST_P(SerializeP, DeserializeFromAnyAlignment) { + const unsigned mode = get<0>(GetParam()); + const pattern &pat = get<1>(GetParam()); SCOPED_TRACE(mode); + SCOPED_TRACE(pat); hs_error_t err; - hs_database_t *db = buildDB("hatstand.*teakettle.*badgerbrush", - HS_FLAG_CASELESS, 1000, mode); + hs_database_t *db = buildDB(pat, mode); ASSERT_TRUE(db != nullptr) << "database build failed."; char *original_info = nullptr; err = hs_database_info(db, &original_info); ASSERT_EQ(HS_SUCCESS, err); - const char *mode_string = nullptr; - switch (mode) { - case HS_MODE_STREAM: - mode_string = "STREAM"; - break; - case HS_MODE_NOSTREAM: - mode_string = "BLOCK"; - break; - case HS_MODE_VECTORED: - mode_string = "VECTORED"; - break; - } + const char *mode_string = getModeString(mode); - ASSERT_NE(nullptr, original_info) << "hs_serialized_database_info returned null."; + ASSERT_NE(nullptr, original_info) + << "hs_serialized_database_info returned null."; ASSERT_STREQ("Version:", string(original_info).substr(0, 8).c_str()); - ASSERT_TRUE(strstr(original_info, mode_string) != nullptr); + ASSERT_TRUE(strstr(original_info, mode_string) != nullptr) + << "Original info \"" << original_info + << "\" does not contain " << mode_string; char *bytes = nullptr; size_t length = 0; @@ -138,35 +156,28 @@ TEST_P(Serializep, DeserializeFromAnyAlignment) { // Check that we can deserialize_at from a char array at any alignment and the // info is consistent -TEST_P(Serializep, DeserializeAtFromAnyAlignment) { - const unsigned mode = GetParam(); +TEST_P(SerializeP, DeserializeAtFromAnyAlignment) { + const unsigned mode = get<0>(GetParam()); + const pattern &pat = get<1>(GetParam()); SCOPED_TRACE(mode); + SCOPED_TRACE(pat); hs_error_t err; - hs_database_t *db = buildDB("hatstand.*teakettle.*badgerbrush", - HS_FLAG_CASELESS, 1000, mode); + hs_database_t *db = buildDB(pat, mode); ASSERT_TRUE(db != nullptr) << "database build failed."; char *original_info; err = hs_database_info(db, &original_info); ASSERT_EQ(HS_SUCCESS, err); - const char *mode_string = nullptr; - switch (mode) { - case HS_MODE_STREAM: - mode_string = "STREAM"; - break; - case HS_MODE_NOSTREAM: - mode_string = "BLOCK"; - break; - case HS_MODE_VECTORED: - mode_string = "VECTORED"; 
- break; - } + const char *mode_string = getModeString(mode); - ASSERT_NE(nullptr, original_info) << "hs_serialized_database_info returned null."; + ASSERT_NE(nullptr, original_info) + << "hs_serialized_database_info returned null."; ASSERT_STREQ("Version:", string(original_info).substr(0, 8).c_str()); - ASSERT_TRUE(strstr(original_info, mode_string) != nullptr); + ASSERT_TRUE(strstr(original_info, mode_string) != nullptr) + << "Original info \"" << original_info + << "\" does not contain " << mode_string; char *bytes = nullptr; size_t length = 0; @@ -226,8 +237,8 @@ TEST_P(Serializep, DeserializeAtFromAnyAlignment) { delete[] mem; } -INSTANTIATE_TEST_CASE_P(Serialize, Serializep, - ValuesIn(validModes)); +INSTANTIATE_TEST_CASE_P(Serialize, SerializeP, + Combine(ValuesIn(validModes), ValuesIn(testPatterns))); // Attempt to reproduce the scenario in UE-1946. TEST(Serialize, CrossCompileSom) { diff --git a/unit/hyperscan/test_util.cpp b/unit/hyperscan/test_util.cpp index 345b05d0..f3f6e610 100644 --- a/unit/hyperscan/test_util.cpp +++ b/unit/hyperscan/test_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,10 +48,15 @@ int record_cb(unsigned id, unsigned long long, unsigned long long to, return (int)c->halt; } -std::ostream &operator<< (std::ostream &o, const MatchRecord &m) { +std::ostream &operator<<(std::ostream &o, const MatchRecord &m) { return o << "[" << m.to << ", " << m.id << "]"; } +std::ostream &operator<<(std::ostream &o, const pattern &p) { + return o << "[" << "expr=\"" << p.expression << "\", flags=" << p.flags + << ", id=" << p.id << "]"; +} + hs_database_t *buildDB(const vector &patterns, unsigned int mode, hs_platform_info *plat) { vector expressions; diff --git a/unit/hyperscan/test_util.h b/unit/hyperscan/test_util.h index fad6137c..9b963529 100644 --- a/unit/hyperscan/test_util.h +++ b/unit/hyperscan/test_util.h @@ -53,7 +53,7 @@ struct MatchRecord { int id; }; -std::ostream &operator<< (std::ostream &o, const MatchRecord &m); +std::ostream &operator<<(std::ostream &o, const MatchRecord &m); struct CallBackContext { CallBackContext() : halt(false) {} @@ -79,22 +79,29 @@ int dummy_cb(unsigned, unsigned long long, unsigned long long, unsigned, struct pattern { std::string expression; - unsigned int flags; - unsigned int id; + unsigned int flags = 0; + unsigned int id = 0; hs_expr_ext ext; - pattern(const std::string &expression_in, unsigned int flags_in = 0, - unsigned int id_in = 0) : expression(expression_in), - flags(flags_in), id(id_in) { + // We need a default constructor for combining in parameterised tests. 
+ pattern() { memset(&ext, 0, sizeof(ext)); } - pattern(const std::string &expression_in, unsigned int flags_in, - unsigned int id_in, const hs_expr_ext &ext_in) : - expression(expression_in), flags(flags_in), id(id_in), - ext(ext_in) { } + explicit pattern(std::string expression_in, + unsigned int flags_in = 0, unsigned int id_in = 0) + : expression(std::move(expression_in)), flags(flags_in), id(id_in) { + memset(&ext, 0, sizeof(ext)); + } + + pattern(std::string expression_in, unsigned int flags_in, + unsigned int id_in, hs_expr_ext ext_in) + : expression(std::move(expression_in)), flags(flags_in), id(id_in), + ext(std::move(ext_in)) {} }; +std::ostream &operator<<(std::ostream &o, const pattern &p); + hs_database_t *buildDB(const std::vector &patterns, unsigned int mode, hs_platform_info *plat = nullptr); hs_database_t *buildDB(const pattern &pat, unsigned int mode); From bc7da2807a054c92689b059e0615c444f27b055a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Mar 2017 16:55:24 +1100 Subject: [PATCH 147/326] unit: modernise test_util --- unit/hyperscan/test_util.cpp | 50 +++++++++++++++++------------------- unit/hyperscan/test_util.h | 9 +++---- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/unit/hyperscan/test_util.cpp b/unit/hyperscan/test_util.cpp index f3f6e610..f6c20a74 100644 --- a/unit/hyperscan/test_util.cpp +++ b/unit/hyperscan/test_util.cpp @@ -26,24 +26,24 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include -#include -#include -#include - #include "hs.h" #include "test_util.h" #include "gtest/gtest.h" #include "util/expressions.h" #include "util/ExpressionParser.h" +#include +#include +#include +#include + using namespace std; int record_cb(unsigned id, unsigned long long, unsigned long long to, unsigned, void *ctxt) { CallBackContext *c = (CallBackContext *)ctxt; - c->matches.push_back(MatchRecord(to, id)); + c->matches.emplace_back(to, id); return (int)c->halt; } @@ -64,20 +64,20 @@ hs_database_t *buildDB(const vector &patterns, unsigned int mode, vector ids; vector ext; - for (vector::const_iterator it = patterns.begin(); - it != patterns.end(); ++it) { - expressions.push_back(it->expression.c_str()); - flags.push_back(it->flags); - ids.push_back(it->id); - ext.push_back(&it->ext); + for (const auto &pat : patterns) { + expressions.push_back(pat.expression.c_str()); + flags.push_back(pat.flags); + ids.push_back(pat.id); + ext.push_back(&pat.ext); } hs_database_t *db = nullptr; hs_compile_error_t *compile_err = nullptr; hs_error_t err; - err = hs_compile_ext_multi(&expressions[0], &flags[0], &ids[0], &ext[0], - patterns.size(), mode, plat, &db, &compile_err); + err = hs_compile_ext_multi(expressions.data(), flags.data(), ids.data(), + ext.data(), patterns.size(), mode, plat, &db, + &compile_err); if (err != HS_SUCCESS) { return nullptr; @@ -87,15 +87,13 @@ hs_database_t *buildDB(const vector &patterns, unsigned int mode, } hs_database_t *buildDB(const pattern &expr, unsigned int mode) { - return buildDB(vector(1, expr), mode); + return buildDB(vector({expr}), mode); } hs_database_t *buildDB(const char *expression, unsigned int flags, unsigned int id, unsigned int mode, hs_platform_info_t *plat) { - vector patterns; - patterns.push_back(pattern(expression, flags, id)); - return buildDB(patterns, mode, plat); + return buildDB({pattern(expression, flags, id)}, mode, plat); } hs_database_t *buildDB(const char *filename, unsigned int mode, @@ -104,16 +102,14 @@ hs_database_t *buildDB(const char *filename, unsigned int mode, ExpressionMap expressions; 
loadExpressionsFromFile(filename, expressions); - for (ExpressionMap::iterator it = expressions.begin(); - it != expressions.end(); ++it) { + for (const auto &expr : expressions) { unsigned int flags = 0; string regex; hs_expr_ext ext; - if (!readExpression(it->second, regex, &flags, &ext)) { + if (!readExpression(expr.second, regex, &flags, &ext)) { return nullptr; } - patterns.push_back(pattern(regex, flags | extra_flags, it->first, - ext)); + patterns.emplace_back(regex, flags | extra_flags, expr.first, ext); } return buildDB(patterns, mode); } @@ -150,13 +146,13 @@ hs_database_t *buildDB(const char *filename, unsigned int mode, ExpressionMap expressions; loadExpressionsFromFile(filename, expressions); - for (ExpressionMap::iterator it = expressions.begin(); - it != expressions.end(); ++it) { + for (const auto &expr : expressions) { unsigned int flags = 0; string regex; hs_expr_ext ext; bool must_be_ordered; - if (!readExpression(it->second, regex, &flags, &ext, &must_be_ordered)) { + if (!readExpression(expr.second, regex, &flags, &ext, + &must_be_ordered)) { return nullptr; } @@ -164,7 +160,7 @@ hs_database_t *buildDB(const char *filename, unsigned int mode, return nullptr; } - patterns.emplace_back(regex, flags, it->first, ext); + patterns.emplace_back(regex, flags, expr.first, ext); } return buildDB(patterns, mode); } diff --git a/unit/hyperscan/test_util.h b/unit/hyperscan/test_util.h index 9b963529..efa0570c 100644 --- a/unit/hyperscan/test_util.h +++ b/unit/hyperscan/test_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,13 +29,13 @@ #ifndef TEST_UTIL_H #define TEST_UTIL_H +#include "hs.h" + #include #include #include #include -#include "hs.h" - #ifndef UNUSED #if defined(_WIN32) || defined(_WIN64) #define UNUSED @@ -56,8 +56,7 @@ struct MatchRecord { std::ostream &operator<<(std::ostream &o, const MatchRecord &m); struct CallBackContext { - CallBackContext() : halt(false) {} - bool halt; + bool halt = false; std::vector matches; void clear() { From d8eb259ac73c4a334c7794a2099c95abaf496a6d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 1 Mar 2017 17:12:45 +1100 Subject: [PATCH 148/326] serialize: tidy --- unit/hyperscan/serialize.cpp | 56 ++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/unit/hyperscan/serialize.cpp b/unit/hyperscan/serialize.cpp index 4248fa91..3b34abac 100644 --- a/unit/hyperscan/serialize.cpp +++ b/unit/hyperscan/serialize.cpp @@ -31,15 +31,15 @@ */ #include "config.h" -#include -#include -#include - #include "gtest/gtest.h" #include "hs.h" #include "hs_internal.h" #include "test_util.h" +#include +#include +#include + namespace { using namespace std; @@ -246,11 +246,10 @@ TEST(Serialize, CrossCompileSom) { plat.cpu_features = 0; plat.tune = HS_TUNE_FAMILY_GENERIC; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; const unsigned mode = HS_MODE_STREAM | HS_MODE_SOM_HORIZON_LARGE; - hs_database_t *db = buildDB(pattern, HS_FLAG_SOM_LEFTMOST, 1000, mode, - &plat); + hs_database_t *db = buildDB(pat, HS_FLAG_SOM_LEFTMOST, 1000, mode, &plat); ASSERT_TRUE(db != nullptr) << "database build failed."; size_t db_len; @@ -295,15 +294,16 @@ static void misaligned_free(void *p) { free(c - 1); } -// make sure that 
serializing/deserializing to null or an unaligned address fails +// make sure that serializing/deserializing to null or an unaligned address +// fails TEST(Serialize, CompileNullMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; // mallocing null should fail compile hs_set_allocator(null_malloc, nullptr); - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_NE(HS_SUCCESS, err); ASSERT_TRUE(db == nullptr); ASSERT_TRUE(c_err != nullptr); @@ -314,14 +314,14 @@ TEST(Serialize, CompileNullMalloc) { TEST(Serialize, CompileErrorAllocator) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatsta^nd.*(badgerbrush|teakettle)"; + static const char *pat = "hatsta^nd.*(badgerbrush|teakettle)"; // failing to compile should use the misc allocator allocated_count = 0; allocated_count_b = 0; hs_set_allocator(count_malloc_b, count_free_b); hs_set_misc_allocator(count_malloc, count_free); - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_NE(HS_SUCCESS, err); ASSERT_TRUE(db == nullptr); ASSERT_TRUE(c_err != nullptr); @@ -335,13 +335,13 @@ TEST(Serialize, CompileErrorAllocator) { TEST(Serialize, AllocatorsUsed) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; allocated_count = 0; allocated_count_b = 0; hs_set_allocator(count_malloc_b, count_free_b); hs_set_database_allocator(count_malloc, count_free); - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); ASSERT_TRUE(c_err == nullptr); @@ -364,15 +364,14 @@ TEST(Serialize, AllocatorsUsed) { ASSERT_EQ(0, allocated_count_b); } - TEST(Serialize, CompileUnalignedMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; // unaligned malloc should fail compile hs_set_allocator(misaligned_malloc, misaligned_free); - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_NE(HS_SUCCESS, err); ASSERT_TRUE(db == nullptr); ASSERT_TRUE(c_err != nullptr); @@ -383,8 +382,8 @@ TEST(Serialize, CompileUnalignedMalloc) { TEST(Serialize, SerializeNullMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); @@ -404,13 +403,14 @@ TEST(Serialize, SerializeNullMalloc) { hs_free_database(db); } -// make sure that serializing/deserializing to null or an unaligned address fails +// make sure that serializing/deserializing to null or an unaligned address +// fails TEST(Serialize, SerializeUnalignedMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - 
static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat= "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); @@ -434,9 +434,9 @@ TEST(Serialize, SerializeUnalignedMalloc) { TEST(Serialize, DeserializeNullMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); @@ -467,9 +467,9 @@ TEST(Serialize, DeserializeNullMalloc) { TEST(Serialize, DeserializeUnalignedMalloc) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); @@ -506,9 +506,9 @@ TEST(Serialize, DeserializeUnalignedMalloc) { TEST(Serialize, DeserializeGarbage) { hs_database_t *db; hs_compile_error_t *c_err; - static const char *pattern = "hatstand.*(badgerbrush|teakettle)"; + static const char *pat = "hatstand.*(badgerbrush|teakettle)"; - hs_error_t err = hs_compile(pattern, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); + hs_error_t err = hs_compile(pat, 0, HS_MODE_BLOCK, nullptr, &db, &c_err); ASSERT_EQ(HS_SUCCESS, err); ASSERT_TRUE(db != nullptr); From 037e39b6f5a68ee7ca1de4189591ccc355bae889 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 8 Mar 2017 10:04:20 +1100 Subject: [PATCH 149/326] ng_corpus_generator: stop using ptr_vector Nowadays we can use vector>. --- util/ng_corpus_generator.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index 6d0de539..8692a323 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -48,15 +48,13 @@ #include #include +#include #include #include #include -#include - using namespace std; using namespace ue2; -using boost::ptr_vector; typedef vector VertexPath; @@ -139,8 +137,8 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, // limit will evict a random existing one. const size_t MAX_OPEN = min((size_t)1000, corpusLimit * 10); - ptr_vector open; - open.push_back(new VertexPath(1, g.start)); + vector> open; + open.push_back(ue2::make_unique({g.start})); ue2::unordered_set one_way_in; for (const auto &v : vertices_range(g)) { @@ -152,7 +150,8 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, while (!open.empty()) { u32 slot = cProps.rand(0, open.size() - 1); swap(open.at(slot), open.back()); - ptr_vector::auto_type p = open.pop_back(); + auto p = std::move(open.back()); + open.pop_back(); NFAVertex u = p->back(); DEBUG_PRINTF("dequeuing path %s, back %zu\n", @@ -194,19 +193,19 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, // If we've got no further adjacent vertices, re-use p rather than // copying it for the next path. 
- VertexPath *new_path; + unique_ptr new_path; if (boost::next(ai) == ae) { - new_path = p.release(); + new_path = std::move(p); } else { - new_path = new VertexPath(*p); + new_path = make_unique(*p); } new_path->push_back(v); if (open.size() < MAX_OPEN) { - open.push_back(new_path); + open.push_back(std::move(new_path)); } else { u32 victim = cProps.rand(0, open.size() - 1); - open.replace(victim, new_path); + open[victim] = std::move(new_path); } } } From 26ec7dd332bae9af2ddf1ae0b590cc49944ec588 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 8 Mar 2017 10:42:28 +1100 Subject: [PATCH 150/326] ng_corpus_generator: use std::next, not boost::next --- util/ng_corpus_generator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index 8692a323..f2e8f7f9 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -194,7 +194,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, // If we've got no further adjacent vertices, re-use p rather than // copying it for the next path. unique_ptr new_path; - if (boost::next(ai) == ae) { + if (std::next(ai) == ae) { new_path = std::move(p); } else { new_path = make_unique(*p); From 448ce8a496d2577702bed5f12bcf6488e554e299 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Thu, 9 Mar 2017 09:33:29 -0500 Subject: [PATCH 151/326] UE-3098: add unaligned load for andn without BMI --- src/fdr/fdr.c | 64 +++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index edd12733..d5d40c38 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -117,16 +117,16 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }; -/* compilers don't reliably synthesize the ANDN instruction here, +/* compilers don't reliably synthesize the 32-bit ANDN instruction here, * so we force its generation. 
From 448ce8a496d2577702bed5f12bcf6488e554e299 Mon Sep 17 00:00:00 2001 From: "Wang, Xiang W" Date: Thu, 9 Mar 2017 09:33:29 -0500 Subject: [PATCH 151/326] UE-3098: add unaligned load for andn without BMI --- src/fdr/fdr.c | 64 +++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index edd12733..d5d40c38 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -117,16 +117,16 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }; -/* compilers don't reliably synthesize the ANDN instruction here, +/* compilers don't reliably synthesize the 32-bit ANDN instruction here, * so we force its generation. */ static really_inline -u64a andn(const u32 a, const u32 *b) { +u64a andn(const u32 a, const u8 *b) { u64a r; #if defined(__BMI__) - __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*b)); + __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b)); #else - r = *b & ~a; + r = unaligned_load_u32(b) & ~a; #endif return r; } @@ -158,20 +158,20 @@ void get_conf_stride_1(const u8 *itPtr, UNUSED const u8 *start_ptr, const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) { /* +1: the zones ensure that we can read the byte at z->end */ assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr); - u64a reach0 = andn(domain_mask_flipped, (const u32 *)(itPtr)); - u64a reach1 = andn(domain_mask_flipped, (const u32 *)(itPtr + 1)); - u64a reach2 = andn(domain_mask_flipped, (const u32 *)(itPtr + 2)); - u64a reach3 = andn(domain_mask_flipped, (const u32 *)(itPtr + 3)); + u64a reach0 = andn(domain_mask_flipped, itPtr); + u64a reach1 = andn(domain_mask_flipped, itPtr + 1); + u64a reach2 = andn(domain_mask_flipped, itPtr + 2); + u64a reach3 = andn(domain_mask_flipped, itPtr + 3); m128 st0 = load_m128_from_u64a(ft + reach0); m128 st1 = load_m128_from_u64a(ft + reach1); m128 st2 = load_m128_from_u64a(ft + reach2); m128 st3 = load_m128_from_u64a(ft + reach3); - u64a reach4 = andn(domain_mask_flipped, (const u32 *)(itPtr + 4)); - u64a reach5 = andn(domain_mask_flipped, (const u32 *)(itPtr + 5)); - u64a reach6 = andn(domain_mask_flipped, (const u32 *)(itPtr + 6)); - u64a reach7 = andn(domain_mask_flipped, (const u32 *)(itPtr + 7)); + u64a reach4 = andn(domain_mask_flipped, itPtr + 4); + u64a reach5 = andn(domain_mask_flipped, itPtr + 5); + u64a reach6 = andn(domain_mask_flipped, itPtr + 6); + u64a reach7 = andn(domain_mask_flipped, itPtr + 7); m128 st4 = load_m128_from_u64a(ft + reach4); m128 st5 = load_m128_from_u64a(ft + reach5); @@ -199,20 +199,20 @@ void get_conf_stride_1(const u8 *itPtr, UNUSED const u8 *start_ptr, *s = rshiftbyte_m128(*s, 8); *conf0 ^= ~0ULL; - u64a reach8 = andn(domain_mask_flipped, (const u32 *)(itPtr + 8)); - u64a reach9 = andn(domain_mask_flipped, (const u32 *)(itPtr + 9)); - u64a reach10 = andn(domain_mask_flipped, (const u32 *)(itPtr + 10)); - u64a reach11 = andn(domain_mask_flipped, (const u32 *)(itPtr + 11)); + u64a reach8 = andn(domain_mask_flipped, itPtr + 8); + u64a reach9 = andn(domain_mask_flipped, itPtr + 9); + u64a reach10 = andn(domain_mask_flipped, itPtr + 10); + u64a reach11 = andn(domain_mask_flipped, itPtr + 11); m128 st8 = load_m128_from_u64a(ft + reach8); m128 st9 = load_m128_from_u64a(ft + reach9); m128 st10 = load_m128_from_u64a(ft + reach10); m128 st11 = load_m128_from_u64a(ft + reach11); - u64a reach12 = andn(domain_mask_flipped, (const u32 *)(itPtr + 12)); - u64a reach13 = andn(domain_mask_flipped, (const u32 *)(itPtr + 13)); - u64a reach14 = andn(domain_mask_flipped, (const u32 *)(itPtr + 14)); - u64a reach15 = andn(domain_mask_flipped, (const u32 *)(itPtr + 15)); + u64a reach12 = andn(domain_mask_flipped, itPtr + 12); + u64a reach13 = andn(domain_mask_flipped, itPtr + 13); + u64a reach14 = andn(domain_mask_flipped, itPtr + 14); + u64a reach15 = andn(domain_mask_flipped, itPtr + 15); m128 st12 = load_m128_from_u64a(ft + reach12); m128 st13 = load_m128_from_u64a(ft + reach13); @@ -246,20 +246,20 @@ void get_conf_stride_2(const u8 *itPtr, UNUSED const u8 *start_ptr, UNUSED const u8 *end_ptr, u32 domain_mask_flipped, const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) { assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr); - u64a reach0 = andn(domain_mask_flipped, (const u32 *)itPtr); - u64a reach2 = andn(domain_mask_flipped, (const u32 *)(itPtr + 2)); - u64a reach4 = andn(domain_mask_flipped, (const u32 *)(itPtr + 4)); - u64a reach6 = andn(domain_mask_flipped, (const u32 *)(itPtr + 6)); + u64a reach0 = andn(domain_mask_flipped, itPtr); + u64a reach2 = andn(domain_mask_flipped, itPtr + 2); + u64a reach4 = andn(domain_mask_flipped, itPtr + 4); + u64a reach6 = andn(domain_mask_flipped, itPtr + 6); m128 st0 = load_m128_from_u64a(ft + reach0); m128 st2 = load_m128_from_u64a(ft + reach2); m128 st4 = load_m128_from_u64a(ft + reach4); m128 st6 = load_m128_from_u64a(ft + reach6); - u64a reach8 = andn(domain_mask_flipped, (const u32 *)(itPtr + 8)); - u64a reach10 = andn(domain_mask_flipped, (const u32 *)(itPtr + 10)); - u64a reach12 = andn(domain_mask_flipped, (const u32 *)(itPtr + 12)); - u64a reach14 = andn(domain_mask_flipped, (const u32 *)(itPtr + 14)); + u64a reach8 = andn(domain_mask_flipped, itPtr + 8); + u64a reach10 = andn(domain_mask_flipped, itPtr + 10); + u64a reach12 = andn(domain_mask_flipped, itPtr + 12); + u64a reach14 = andn(domain_mask_flipped, itPtr + 14); m128 st8 = load_m128_from_u64a(ft + reach8); m128 st10 = load_m128_from_u64a(ft + reach10); @@ -298,10 +298,10 @@ void get_conf_stride_4(const u8 *itPtr, UNUSED const u8 *start_ptr, UNUSED const u8 *end_ptr, u32 domain_mask_flipped, const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) { assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr); - u64a reach0 = andn(domain_mask_flipped, (const u32 *)itPtr); - u64a reach4 = andn(domain_mask_flipped, (const u32 *)(itPtr + 4)); - u64a reach8 = andn(domain_mask_flipped, (const u32 *)(itPtr + 8)); - u64a reach12 = andn(domain_mask_flipped, (const u32 *)(itPtr + 12)); + u64a reach0 = andn(domain_mask_flipped, itPtr); + u64a reach4 = andn(domain_mask_flipped, itPtr + 4); + u64a reach8 = andn(domain_mask_flipped, itPtr + 8); + u64a reach12 = andn(domain_mask_flipped, itPtr + 12); m128 st0 = load_m128_from_u64a(ft + reach0); m128 st4 = load_m128_from_u64a(ft + reach4);
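
Annotation: the key correctness point in the patch above is that itPtr advances one byte at a time, so dereferencing it through a const u32 * at arbitrary offsets is an unaligned (strictly speaking, undefined) access; taking a u8 * and loading explicitly makes the non-BMI path well defined. A hedged sketch of the portable fallback, using standalone names rather than Hyperscan's unaligned_load_u32 helper:

    #include <cstdint>
    #include <cstring>

    // memcpy-based unaligned 32-bit load: well defined for any byte pointer,
    // and typically compiled to a single mov on x86.
    static inline uint32_t load_u32_unaligned(const uint8_t *p) {
        uint32_t v;
        std::memcpy(&v, p, sizeof(v));
        return v;
    }

    // Equivalent of the #else branch of andn() above: *(u32 *)p & ~mask,
    // but without the unaligned dereference.
    static inline uint64_t andn_fallback(uint32_t mask, const uint8_t *p) {
        return load_u32_unaligned(p) & ~mask;
    }
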
From 3e597e85ffcebcb2d11b80282f7ace3ed98c7fcb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 10 Mar 2017 09:42:57 +1100 Subject: [PATCH 152/326] ng_corpus_generator: restore use of boost::next() libc++ checks for forward_iterator_tag in std::next(), which isn't provided by our (Boost-derived) ue2_graph iterators. --- util/ng_corpus_generator.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index f2e8f7f9..72c13e84 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -53,6 +53,8 @@ #include #include +#include + using namespace std; using namespace ue2; @@ -194,7 +196,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, // If we've got no further adjacent vertices, re-use p rather than // copying it for the next path. unique_ptr<VertexPath> new_path; - if (std::next(ai) == ae) { + if (boost::next(ai) == ae) { new_path = std::move(p); } else { new_path = make_unique<VertexPath>(*p);
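
Annotation: the revert above hinges on iterator requirements: std::next is constrained on the iterator category (libc++ rejects types that don't advertise at least forward_iterator_tag), whereas boost::next merely increments a copy. A small illustration with a standard container as a stand-in, since the ue2_graph adjacency iterators aren't reproduced here:

    #include <boost/next_prior.hpp>
    #include <list>

    // boost::next(it) returns an incremented copy of the iterator and places
    // no requirement on iterator_category, which is why it also works with
    // the Boost-derived graph iterators mentioned in the commit message.
    static bool is_last(std::list<int>::const_iterator it,
                        const std::list<int> &l) {
        return boost::next(it) == l.end();
    }
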
From 187a4b82c2bb1160da3697e2f4b85f98fbdd0df3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 10 Mar 2017 13:43:31 +1100 Subject: [PATCH 153/326] small_vector: add header to handle older Boost This provides ue2::small_vector which will fall back to std::vector where the Boost version is too old or not present. --- CMakeLists.txt | 1 + src/util/small_vector.h | 63 +++++++++++++++++++++++++++++++++++++++ src/util/ue2_containers.h | 4 +-- 3 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 src/util/small_vector.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 868d5e36..0fd3639f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1012,6 +1012,7 @@ SET (hs_SRCS src/util/report_manager.cpp src/util/report_manager.h src/util/simd_utils.h + src/util/small_vector.h src/util/target_info.cpp src/util/target_info.h src/util/ue2_containers.h diff --git a/src/util/small_vector.h b/src/util/small_vector.h new file mode 100644 index 00000000..c67ad562 --- /dev/null +++ b/src/util/small_vector.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTIL_SMALL_VECTOR_H +#define UTIL_SMALL_VECTOR_H + +#include <vector> + +#include <boost/version.hpp> + +#if BOOST_VERSION >= 105800 +# define HAVE_BOOST_CONTAINER_SMALL_VECTOR +#endif + +#if defined(HAVE_BOOST_CONTAINER_SMALL_VECTOR) +# include <boost/container/small_vector.hpp> +#endif + +namespace ue2 { + +#if defined(HAVE_BOOST_CONTAINER_SMALL_VECTOR) + +template <class T, std::size_t N, + typename Allocator = boost::container::new_allocator<T>> +using small_vector = boost::container::small_vector<T, N, Allocator>; + +#else + +// Boost version isn't new enough, fall back to just using std::vector. +template <class T, std::size_t N, + typename Allocator = std::allocator<T>> +using small_vector = std::vector<T>; + +#endif // HAVE_BOOST_CONTAINER_SMALL_VECTOR + +} // namespace ue2 + +#endif // UTIL_SMALL_VECTOR_H diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index ea8ff7bc..ef93b2d9 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -30,13 +30,13 @@ #define UTIL_UE2_CONTAINERS_H_ #include "ue2common.h" +#include "util/small_vector.h" #include #include #include #include -#include <boost/container/small_vector.hpp> #include #include #include @@ -97,7 +97,7 @@ template <typename T, typename Compare, typename Allocator> class flat_base { protected: // Underlying storage is a small vector with local space for one element.
- using storage_type = boost::container::small_vector<T, 1, Allocator>; + using storage_type = small_vector<T, 1, Allocator>; using storage_alloc_type = typename storage_type::allocator_type; // Putting our storage and comparator in a tuple allows us to make use of
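
Annotation: from a user's point of view, the new alias gives inline storage when Boost is new enough and degrades gracefully otherwise. A usage sketch (not part of the patch); the element type and inline capacity are arbitrary:

    #include "util/small_vector.h"

    #include <numeric>

    int sum_small() {
        // With boost::container::small_vector, these four elements live
        // inside the object itself, avoiding a heap allocation; with the
        // std::vector fallback the code still compiles and behaves the same.
        ue2::small_vector<int, 4> v;
        for (int i = 1; i <= 4; i++) {
            v.push_back(i);
        }
        return std::accumulate(v.begin(), v.end(), 0); // 10
    }
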
From 2d660ce4db2b9db597277b98baa5ab4795a79eab Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 10 Mar 2017 15:58:11 +1100 Subject: [PATCH 154/326] ng_corpus_generator: small fix for MSVC compat --- util/ng_corpus_generator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index 72c13e84..9d75a7ad 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -140,7 +140,7 @@ void findPaths(const NGHolder &g, CorpusProperties &cProps, const size_t MAX_OPEN = min((size_t)1000, corpusLimit * 10); vector<unique_ptr<VertexPath>> open; - open.push_back(ue2::make_unique<VertexPath>({g.start})); + open.push_back(ue2::make_unique<VertexPath>(1, g.start)); ue2::unordered_set<NFAVertex> one_way_in; for (const auto &v : vertices_range(g)) { From 5005d5005047837b1f41fe485331482bba9ff98a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 13 Mar 2017 11:49:19 +1100 Subject: [PATCH 155/326] limex_compile: don't repeatedly calc dominators --- src/nfa/limex_compile.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index c75eae59..e064420d 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -721,6 +721,7 @@ typedef vector> static u32 getEffectiveAccelStates(const build_info &args, + const unordered_map<NFAVertex, NFAVertex> &dom_map, u32 active_accel_mask, const vector<AccelBuild> &accelStates) { /* accelStates is indexed by the acceleration bit index and contains a @@ -756,7 +757,6 @@ u32 getEffectiveAccelStates(const build_info &args, * so we may still require on earlier states to be accurately modelled. */ const NGHolder &h = args.h; - auto dom_map = findDominators(h); /* map from accel_id to mask of accel_ids that it is dominated by */ vector<u32> dominated_by(accelStates.size()); @@ -773,8 +773,8 @@ u32 getEffectiveAccelStates(const build_info &args, u32 accel_id = findAndClearLSB_32(&local_accel_mask); assert(accel_id < accelStates.size()); NFAVertex v = accelStates[accel_id].v; - while (dom_map[v]) { - v = dom_map[v]; + while (contains(dom_map, v) && dom_map.at(v)) { + v = dom_map.at(v); if (contains(accel_id_map, v)) { dominated_by[accel_id] |= 1U << accel_id_map[v]; } @@ -887,6 +887,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, return; } + const auto dom_map = findDominators(args.h); + // We have 2^n different accel entries, one for each possible // combination of accelerable states. assert(accelStates.size() < 32); @@ -900,7 +902,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, effective_accel_set.push_back(0); /* empty is effectively empty */ for (u32 i = 1; i < accelCount; i++) { - u32 effective_i = getEffectiveAccelStates(args, i, accelStates); + u32 effective_i = getEffectiveAccelStates(args, dom_map, i, + accelStates); effective_accel_set.push_back(effective_i); if (effective_i == IMPOSSIBLE_ACCEL_MASK) {
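
Annotation: the patch above is a straightforward hoist: findDominators() walks the whole graph, and it used to run once per accelerable-state combination (up to 2^n calls), so computing it once in buildAccel and threading it through is a substantial saving. The shape of the fix, as a self-contained toy sketch (the names are stand-ins, not the limex API):

    #include <map>
    #include <vector>

    using Graph = std::vector<std::vector<int>>; // adjacency-list stand-in

    // Stand-in for an expensive whole-graph analysis like findDominators().
    static std::map<int, int> analyse(const Graph &g) {
        std::map<int, int> dom;
        for (int v = 1; v < (int)g.size(); v++) {
            dom[v] = 0; // pretend vertex 0 dominates everything
        }
        return dom;
    }

    static int useAnalysis(const std::map<int, int> &dom_map, int mask) {
        return dom_map.count(mask) ? mask : 0;
    }

    void buildAll(const Graph &g, int count) {
        const auto dom_map = analyse(g); // hoisted: computed once, not per i
        for (int i = 1; i < count; i++) {
            useAnalysis(dom_map, i);
        }
    }
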
From 8a6b38a9b526d9a2374f8b6e4c853b840e4a06d3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 13 Mar 2017 15:18:12 +1100 Subject: [PATCH 156/326] ng_dominators: use a vector for doms internally --- src/nfagraph/ng_dominators.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/nfagraph/ng_dominators.cpp b/src/nfagraph/ng_dominators.cpp index d01af994..50536b76 100644 --- a/src/nfagraph/ng_dominators.cpp +++ b/src/nfagraph/ng_dominators.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -62,8 +62,8 @@ unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g, vector<NFAVertex> vertices_by_dfnum(num_verts, Graph::null_vertex()); // Output map. - unordered_map<NFAVertex, NFAVertex> doms; - auto dom_map = make_assoc_property_map(doms); + vector<NFAVertex> doms(num_verts, Graph::null_vertex()); + auto dom_map = make_iterator_property_map(doms.begin(), index_map); boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map, parent_map, vertices_by_dfnum, @@ -71,10 +71,12 @@ /* Translate back to an NFAVertex map */ unordered_map<NFAVertex, NFAVertex> doms2; - for (const auto &e : doms) { - NFAVertex f(e.first); - NFAVertex s(e.second); - doms2[f] = s; + doms2.reserve(num_verts); + for (auto v : vertices_range(g)) { + auto dom_of_v = doms[g[v].index]; + if (dom_of_v) { + doms2.emplace(v, dom_of_v); + } } return doms2; } From b4ad7359d69d0cf18eb03f6d8fcf6252540e0d8d Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 15 Mar 2017 13:27:18 +1100 Subject: [PATCH 157/326] build: More robust temp file handling --- cmake/build_wrapper.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh index 5baf209b..756d70e8 100755 --- a/cmake/build_wrapper.sh +++ b/cmake/build_wrapper.sh @@ -1,13 +1,18 @@ #!/bin/sh -e # This is used for renaming symbols for the fat runtime, don't call directly # TODO: make this a lot less fragile! +cleanup () { + rm -f ${SYMSFILE} ${KEEPSYMS} +} + PREFIX=$1 KEEPSYMS_IN=$2 shift 2 BUILD=$@ OUT=$(echo $BUILD | sed 's/.* -o \(.*\.o\).*/\1/') -SYMSFILE=/tmp/${PREFIX}_rename.syms.$$ -KEEPSYMS=/tmp/keep.syms.$$ +trap cleanup INT QUIT EXIT +SYMSFILE=$(mktemp --tmpdir ${PREFIX}_rename.syms.XXXXX) +KEEPSYMS=$(mktemp --tmpdir keep.syms.XXXXX) # grab the command without the target obj or src file flags # we don't just call gcc directly as there may be flags modifying the arch CC_CMD=$(echo $BUILD | sed 's/ -o .*\.o//;s/ -c //;s/ .[^ ]*\.c//;') @@ -24,4 +29,3 @@ if test -s ${SYMSFILE} then objcopy --redefine-syms=${SYMSFILE} ${OUT} fi -rm -f ${SYMSFILE} ${KEEPSYMS} From aeba9bc42caedc2620cc59ed455002d0eb102e85 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 16 Mar 2017 12:51:24 +1100 Subject: [PATCH 158/326] rose_build_bytecode: make build_context, RoseBuildImpl params const --- src/rose/rose_build_bytecode.cpp | 74 +++++++++++++++++--------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index bce26a10..e7859405 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1713,7 +1713,7 @@ private: } static -aligned_unique_ptr<NFA> buildOutfix(RoseBuildImpl &build, OutfixInfo &outfix) { +aligned_unique_ptr<NFA> buildOutfix(const RoseBuildImpl &build, OutfixInfo &outfix) { assert(!outfix.is_dead()); // should not be marked dead. auto n = boost::apply_visitor(OutfixBuilder(build), outfix.proto); @@ -3179,8 +3179,8 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look, } static -void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, - RoseProgram &program) { +void makeRoleLookaround(const RoseBuildImpl &build, build_context &bc, + RoseVertex v, RoseProgram &program) { if (!build.cc.grey.roseLookaroundMasks) { return; } @@ -3207,8 +3207,8 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, } static -void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - RoseProgram &program) { +void makeRoleCheckLeftfix(const RoseBuildImpl &build, build_context &bc, + RoseVertex v, RoseProgram &program) { auto it = bc.leftfix_info.find(v); if (it == end(bc.leftfix_info)) { return; } @@ -3238,7 +3238,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, } static -void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, +void makeRoleAnchoredDelay(const RoseBuildImpl &build, build_context &bc, RoseVertex v, RoseProgram &program) { // Only relevant for roles that can be triggered by the anchored table.
if (!build.isAnchored(v)) { @@ -3277,7 +3277,7 @@ void makeDedupeSom(const RoseBuildImpl &build, const Report &report, } static -void makeCatchup(RoseBuildImpl &build, build_context &bc, +void makeCatchup(const RoseBuildImpl &build, const build_context &bc, const flat_set<ReportID> &reports, RoseProgram &program) { if (!bc.needs_catchup) { return; } @@ -3300,8 +3300,8 @@ void makeCatchup(RoseBuildImpl &build, build_context &bc, } static -void makeCatchupMpv(RoseBuildImpl &build, build_context &bc, ReportID id, - RoseProgram &program) { +void makeCatchupMpv(const RoseBuildImpl &build, const build_context &bc, + ReportID id, RoseProgram &program) { if (!bc.needs_mpv_catchup) { return; } @@ -3526,8 +3526,8 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, } static -void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, - RoseProgram &program) { +void makeRoleReports(const RoseBuildImpl &build, const build_context &bc, + RoseVertex v, RoseProgram &program) { const auto &g = build.g; /* we are a suffaig - need to update role to provide som to the @@ -3556,8 +3556,8 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, } static -void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - RoseProgram &program) { +void makeRoleSuffix(const RoseBuildImpl &build, const build_context &bc, + RoseVertex v, RoseProgram &program) { const auto &g = build.g; if (!g[v].suffix) { return; } @@ -3591,7 +3591,7 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, } static -void makeRoleGroups(RoseBuildImpl &build, ProgramBuild &prog_build, +void makeRoleGroups(const RoseBuildImpl &build, ProgramBuild &prog_build, RoseVertex v, RoseProgram &program) { const auto &g = build.g; rose_group groups = g[v].groups; @@ -3627,7 +3627,7 @@ void makeRoleGroups(RoseBuildImpl &build, ProgramBuild &prog_build, } static -void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, +void makeRoleInfixTriggers(const RoseBuildImpl &build, const build_context &bc, RoseVertex u, RoseProgram &program) { const auto &g = build.g; @@ -3758,7 +3758,7 @@ void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v, } static -void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc, +void makeRoleEagerEodReports(const RoseBuildImpl &build, build_context &bc, RoseVertex v, RoseProgram &program) { RoseProgram eod_program; @@ -3786,7 +3786,7 @@ void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc, } static -RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc, +RoseProgram makeProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, const RoseEdge &e) { const RoseGraph &g = build.g; auto v = target(e, g); @@ -4041,7 +4041,8 @@ void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, } static -void addPredBlocksAny(build_context &bc, map<u32, RoseProgram> &pred_blocks, +void addPredBlocksAny(const build_context &bc, + map<u32, RoseProgram> &pred_blocks, RoseProgram &program) { RoseProgram sparse_program; @@ -4060,7 +4061,8 @@ void addPredBlocksAny(build_context &bc, map<u32, RoseProgram> &pred_blocks, } static -void addPredBlocksMulti(build_context &bc, map<u32, RoseProgram> &pred_blocks, +void addPredBlocksMulti(const build_context &bc, + map<u32, RoseProgram> &pred_blocks, RoseProgram &program) { assert(!pred_blocks.empty()); @@ -4111,7 +4113,7 @@ void addPredBlocksMulti(build_context &bc, map<u32, RoseProgram> &pred_blocks, } static -void addPredBlocks(build_context &bc, map<u32, RoseProgram> &pred_blocks, +void addPredBlocks(const build_context &bc, map<u32, RoseProgram> &pred_blocks, RoseProgram &program) {
// Trim empty blocks, if any exist. for (auto it = pred_blocks.begin(); it != pred_blocks.end();) { @@ -4368,7 +4370,7 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build, } static -bool hasDelayedLiteral(RoseBuildImpl &build, +bool hasDelayedLiteral(const RoseBuildImpl &build, const vector<RoseEdge> &lit_edges) { auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1); for (const auto &e : lit_edges) { @@ -4382,8 +4384,9 @@ bool hasDelayedLiteral(RoseBuildImpl &build, } static -RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, u32 lit_id, +RoseProgram buildLitInitialProgram(const RoseBuildImpl &build, + build_context &bc, ProgramBuild &prog_build, + u32 lit_id, const vector<RoseEdge> &lit_edges) { RoseProgram program; @@ -4410,7 +4413,7 @@ RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, } static -RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, +RoseProgram buildLiteralProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, u32 lit_id, const vector<RoseEdge> &lit_edges, bool is_anchored_program) { @@ -4507,7 +4510,7 @@ RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks) { } static -u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, +u32 writeLiteralProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, const flat_set<u32> &lit_ids, const map<u32, vector<RoseEdge>> &lit_edge_map, bool is_anchored_program) { @@ -4540,7 +4543,7 @@ u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, } static -u32 writeDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, +u32 writeDelayRebuildProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, const flat_set<u32> &lit_ids) { assert(!lit_ids.empty()); @@ -4750,7 +4753,7 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, * programs. */ static -pair<u32, u32> writeDelayPrograms(RoseBuildImpl &build, build_context &bc, +pair<u32, u32> writeDelayPrograms(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build) { auto lit_edge_map = findEdgesByLiteral(build); @@ -4797,7 +4800,8 @@ pair<u32, u32> writeDelayPrograms(RoseBuildImpl &build, build_context &bc, * programs.
*/ static -pair<u32, u32> writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc, +pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build, + build_context &bc, ProgramBuild &prog_build) { auto lit_edge_map = findEdgesByLiteral(build); @@ -4876,7 +4880,8 @@ set<ReportID> findEngineReports(const RoseBuildImpl &build) { } static -pair<u32, u32> buildReportPrograms(RoseBuildImpl &build, build_context &bc) { +pair<u32, u32> buildReportPrograms(const RoseBuildImpl &build, + build_context &bc) { const auto reports = findEngineReports(build); vector<u32> programs; programs.reserve(reports.size()); @@ -4900,7 +4905,8 @@ pair<u32, u32> buildReportPrograms(RoseBuildImpl &build, build_context &bc) { } static -RoseProgram makeEodAnchorProgram(RoseBuildImpl &build, build_context &bc, +RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, + const build_context &bc, ProgramBuild &prog_build, const RoseEdge &e, const bool multiple_preds) { const RoseGraph &g = build.g; @@ -4956,7 +4962,7 @@ bool hasEodMatcher(const RoseBuildImpl &build) { } static -void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, +void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc, ProgramBuild &prog_build, bool in_etable, RoseProgram &program) { const RoseGraph &g = build.g; @@ -5001,7 +5007,7 @@ void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, } static -void addEodEventProgram(RoseBuildImpl &build, build_context &bc, +void addEodEventProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, RoseProgram &program) { if (build.eod_event_literal_id == MO_INVALID_IDX) { return; } @@ -5066,7 +5072,7 @@ void addMatcherEodProgram(const RoseBuildImpl &build, RoseProgram &program) { } static -u32 writeEodProgram(RoseBuildImpl &build, build_context &bc, +u32 writeEodProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, u32 eodNfaIterOffset) { RoseProgram program; @@ -5205,7 +5211,7 @@ u32 writeEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue, } static -aligned_unique_ptr<RoseEngine> addSmallWriteEngine(RoseBuildImpl &build, +aligned_unique_ptr<RoseEngine> addSmallWriteEngine(const RoseBuildImpl &build, aligned_unique_ptr<RoseEngine> rose) { assert(rose);
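
Annotation: the const sweep above is mechanical but meaningful: once a pass takes const RoseBuildImpl &, the compiler proves the pass is read-only with respect to the build state. A toy demonstration of the guarantee (hypothetical types, not Rose code):

    struct Build {
        int programs_written = 0;
    };

    // Read-only pass: any accidental mutation of build state now fails to
    // compile instead of silently changing compilation results.
    static int countPrograms(const Build &build) {
        // build.programs_written++; // error: build is const
        return build.programs_written;
    }
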
From de52b30c3ed0c41c38f8d977a5526ca2cdc45384 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 16 Mar 2017 15:30:33 +1100 Subject: [PATCH 159/326] make rose responsible for dumping its bytecode --- src/compiler/compiler.cpp | 2 -- src/rose/rose_build_bytecode.cpp | 4 +++ src/rose/rose_build_dump.cpp | 56 +++++++------------------------- src/rose/rose_build_dump.h | 24 ++++++++++---- src/rose/rose_build_matchers.cpp | 33 ++++++++++++++++++- src/rose/rose_build_matchers.h | 32 +----------------- 6 files changed, 65 insertions(+), 86 deletions(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index d59c5cc6..56ed5f41 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -55,7 +55,6 @@ #include "parser/unsupported.h" #include "parser/utf8_validate.h" #include "rose/rose_build.h" -#include "rose/rose_build_dump.h" #include "som/slot_manager_dump.h" #include "util/alloc.h" #include "util/compile_error.h" @@ -310,7 +309,6 @@ aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) { return nullptr; } - dumpRose(*ng.rose, rose.get(), ng.cc.grey); dumpReportManager(ng.rm, ng.cc.grey); dumpSomSlotManager(ng.ssm, ng.cc.grey); dumpSmallWrite(rose.get(), ng.cc.grey); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index e7859405..32a1d075 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -33,6 +33,7 @@ #include "hs_compile.h" // for HS_MODE_* #include "rose_build_add_internal.h" #include "rose_build_anchored.h" +#include "rose_build_dump.h" #include "rose_build_engine_blob.h" #include "rose_build_exclusive.h" #include "rose_build_groups.h" @@ -5582,6 +5583,9 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine = addSmallWriteEngine(*this, move(engine)); DEBUG_PRINTF("rose done %p\n", engine.get()); + + dumpRose(*this, engine.get()); + return engine; } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index a13fc964..0d05e8ac 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -275,10 +275,8 @@ private: } // namespace -void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t, +void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t, const char *filename) { - const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl &>(build_base); - const Grey &grey = build.cc.grey; /* "early" rose graphs should only be dumped if we are dumping intermediate @@ -497,9 +495,13 @@ string toRegex(const string &lit) { return os.str(); } -static -void dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) { - ofstream of(filename.c_str()); +void dumpMatcherLiterals(const vector<hwlmLiteral> &lits, const string &name, + const Grey &grey) { + if (!grey.dumpFlags) { + return; + } + + ofstream of(grey.dumpPath + "rose_" + name + "_test_literals.txt"); // Unique regex index, as literals may share an ID. u32 i = 0; @@ -528,40 +530,6 @@ void dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) { of.close(); } -static -void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { - size_t historyRequired = build.calcHistoryRequired(); - size_t longLitLengthThreshold = - calcLongLitThreshold(build, historyRequired); - - auto mp = - makeMatcherProto(build, ROSE_ANCHORED, false, longLitLengthThreshold); - dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits); - - mp = makeMatcherProto(build, ROSE_FLOATING, false, longLitLengthThreshold); - dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - - if (build.cc.streaming) { - mp = makeMatcherProto(build, ROSE_FLOATING, true, - longLitLengthThreshold); - dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", - mp.lits); - } - - mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, false, - build.ematcher_region_size); - dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits); - - if (!build.cc.streaming) { - mp = makeMatcherProto(build, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN, - ROSE_SMALL_BLOCK_LEN); - auto mp2 = makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, false, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits)); - dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits); - } -} - static const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) { if (!offset) { @@ -1894,14 +1862,13 @@ void roseDumpPrograms(const RoseBuildImpl &build, const RoseEngine *t, dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); } -void dumpRose(const RoseBuild &build_base, const RoseEngine *t, - const Grey &grey) { +void dumpRose(const RoseBuildImpl &build, const RoseEngine *t) { + const Grey &grey = build.cc.grey; + if (!grey.dumpFlags) { return; } - const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl &>(build_base); - stringstream ss; ss << grey.dumpPath << "rose.txt"; @@ -1929,7 +1896,6 @@ void dumpRose(const RoseBuild
&build_base, const RoseEngine *t, ss.clear(); ss << grey.dumpPath << "rose_literals.txt"; dumpRoseLiterals(build, ss.str().c_str()); - dumpRoseTestLiterals(build, grey.dumpPath); f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w"); roseDumpStructRaw(t, f); diff --git a/src/rose/rose_build_dump.h b/src/rose/rose_build_dump.h index 28e9f53a..601f5914 100644 --- a/src/rose/rose_build_dump.h +++ b/src/rose/rose_build_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,30 +29,40 @@ #ifndef ROSE_BUILD_DUMP_H #define ROSE_BUILD_DUMP_H +#include <string> +#include <vector> + struct RoseEngine; namespace ue2 { -class RoseBuild; +class RoseBuildImpl; struct Grey; +struct hwlmLiteral; #ifdef DUMP_SUPPORT // Dump the Rose graph in graphviz representation. -void dumpRoseGraph(const RoseBuild &build, const RoseEngine *t, +void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t, const char *filename); -void dumpRose(const RoseBuild &build_base, const RoseEngine *t, - const Grey &grey); +void dumpRose(const RoseBuildImpl &build, const RoseEngine *t); + +void dumpMatcherLiterals(const std::vector<hwlmLiteral> &lits, + const std::string &name, const Grey &grey); #else static UNUSED -void dumpRoseGraph(const RoseBuild &, const RoseEngine *, const char *) { +void dumpRoseGraph(const RoseBuildImpl &, const RoseEngine *, const char *) { } static UNUSED -void dumpRose(const RoseBuild &, const RoseEngine *, const Grey &) { +void dumpRose(const RoseBuildImpl &, const RoseEngine *) { } +static UNUSED +void dumpMatcherLiterals(const std::vector<hwlmLiteral> &, const std::string &, + const Grey &) { +} #endif } // namespace ue2 diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 50e48a5b..5625437b 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -33,6 +33,7 @@ #include "rose_build_matchers.h" +#include "rose_build_dump.h" #include "rose_build_impl.h" #include "rose_build_lit_accel.h" #include "rose_build_width.h" @@ -645,9 +646,35 @@ void trim_to_suffix(Container &c, size_t len) { c.erase(c.begin(), c.begin() + suffix_len); } +namespace { + +/** \brief Prototype for literal matcher construction. */ +struct MatcherProto { + /** \brief Literal fragments used to construct the literal matcher. */ + vector<hwlmLiteral> lits; + + /** \brief Longer literals used for acceleration analysis. */ + vector<hwlmLiteral> accel_lits; + + /** \brief The history required by the literal matcher. */ + size_t history_required = 0; + + /** \brief Insert the contents of another MatcherProto. */ + void insert(const MatcherProto &a); +}; +} + +/** + * \brief Build up a vector of literals (and associated other data) for the + * given table. + * + * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can + * only lead to a pattern match after max_offset may be excluded.
+ */ +static MatcherProto makeMatcherProto(const RoseBuildImpl &build, rose_literal_table table, bool delay_rebuild, - size_t max_len, u32 max_offset) { + size_t max_len, u32 max_offset = ROSE_BOUND_INF) { MatcherProto mp; if (delay_rebuild) { @@ -794,6 +821,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, DEBUG_PRINTF("empty floating matcher\n"); return nullptr; } + dumpMatcherLiterals(mp.lits, "floating", build.cc.grey); for (const hwlmLiteral &lit : mp.lits) { *fgroups |= lit.groups; @@ -834,6 +862,7 @@ aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(const RoseBuildImpl &build, DEBUG_PRINTF("empty delay rebuild matcher\n"); return nullptr; } + dumpMatcherLiterals(mp.lits, "delay_rebuild", build.cc.grey); auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); if (!hwlm) { @@ -883,6 +912,7 @@ aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build, } mp.insert(mp_anchored); + dumpMatcherLiterals(mp.lits, "smallblock", build.cc.grey); // None of our literals should be longer than the small block limit. assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) { @@ -919,6 +949,7 @@ aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build, assert(!build.ematcher_region_size); return nullptr; } + dumpMatcherLiterals(mp.lits, "eod", build.cc.grey); assert(build.ematcher_region_size); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 184c2633..cb56037d 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -35,42 +35,12 @@ #define ROSE_BUILD_MATCHERS_H #include "rose_build_impl.h" -#include "rose_build_lit_accel.h" -#include "hwlm/hwlm_literal.h" - -#include -#include +struct Grey; struct HWLM; namespace ue2 { -/** \brief Prototype for literal matcher construction. */ -struct MatcherProto { - /** \brief Literal fragments used to construct the literal matcher. */ - std::vector<hwlmLiteral> lits; - - /** \brief Longer literals used for acceleration analysis. */ - std::vector<hwlmLiteral> accel_lits; - - /** \brief The history required by the literal matcher. */ - size_t history_required = 0; - - /** \brief Insert the contents of another MatcherProto. */ - void insert(const MatcherProto &a); -}; - -/** - * \brief Build up a vector of literals (and associated other data) for the - * given table. - * - * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can - * only lead to a pattern match after max_offset may be excluded. - */ -MatcherProto makeMatcherProto(const RoseBuildImpl &build, - rose_literal_table table, bool delay_rebuild, - size_t max_len, u32 max_offset = ROSE_BOUND_INF); - aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, rose_group *fgroups,
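
Annotation: a detail worth noting in the patch above is visible in dumpMatcherLiterals: each dump entry point now checks grey.dumpFlags itself and becomes a no-op when dumping is disabled, so call sites such as the matcher builders need no guards. A minimal sketch of that pattern, mirroring only the Grey fields used above (the rest is hypothetical):

    #include <fstream>
    #include <string>
    #include <vector>

    struct Grey {
        unsigned dumpFlags = 0;
        std::string dumpPath;
    };

    // The dump helper owns the "is dumping enabled?" check, so callers can
    // invoke it unconditionally, as the matcher builders above now do.
    void dumpLiterals(const std::vector<std::string> &lits,
                      const std::string &name, const Grey &grey) {
        if (!grey.dumpFlags) {
            return;
        }
        std::ofstream of(grey.dumpPath + "rose_" + name + "_test_literals.txt");
        for (const auto &lit : lits) {
            of << lit << '\n';
        }
    }
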
From 01b91da1cd1ae9e9ab46999b1d549836062887ee Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 16 Mar 2017 16:30:01 +1100 Subject: [PATCH 160/326] Use the default allocator for std::vector --- src/util/small_vector.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/util/small_vector.h b/src/util/small_vector.h index c67ad562..0b60d8c0 100644 --- a/src/util/small_vector.h +++ b/src/util/small_vector.h @@ -52,8 +52,7 @@ using small_vector = boost::container::small_vector<T, N, Allocator>; #else // Boost version isn't new enough, fall back to just using std::vector. -template <class T, std::size_t N, - typename Allocator = std::allocator<T>> +template <class T, std::size_t N> using small_vector = std::vector<T>; #endif // HAVE_BOOST_CONTAINER_SMALL_VECTOR From 287f519205308b07017ac147604f1a87d0b1a526 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 16 Mar 2017 16:58:42 +1100 Subject: [PATCH 161/326] cmake: look for Boost in-tree first During development we tend to use a symlink to the most recent version of Boost from the Hyperscan source tree, so look there first. Also, unset some cached results so we recheck when the Boost version changes. --- CMakeLists.txt | 15 +++++++++------ cmake/boost.cmake | 3 +++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0fd3639f..0776e42c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,18 +72,21 @@ else () endif () set(BOOST_NO_BOOST_CMAKE ON) -# first check for Boost installed on the system -find_package(Boost ${BOOST_MINVERSION}) +unset(Boost_INCLUDE_DIR CACHE) +# we might have boost in tree, so provide a hint and try again +set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include") +find_package(Boost ${BOOST_MINVERSION} QUIET) if(NOT Boost_FOUND) - # we might have boost in tree, so provide a hint and try again - message(STATUS "trying include dir for boost") - set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include") - find_package(Boost ${BOOST_MINVERSION}) + # otherwise check for Boost installed on the system + unset(BOOST_INCLUDEDIR) + find_package(Boost ${BOOST_MINVERSION} QUIET) if(NOT Boost_FOUND) message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.") endif() endif() +message(STATUS "Boost version: ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}") + include (${CMAKE_MODULE_PATH}/boost.cmake) # -- make this work?
set(python_ADDITIONAL_VERSIONS 2.7 2.6) diff --git a/cmake/boost.cmake b/cmake/boost.cmake index 3d513deb..b7fda421 100644 --- a/cmake/boost.cmake +++ b/cmake/boost.cmake @@ -38,4 +38,7 @@ ${BOOST_REV_TEST}" BOOST_REVGRAPH_PATCH) endif() unset (CMAKE_REQUIRED_INCLUDES) +else () + unset(BOOST_REVGRAPH_OK CACHE) + unset(BOOST_REVGRAPH_PATCH CACHE) endif () # Boost 1.62.0 From b68e4ea336613b9f1251dbe466df712c5d479428 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 17 Mar 2017 09:25:03 +1100 Subject: [PATCH 162/326] cmake: move boost tests --- CMakeLists.txt | 28 ---------------------------- cmake/boost.cmake | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0776e42c..d9bf171a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,34 +59,6 @@ include_directories(${PROJECT_SOURCE_DIR}/src) include_directories(${PROJECT_BINARY_DIR}) include_directories(SYSTEM include) -set(BOOST_USE_STATIC_LIBS OFF) -set(BOOST_USE_MULTITHREADED OFF) -set(BOOST_USE_STATIC_RUNTIME OFF) -if (CMAKE_SYSTEM_NAME MATCHES "Darwin" - OR (CMAKE_SYSTEM_NAME MATCHES "FreeBSD" - AND CMAKE_C_COMPILER_ID MATCHES "Clang")) - # we need a more recent boost for libc++ used by clang on OSX and FreeBSD - set(BOOST_MINVERSION 1.61.0) -else () - set(BOOST_MINVERSION 1.57.0) -endif () -set(BOOST_NO_BOOST_CMAKE ON) - -unset(Boost_INCLUDE_DIR CACHE) -# we might have boost in tree, so provide a hint and try again -set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include") -find_package(Boost ${BOOST_MINVERSION} QUIET) -if(NOT Boost_FOUND) - # otherwise check for Boost installed on the system - unset(BOOST_INCLUDEDIR) - find_package(Boost ${BOOST_MINVERSION} QUIET) - if(NOT Boost_FOUND) - message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.") - endif() -endif() - -message(STATUS "Boost version: ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}") - include (${CMAKE_MODULE_PATH}/boost.cmake) # -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6) diff --git a/cmake/boost.cmake b/cmake/boost.cmake index b7fda421..ff4e2f94 100644 --- a/cmake/boost.cmake +++ b/cmake/boost.cmake @@ -1,3 +1,33 @@ +# Various checks related to Boost + +set(BOOST_USE_STATIC_LIBS OFF) +set(BOOST_USE_MULTITHREADED OFF) +set(BOOST_USE_STATIC_RUNTIME OFF) +if (CMAKE_SYSTEM_NAME MATCHES "Darwin" + OR (CMAKE_SYSTEM_NAME MATCHES "FreeBSD" + AND CMAKE_C_COMPILER_ID MATCHES "Clang")) + # we need a more recent boost for libc++ used by clang on OSX and FreeBSD + set(BOOST_MINVERSION 1.61.0) +else () + set(BOOST_MINVERSION 1.57.0) +endif () +set(BOOST_NO_BOOST_CMAKE ON) + +unset(Boost_INCLUDE_DIR CACHE) +# we might have boost in tree, so provide a hint and try again +set(BOOST_INCLUDEDIR "${PROJECT_SOURCE_DIR}/include") +find_package(Boost ${BOOST_MINVERSION} QUIET) +if(NOT Boost_FOUND) + # otherwise check for Boost installed on the system + unset(BOOST_INCLUDEDIR) + find_package(Boost ${BOOST_MINVERSION} QUIET) + if(NOT Boost_FOUND) + message(FATAL_ERROR "Boost ${BOOST_MINVERSION} or later not found. 
Either install system packages if available, extract Boost headers to ${CMAKE_SOURCE_DIR}/include, or set the CMake BOOST_ROOT variable.") + endif() +endif() + +message(STATUS "Boost version: ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}") + # Boost 1.62 has a bug that we've patched around, check if it is required if (Boost_VERSION EQUAL 106200) set (CMAKE_REQUIRED_INCLUDES ${BOOST_INCLUDEDIR} "${PROJECT_SOURCE_DIR}/include") From 11aac0785503a35d9b9906f29a616a7e6606ceb2 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 15 Mar 2017 16:49:05 +1100 Subject: [PATCH 163/326] cmake: override gcc's tuning guess --- CMakeLists.txt | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d9bf171a..f71eef39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,6 +173,35 @@ else() string(REGEX REPLACE "-O[^ ]*" "" CMAKE_CXX_FLAGS_${CONFIG} "${CMAKE_CXX_FLAGS_${CONFIG}}") endforeach () + if (CMAKE_COMPILER_IS_GNUCC) + # If gcc doesn't recognise the host cpu, then mtune=native becomes + # generic, which isn't very good in some cases. march=native looks at + # cpuid info and then chooses the best microarch it can (and replaces + # the flag), so use that for tune. + + # arg1 might exist if using ccache + string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1) + set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native) + execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} + OUTPUT_VARIABLE _GCC_OUTPUT) + string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1" + GNUCC_ARCH "${_GCC_OUTPUT}") + + # test the parsed flag + set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH}) + execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS} + OUTPUT_QUIET ERROR_QUIET + INPUT_FILE /dev/null + RESULT_VARIABLE GNUCC_TUNE_TEST) + if (NOT GNUCC_TUNE_TEST EQUAL 0) + message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid") + endif() + set(TUNE_FLAG ${GNUCC_ARCH}) + else () + set(TUNE_FLAG native) + endif() + + if(OPTIMISE) set(OPT_C_FLAG "-O3") set(OPT_CXX_FLAG "-O2") @@ -197,12 +226,12 @@ else() set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -DNDEBUG") endif() - if (NOT CMAKE_C_FLAGS MATCHES .*march.*) - set(ARCH_C_FLAGS "${ARCH_C_FLAGS} -march=native -mtune=native") + if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*) + set(ARCH_C_FLAGS "-march=native -mtune=${TUNE_FLAG}") endif() - if (NOT CMAKE_CXX_FLAGS MATCHES .*march.*) - set(ARCH_CXX_FLAGS "${ARCH_CXX_FLAGS} -march=native -mtune=native") + if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*) + set(ARCH_CXX_FLAGS "-march=native -mtune=${TUNE_FLAG}") endif() if(CMAKE_COMPILER_IS_GNUCC) @@ -385,13 +414,13 @@ endif() endif() if (NOT FAT_RUNTIME) -message(STATUS "Building for current host CPU") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") + message(STATUS "Building for current host CPU: ${ARCH_C_FLAGS}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARCH_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_CXX_FLAGS}") else() -message(STATUS "Building runtime for multiple microarchitectures") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + message(STATUS "Building runtime for multiple microarchitectures") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() add_subdirectory(util) From 
5daa191313d8886c5d93ff28a323728108bf7aee Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 17 Mar 2017 09:11:53 +1100 Subject: [PATCH 164/326] cmake: simplify g++ version check --- CMakeLists.txt | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f71eef39..1ba4fe65 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,22 +151,6 @@ if(MSVC OR MSVC_IDE) else() - # compiler version checks TODO: test more compilers - if (CMAKE_COMPILER_IS_GNUCXX) - set (GNUCXX_MINVER "4.8.1") - exec_program(${CMAKE_CXX_COMPILER} - ARGS ${CMAKE_CXX_COMPILER_ARG1} --version - OUTPUT_VARIABLE _GXX_OUTPUT) - # is the following too fragile? - string(REGEX REPLACE ".* ([0-9]\\.[0-9](\\.[0-9])?)( |\n).*" "\\1" - GNUCXX_VERSION "${_GXX_OUTPUT}") - message(STATUS "g++ version ${GNUCXX_VERSION}") - if (GNUCXX_VERSION VERSION_LESS ${GNUCXX_MINVER}) - message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support") - endif() - unset(_GXX_OUTPUT) - endif() - # remove CMake's idea of optimisation foreach (CONFIG ${CMAKE_BUILD_TYPE} ${CMAKE_CONFIGURATION_TYPES}) string(REGEX REPLACE "-O[^ ]*" "" CMAKE_C_FLAGS_${CONFIG} "${CMAKE_C_FLAGS_${CONFIG}}") @@ -174,6 +158,7 @@ else() endforeach () if (CMAKE_COMPILER_IS_GNUCC) + message(STATUS "gcc version ${CMAKE_C_COMPILER_VERSION}") # If gcc doesn't recognise the host cpu, then mtune=native becomes # generic, which isn't very good in some cases. march=native looks at # cpuid info and then chooses the best microarch it can (and replaces @@ -201,6 +186,14 @@ else() set(TUNE_FLAG native) endif() + # compiler version checks TODO: test more compilers + if (CMAKE_COMPILER_IS_GNUCXX) + set(GNUCXX_MINVER "4.8.1") + message(STATUS "g++ version ${CMAKE_CXX_COMPILER_VERSION}") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS GNUCXX_MINVER) + message(FATAL_ERROR "A minimum of g++ ${GNUCXX_MINVER} is required for C++11 support") + endif() + endif() if(OPTIMISE) set(OPT_C_FLAG "-O3") From ce50ff611910369a5be3bcb223ffff6c0594458d Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 2 Mar 2017 15:26:03 +1100 Subject: [PATCH 165/326] Use GNUInstallDirs for install paths --- CMakeLists.txt | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ba4fe65..7a60142f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required (VERSION 2.8.11) -project (Hyperscan C CXX) +project (hyperscan C CXX) set (HS_MAJOR_VERSION 4) set (HS_MINOR_VERSION 4) @@ -15,6 +15,7 @@ INCLUDE (CheckIncludeFileCXX) INCLUDE (CheckLibraryExists) INCLUDE (CheckSymbolExists) include (CMakeDependentOption) +include (GNUInstallDirs) include (${CMAKE_MODULE_PATH}/platform.cmake) include (${CMAKE_MODULE_PATH}/ragel.cmake) @@ -36,6 +37,7 @@ endif() set(BINDIR "${PROJECT_BINARY_DIR}/bin") set(LIBDIR "${PROJECT_BINARY_DIR}/lib") +set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}) # First for the generic no-config case set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${BINDIR}") @@ -438,7 +440,7 @@ if (NOT WIN32) configure_file(libhs.pc.in libhs.pc @ONLY) # only replace @ quoted vars install(FILES ${CMAKE_BINARY_DIR}/libhs.pc - DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/pkgconfig") + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") endif() # only set these after all tests are done @@ -475,7 +477,7 @@ SET(hs_HEADERS src/hs_compile.h src/hs_runtime.h ) -install(FILES ${hs_HEADERS} DESTINATION include/hs) +install(FILES ${hs_HEADERS} DESTINATION 
"${CMAKE_INSTALL_INCLUDEDIR}/hs") set (hs_exec_common_SRCS src/alloc.c @@ -1180,7 +1182,7 @@ else (FAT_RUNTIME) endif (NOT FAT_RUNTIME) if (NOT BUILD_SHARED_LIBS) - install(TARGETS hs_runtime DESTINATION lib) + install(TARGETS hs_runtime DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) @@ -1202,9 +1204,9 @@ $) MACOSX_RPATH ON LINKER_LANGUAGE C) install(TARGETS hs_runtime_shared - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() if (BUILD_STATIC_LIBS) @@ -1212,7 +1214,7 @@ if (BUILD_STATIC_LIBS) endif () if (NOT BUILD_SHARED_LIBS) -install(TARGETS hs DESTINATION lib) + install(TARGETS hs DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) @@ -1234,9 +1236,9 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) SOVERSION ${LIB_SOVERSION} MACOSX_RPATH ON) install(TARGETS hs_shared - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() # used by tools and other targets From fadfab6d8c0c0d71c08c68f8b4178eb5a406867b Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 20 Mar 2017 10:00:51 +1100 Subject: [PATCH 166/326] Install example source in docdir/examples --- examples/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index b52bbdfa..c252c9ac 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -22,3 +22,6 @@ set_source_files_properties(patbench.cc PROPERTIES COMPILE_FLAGS "-Wall -Wno-unused-parameter") target_link_libraries(patbench hs pcap) endif() + +install(FILES simplegrep.c pcapscan.cc patbench.cc README.md + DESTINATION ${CMAKE_INSTALL_DOCDIR}/examples) From 5dfae12a62f8145b01962ec8b37c825f28c0849d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 16 Mar 2017 18:18:34 +1100 Subject: [PATCH 167/326] ng: split NGWrapper into NGHolder, ExpressionInfo We now use NGHolder for all graph information, while other expression properties (report, flag information, etc) go in new class ExpressionInfo. 
--- CMakeLists.txt | 1 + src/compiler/asserts.cpp | 28 ++-- src/compiler/asserts.h | 8 +- src/compiler/compiler.cpp | 130 ++++++++-------- src/compiler/compiler.h | 48 +++--- src/compiler/expression_info.h | 102 +++++++++++++ src/hs.cpp | 14 +- src/nfagraph/ng.cpp | 192 +++++++++++------------- src/nfagraph/ng.h | 37 +---- src/nfagraph/ng_asserts.cpp | 58 +++---- src/nfagraph/ng_asserts.h | 10 +- src/nfagraph/ng_builder.cpp | 33 ++-- src/nfagraph/ng_builder.h | 10 +- src/nfagraph/ng_dump.cpp | 25 +-- src/nfagraph/ng_dump.h | 13 +- src/nfagraph/ng_expr_info.cpp | 61 ++++---- src/nfagraph/ng_expr_info.h | 12 +- src/nfagraph/ng_extparam.cpp | 191 ++++++++++++----------- src/nfagraph/ng_extparam.h | 7 +- src/nfagraph/ng_literal_component.cpp | 15 +- src/nfagraph/ng_literal_component.h | 6 +- src/nfagraph/ng_som.cpp | 48 +++--- src/nfagraph/ng_som.h | 12 +- src/nfagraph/ng_utf8.cpp | 21 +-- src/nfagraph/ng_utf8.h | 6 +- src/nfagraph/ng_vacuous.cpp | 42 +++--- src/nfagraph/ng_vacuous.h | 7 +- src/parser/shortcut_literal.cpp | 13 +- src/smallwrite/smallwrite_build.cpp | 17 ++- src/smallwrite/smallwrite_build.h | 15 +- src/util/report_manager.cpp | 20 ++- src/util/report_manager.h | 10 +- unit/internal/lbr.cpp | 5 +- unit/internal/limex_nfa.cpp | 11 +- unit/internal/nfagraph_common.h | 15 +- unit/internal/nfagraph_equivalence.cpp | 19 +-- unit/internal/nfagraph_find_matches.cpp | 3 +- unit/internal/nfagraph_redundancy.cpp | 15 +- unit/internal/nfagraph_width.cpp | 8 +- util/ng_corpus_generator.cpp | 44 ++++-- util/ng_corpus_generator.h | 6 +- 41 files changed, 726 insertions(+), 612 deletions(-) create mode 100644 src/compiler/expression_info.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a60142f..732a73b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -674,6 +674,7 @@ SET (hs_SRCS src/compiler/compiler.h src/compiler/error.cpp src/compiler/error.h + src/compiler/expression_info.h src/fdr/engine_description.cpp src/fdr/engine_description.h src/fdr/fdr_compile.cpp diff --git a/src/compiler/asserts.cpp b/src/compiler/asserts.cpp index be836b06..44442226 100644 --- a/src/compiler/asserts.cpp +++ b/src/compiler/asserts.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,8 @@ * word-to-word and word-to-nonword) are dropped. 
*/ #include "asserts.h" + +#include "compiler/compiler.h" #include "nfagraph/ng.h" #include "nfagraph/ng_prune.h" #include "nfagraph/ng_redundancy.h" @@ -115,8 +117,8 @@ u32 conjunct(u32 flags1, u32 flags2) { typedef map, NFAEdge> edge_cache_t; static -void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, - u32 &assert_edge_count) { +void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr, + edge_cache_t &edge_cache, u32 &assert_edge_count) { DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index); const u32 flags = g[t].assert_flags; @@ -178,8 +180,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, edge_cache.emplace(cache_key, e); g[e].assert_flags = flags; if (++assert_edge_count > MAX_ASSERT_EDGES) { - throw CompileError(g.expressionIndex, - "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } else { NFAEdge e = ecit->second; @@ -200,21 +201,23 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, } static -void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { +void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + NFAVertex v, s32 adj) { // Don't try and set the report ID of a special vertex. assert(!is_special(v, g)); // There should be no reports set already. assert(g[v].reports.empty()); - Report r = rm.getBasicInternalReport(g, adj); + Report r = rm.getBasicInternalReport(expr, adj); g[v].reports.insert(rm.getInternalId(r)); DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); } static -void checkForMultilineStart(ReportManager &rm, NGWrapper &g) { +void checkForMultilineStart(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { vector dead; for (auto v : adjacent_vertices_range(g.start, g)) { if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) { @@ -238,7 +241,7 @@ void checkForMultilineStart(ReportManager &rm, NGWrapper &g) { for (const auto &e : dead) { NFAVertex dummy = add_vertex(g); g[dummy].char_reach.setall(); - setReportId(rm, g, dummy, -1); + setReportId(rm, g, expr, dummy, -1); add_edge(source(e, g), dummy, g[e], g); add_edge(dummy, g.accept, g); } @@ -263,7 +266,8 @@ bool hasAssertVertices(const NGHolder &g) { * Remove the horrors that are the temporary assert vertices which arise from * our construction method. Allows the rest of our code base to live in * blissful ignorance of their existence. 
  */
-void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
+void removeAssertVertices(ReportManager &rm, NGHolder &g,
+                          const ExpressionInfo &expr) {
     size_t num = 0;
 
     DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
@@ -285,12 +289,12 @@ void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
 
     for (auto v : vertices_range(g)) {
         if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
-            replaceAssertVertex(g, v, edge_cache, assert_edge_count);
+            replaceAssertVertex(g, v, expr, edge_cache, assert_edge_count);
             num++;
         }
     }
 
-    checkForMultilineStart(rm, g);
+    checkForMultilineStart(rm, g, expr);
 
     if (num) {
         DEBUG_PRINTF("resolved %zu assert vertices\n", num);
diff --git a/src/compiler/asserts.h b/src/compiler/asserts.h
index b9ec80c7..b4d64c6c 100644
--- a/src/compiler/asserts.h
+++ b/src/compiler/asserts.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,8 +35,9 @@
 
 namespace ue2 {
 
+class ExpressionInfo;
 class ReportManager;
-class NGWrapper;
+class NGHolder;
 
 /** \brief Convert temporary assert vertices (from construction method) to
  * edge-based flags.
@@ -44,7 +45,8 @@ class NGWrapper;
  * Remove the horrors that are the temporary assert vertices which arise from
  * our construction method. Allows the rest of our code base to live in
  * blissful ignorance of their existence. */
-void removeAssertVertices(ReportManager &rm, NGWrapper &g);
+void removeAssertVertices(ReportManager &rm, NGHolder &g,
+                          const ExpressionInfo &expr);
 
 } // namespace ue2
 
diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp
index 56ed5f41..49ed88f4 100644
--- a/src/compiler/compiler.cpp
+++ b/src/compiler/compiler.cpp
@@ -73,7 +73,6 @@ using namespace std;
 
 namespace ue2 {
 
-
 static
 void validateExt(const hs_expr_ext &ext) {
     static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
@@ -100,26 +99,18 @@ void validateExt(const hs_expr_ext &ext) {
 }
 
 ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
-                                   unsigned flags, ReportID actionId,
+                                   unsigned flags, ReportID report,
                                    const hs_expr_ext *ext)
-    : utf8(false),
-      allow_vacuous(flags & HS_FLAG_ALLOWEMPTY),
-      highlander(flags & HS_FLAG_SINGLEMATCH),
-      prefilter(flags & HS_FLAG_PREFILTER),
-      som(SOM_NONE),
-      index(index_in),
-      id(actionId),
-      min_offset(0),
-      max_offset(MAX_OFFSET),
-      min_length(0),
-      edit_distance(0) {
+    : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
+           false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
+           0, 0) {
     ParseMode mode(flags);
 
     component = parse(expression, mode);
 
-    utf8 = mode.utf8; /* utf8 may be set by parse() */
+    expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
 
-    if (utf8 && !isValidUtf8(expression)) {
+    if (expr.utf8 && !isValidUtf8(expression)) {
         throw ParseError("Expression is not valid UTF-8.");
     }
 
@@ -147,7 +138,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
 
     // Set SOM type.
     if (flags & HS_FLAG_SOM_LEFTMOST) {
-        som = SOM_LEFT;
+        expr.som = SOM_LEFT;
     }
 
     // Set extended parameters, if we have them.
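
For illustration, the flag-to-field mapping performed by the constructor above can be sketched in isolation. This is a hypothetical helper, not part of the patch; it uses only the ExpressionInfo constructor this series introduces, with the argument order of the initialiser list above:

    // Sketch: how HS_FLAG_* bits become ExpressionInfo fields.
    #include "compiler/expression_info.h"

    ue2::ExpressionInfo makeInfo(unsigned index, unsigned flags,
                                 ReportID report) {
        return ue2::ExpressionInfo(index,
                                   flags & HS_FLAG_ALLOWEMPTY,  // allow_vacuous
                                   flags & HS_FLAG_SINGLEMATCH, // highlander
                                   false,      // utf8: only known after parse()
                                   flags & HS_FLAG_PREFILTER,   // prefilter
                                   ue2::SOM_NONE, report,
                                   0, MAX_OFFSET, 0, 0); // extparam defaults
    }
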
@@ -156,29 +147,29 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
         validateExt(*ext);
 
         if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
-            min_offset = ext->min_offset;
+            expr.min_offset = ext->min_offset;
         }
         if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
-            max_offset = ext->max_offset;
+            expr.max_offset = ext->max_offset;
         }
         if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
-            min_length = ext->min_length;
+            expr.min_length = ext->min_length;
         }
         if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
-            edit_distance = ext->edit_distance;
+            expr.edit_distance = ext->edit_distance;
         }
     }
 
     // These are validated in validateExt, so an error will already have been
     // thrown if these conditions don't hold.
-    assert(max_offset >= min_offset);
-    assert(max_offset >= min_length);
+    assert(expr.max_offset >= expr.min_offset);
+    assert(expr.max_offset >= expr.min_length);
 
     // Since prefiltering and SOM aren't supported together, we must squash any
     // min_length constraint as well.
-    if (flags & HS_FLAG_PREFILTER && min_length) {
+    if (flags & HS_FLAG_PREFILTER && expr.min_length) {
         DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
-        min_length = 0;
+        expr.min_length = 0;
     }
 }
 
@@ -187,25 +178,25 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
  * \brief Dumps the parse tree to screen in debug mode and to disk in dump
  * mode.
  */
-void dumpExpression(UNUSED const ParsedExpression &expr,
+void dumpExpression(UNUSED const ParsedExpression &pe,
                     UNUSED const char *stage, UNUSED const Grey &grey) {
 #if defined(DEBUG)
-    DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id,
-                 expr.index);
+    DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
+                 pe.expr.report, pe.expr.index);
     ostringstream debug_tree;
-    dumpTree(debug_tree, expr.component.get());
+    dumpTree(debug_tree, pe.component.get());
     printf("%s\n", debug_tree.str().c_str());
 #endif // DEBUG
 
 #if defined(DUMP_SUPPORT)
     if (grey.dumpFlags & Grey::DUMP_PARSE) {
         stringstream ss;
-        ss << grey.dumpPath << "Expr_" << expr.index << "_componenttree_"
+        ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
            << stage << ".txt";
         ofstream out(ss.str().c_str());
-        out << "Component Tree for " << expr.id << endl;
-        dumpTree(out, expr.component.get());
-        if (expr.utf8) {
+        out << "Component Tree for " << pe.expr.report << endl;
+        dumpTree(out, pe.component.get());
+        if (pe.expr.utf8) {
             out << "UTF8 mode" << endl;
         }
     }
@@ -215,13 +206,13 @@ void dumpExpression(UNUSED const ParsedExpression &expr,
 
 /** \brief Run Component tree optimisations on \a expr. */
 static
-void optimise(ParsedExpression &expr) {
-    if (expr.min_length || expr.som) {
+void optimise(ParsedExpression &pe) {
+    if (pe.expr.min_length || pe.expr.som) {
         return;
     }
 
     DEBUG_PRINTF("optimising\n");
-    expr.component->optimise(true /* root is connected to sds */);
+    pe.component->optimise(true /* root is connected to sds */);
 }
 
 void addExpression(NG &ng, unsigned index, const char *expression,
@@ -238,34 +229,34 @@ void addExpression(NG &ng, unsigned index, const char *expression,
 
     // Do per-expression processing: errors here will result in an exception
     // being thrown up to our caller
-    ParsedExpression expr(index, expression, flags, id, ext);
-    dumpExpression(expr, "orig", cc.grey);
+    ParsedExpression pe(index, expression, flags, id, ext);
+    dumpExpression(pe, "orig", cc.grey);
 
     // Apply prefiltering transformations if desired.
-    if (expr.prefilter) {
-        prefilterTree(expr.component, ParseMode(flags));
-        dumpExpression(expr, "prefiltered", cc.grey);
+    if (pe.expr.prefilter) {
+        prefilterTree(pe.component, ParseMode(flags));
+        dumpExpression(pe, "prefiltered", cc.grey);
     }
 
     // Expressions containing zero-width assertions and other extended pcre
     // types aren't supported yet. This call will throw a ParseError exception
     // if the component tree contains such a construct.
-    checkUnsupported(*expr.component);
+    checkUnsupported(*pe.component);
 
-    expr.component->checkEmbeddedStartAnchor(true);
-    expr.component->checkEmbeddedEndAnchor(true);
+    pe.component->checkEmbeddedStartAnchor(true);
+    pe.component->checkEmbeddedEndAnchor(true);
 
     if (cc.grey.optimiseComponentTree) {
-        optimise(expr);
-        dumpExpression(expr, "opt", cc.grey);
+        optimise(pe);
+        dumpExpression(pe, "opt", cc.grey);
     }
 
     DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
-                 expr.component.get(), expr.index, expr.id);
+                 pe.component.get(), pe.expr.index, pe.expr.report);
 
     // You can only use the SOM flags if you've also specified an SOM
     // precision mode.
-    if (expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
+    if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
         throw CompileError("To use a SOM expression flag in streaming mode, "
                            "an SOM precision mode (e.g. "
                            "HS_MODE_SOM_HORIZON_LARGE) must be specified.");
@@ -273,26 +264,26 @@ void addExpression(NG &ng, unsigned index, const char *expression,
 
     // If this expression is a literal, we can feed it directly to Rose rather
     // than building the NFA graph.
-    if (shortcutLiteral(ng, expr)) {
+    if (shortcutLiteral(ng, pe)) {
         DEBUG_PRINTF("took literal short cut\n");
         return;
     }
 
-    unique_ptr<NGWrapper> g = buildWrapper(ng.rm, cc, expr);
-
-    if (!g) {
+    auto built_expr = buildGraph(ng.rm, cc, pe);
+    if (!built_expr.g) {
         DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
-                     "thrown.\n", expr.id);
+                     "thrown.\n", pe.expr.report);
         throw CompileError("Internal error.");
     }
 
-    if (!expr.allow_vacuous && matches_everywhere(*g)) {
+    auto &g = *built_expr.g;
+    if (!pe.expr.allow_vacuous && matches_everywhere(g)) {
         throw CompileError("Pattern matches empty buffer; use "
                            "HS_FLAG_ALLOWEMPTY to enable support.");
     }
 
-    if (!ng.addGraph(*g)) {
-        DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id);
+    if (!ng.addGraph(built_expr.expr, g)) {
+        DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
         throw CompileError("Error compiling expression.");
     }
 }
@@ -453,41 +444,42 @@ bool isSupported(const Component &c) {
 }
 #endif
 
-unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc,
-                                   const ParsedExpression &expr) {
-    assert(isSupported(*expr.component));
+BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
+                           const ParsedExpression &pe) {
+    assert(isSupported(*pe.component));
 
-    const unique_ptr<NFABuilder> builder = makeNFABuilder(rm, cc, expr);
+    const auto builder = makeNFABuilder(rm, cc, pe);
     assert(builder);
 
     // Set up START and ACCEPT states; retrieve the special states
-    const auto bs = makeGlushkovBuildState(*builder, expr.prefilter);
+    const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
 
     // Map position IDs to characters/components
-    expr.component->notePositions(*bs);
+    pe.component->notePositions(*bs);
 
     // Wire the start dotstar state to the firsts
-    connectInitialStates(*bs, expr);
+    connectInitialStates(*bs, pe);
 
     DEBUG_PRINTF("wire up body of expr\n");
     // Build the rest of the FOLLOW set
     vector<Position> initials = {builder->getStartDotStar(),
                                 builder->getStart()};
-    expr.component->buildFollowSet(*bs, initials);
+    pe.component->buildFollowSet(*bs, initials);
 
     // Wire the lasts to the accept state
-    connectFinalStates(*bs, expr);
+    connectFinalStates(*bs, pe);
 
     // Create our edges
     bs->buildEdges();
 
-    auto g = builder->getGraph();
-    assert(g);
+    BuiltExpression built_expr = builder->getGraph();
+    assert(built_expr.g);
 
-    dumpDotWrapper(*g, "00_before_asserts", cc.grey);
-    removeAssertVertices(rm, *g);
+    dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
+                   cc.grey);
+    removeAssertVertices(rm, *built_expr.g, built_expr.expr);
 
-    return g;
+    return built_expr;
 }
 
 } // namespace ue2
diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h
index 48987fc3..8f5f9b65 100644
--- a/src/compiler/compiler.h
+++ b/src/compiler/compiler.h
@@ -35,8 +35,8 @@
 
 #include "ue2common.h"
 #include "database.h"
+#include "compiler/expression_info.h"
 #include "parser/Component.h"
-#include "som/som.h"
 
 #include <memory>
 #include <boost/core/noncopyable.hpp>
@@ -50,35 +50,32 @@ struct CompileContext;
 struct Grey;
 struct target_t;
 class NG;
+class NGHolder;
 class ReportManager;
-class NGWrapper;
 
-/** Class gathering together the pieces of a parsed expression.
- * Note: Owns the provided component.
- */
+/** \brief Class gathering together the pieces of a parsed expression. */
 class ParsedExpression : boost::noncopyable {
 public:
     ParsedExpression(unsigned index, const char *expression, unsigned flags,
-                     ReportID actionId, const hs_expr_ext *ext = nullptr);
+                     ReportID report, const hs_expr_ext *ext = nullptr);
 
-    bool utf8; //!< UTF-8 mode flag specified
+    /** \brief Expression information (from flags, extparam etc) */
+    ExpressionInfo expr;
 
-    /** \brief root node of parsed component tree. */
-    std::unique_ptr<Component> component;
+    /** \brief Root node of parsed component tree. */
+    std::unique_ptr<Component> component;
+};
 
-    const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified
-    const bool highlander; //!< HS_FLAG_SINGLEMATCH specified
-    const bool prefilter; //!< HS_FLAG_PREFILTER specified
-    som_type som; //!< chosen SOM mode, or SOM_NONE
+/**
+ * \brief Class gathering together the pieces of an expression that has been
+ * built into an NFA graph.
+ */
+struct BuiltExpression {
+    /** \brief Expression information (from flags, extparam etc) */
+    ExpressionInfo expr;
 
-    /** \brief index in expressions array passed to \ref hs_compile_multi */
-    const unsigned index;
-
-    const ReportID id; //!< user-specified pattern ID
-    u64a min_offset; //!< 0 if not used
-    u64a max_offset; //!< MAX_OFFSET if not used
-    u64a min_length; //!< 0 if not used
-    u32 edit_distance; //!< 0 if not used
+    /** \brief Built Glushkov NFA graph. */
+    std::unique_ptr<NGHolder> g;
 };
 
 /**
@@ -95,12 +92,12 @@ public:
  * @param ext
  *      Struct containing extra parameters for this expression, or NULL if
  *      none.
- * @param actionId
+ * @param report
  *      The identifier to associate with the expression; returned by engine on
  *      match.
 */
 void addExpression(NG &ng, unsigned index, const char *expression,
-                   unsigned flags, const hs_expr_ext *ext, ReportID actionId);
+                   unsigned flags, const hs_expr_ext *ext, ReportID report);
 
 /**
 * Build a Hyperscan database out of the expressions we've been given. A
@@ -128,9 +125,8 @@
 struct hs_database *build(NG &ng, unsigned int *length);
 
 * @return
 *      nullptr on error.
 */
-std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm,
-                                        const CompileContext &cc,
-                                        const ParsedExpression &expr);
+BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
+                           const ParsedExpression &expr);
 
 /**
 * Build a platform_t out of a target_t.
diff --git a/src/compiler/expression_info.h b/src/compiler/expression_info.h
new file mode 100644
index 00000000..7775f59e
--- /dev/null
+++ b/src/compiler/expression_info.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * \brief ExpressionInfo class for storing the properties of an expression.
+ */
+
+#ifndef COMPILER_EXPRESSION_INFO_H
+#define COMPILER_EXPRESSION_INFO_H
+
+#include "ue2common.h"
+#include "som/som.h"
+
+namespace ue2 {
+
+/** \brief Properties of an expression. */
+class ExpressionInfo {
+public:
+    ExpressionInfo(unsigned int index_in, bool allow_vacuous_in,
+                   bool highlander_in, bool utf8_in, bool prefilter_in,
+                   som_type som_in, ReportID report_in, u64a min_offset_in,
+                   u64a max_offset_in, u64a min_length_in, u32 edit_distance_in)
+        : index(index_in), report(report_in), allow_vacuous(allow_vacuous_in),
+          highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in),
+          som(som_in), min_offset(min_offset_in), max_offset(max_offset_in),
+          min_length(min_length_in), edit_distance(edit_distance_in) {}
+
+    /**
+     * \brief Index of the expression represented by this graph.
+     *
+     * Used:
+     * - down the track in error handling;
+     * - for identifying parts of an expression in highlander mode.
+     */
+    unsigned int index;
+
+    /** \brief Report ID specified by the user. */
+    ReportID report;
+
+    /** \brief Vacuous pattern is allowed. (HS_FLAG_ALLOWEMPTY) */
+    bool allow_vacuous;
+
+    /** \brief "Highlander" (single match) pattern. (HS_FLAG_SINGLEMATCH) */
+    bool highlander;
+
+    /** \brief UTF-8 pattern. (HS_FLAG_UTF8) */
+    bool utf8;
+
+    /** \brief Prefiltering pattern. (HS_FLAG_PREFILTER) */
+    bool prefilter;
+
+    /** \brief Start-of-match type requested, or SOM_NONE.
+     */
+    som_type som;
+
+    /** \brief Minimum match offset extended parameter. 0 if not used. */
+    u64a min_offset;
+
+    /**
+     * \brief Maximum match offset extended parameter.
+     * MAX_OFFSET if not used.
+     */
+    u64a max_offset;
+
+    /** \brief Minimum match length extended parameter. 0 if not used. */
+    u64a min_length;
+
+    /**
+     * \brief Approximate matching edit distance extended parameter.
+     * 0 if not used.
+     */
+    u32 edit_distance;
+};
+
+}
+
+#endif // COMPILER_EXPRESSION_INFO_H
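
With the header above in place, compile-path code receives the expression properties and the graph as one value rather than through a graph subclass. A minimal sketch of the consuming pattern, assuming the rm/cc/pe objects used throughout this patch (the hs.cpp hunk below is the real call site):

    // Sketch: destructuring the BuiltExpression returned by buildGraph().
    BuiltExpression built_expr = buildGraph(rm, cc, pe);
    ExpressionInfo &expr = built_expr.expr; // flags/extparams live here now
    NGHolder &g = *built_expr.g;            // plain graph, no metadata
    DEBUG_PRINTF("expr %u: report %u\n", expr.index, expr.report);
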
diff --git a/src/hs.cpp b/src/hs.cpp
index 6cd3a3ee..c1e1cdce 100644
--- a/src/hs.cpp
+++ b/src/hs.cpp
@@ -369,11 +369,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
         assert(pe.component);
 
         // Apply prefiltering transformations if desired.
-        if (pe.prefilter) {
+        if (pe.expr.prefilter) {
             prefilterTree(pe.component, ParseMode(flags));
         }
 
-        unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe);
+        auto built_expr = buildGraph(rm, cc, pe);
+        unique_ptr<NGHolder> &g = built_expr.g;
+        ExpressionInfo &expr = built_expr.expr;
 
         if (!g) {
             DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
@@ -381,13 +383,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
         }
 
         // validate graph's suitability for fuzzing
-        validate_fuzzy_compile(*g, g->edit_distance, g->utf8, cc.grey);
+        validate_fuzzy_compile(*g, expr.edit_distance, expr.utf8, cc.grey);
 
         // fuzz graph - this must happen before any transformations are made
-        make_fuzzy(*g, g->edit_distance, cc.grey);
+        make_fuzzy(*g, expr.edit_distance, cc.grey);
 
-        handleExtendedParams(rm, *g, cc);
-        fillExpressionInfo(rm, *g, &local_info);
+        handleExtendedParams(rm, *g, expr, cc);
+        fillExpressionInfo(rm, *g, expr, &local_info);
     } catch (const CompileError &e) {
         // Compiler error occurred
diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp
index 2f6d8cd7..eded7af2 100644
--- a/src/nfagraph/ng.cpp
+++ b/src/nfagraph/ng.cpp
@@ -27,10 +27,11 @@
 */
 
 /** \file
- * \brief NG, NGHolder, NGWrapper and graph handling.
+ * \brief NG and graph handling.
 */
 
-#include "grey.h"
 #include "ng.h"
+
+#include "grey.h"
 #include "ng_anchored_acyclic.h"
 #include "ng_anchored_dots.h"
 #include "ng_asserts.h"
@@ -62,6 +63,7 @@
 #include "ng_util.h"
 #include "ng_width.h"
 #include "ue2common.h"
+#include "compiler/compiler.h"
 #include "nfa/goughcompile.h"
 #include "rose/rose_build.h"
 #include "smallwrite/smallwrite_build.h"
@@ -100,16 +102,16 @@ NG::~NG() {
 * \throw CompileError if SOM cannot be supported for the component.
 */
 static
-bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
+bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
                      const som_type som, const u32 comp_id) {
     DEBUG_PRINTF("doing som\n");
-    dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey);
+    dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
     assert(hasCorrectlyNumberedVertices(g));
-    assert(allMatchStatesHaveReports(w));
+    assert(allMatchStatesHaveReports(g));
 
     // First, we try the "SOM chain" support in ng_som.cpp.
-    sombe_rv rv = doSom(ng, g, w, comp_id, som);
+    sombe_rv rv = doSom(ng, g, expr, comp_id, som);
     if (rv == SOMBE_HANDLED_INTERNAL) {
         return false;
     } else if (rv == SOMBE_HANDLED_ALL) {
@@ -118,7 +120,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
     assert(rv == SOMBE_FAIL);
 
     /* Next, Sombe style approaches */
-    rv = doSomWithHaig(ng, g, w, comp_id, som);
+    rv = doSomWithHaig(ng, g, expr, comp_id, som);
     if (rv == SOMBE_HANDLED_INTERNAL) {
         return false;
     } else if (rv == SOMBE_HANDLED_ALL) {
@@ -132,7 +134,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
     vector<vector<CharReach>> triggers; /* empty for outfix */
 
     assert(g.kind == NFA_OUTFIX);
-    dumpComponent(g, "haig", w.expressionIndex, comp_id, ng.cc.grey);
+    dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
     makeReportsSomPass(ng.rm, g);
     auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
                                    ng.cc.grey);
@@ -145,7 +147,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
     /* Our various strategies for supporting SOM for this pattern have failed.
      * Provide a generic pattern not supported/too large return value as it is
      * unclear what the meaning of a specific SOM error would be */
-    throw CompileError(w.expressionIndex, "Pattern is too large.");
+    throw CompileError(expr.index, "Pattern is too large.");
 
     assert(0); // unreachable
     return false;
@@ -200,21 +202,21 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8,
 }
 
 static
-bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
-                  const u32 comp_id) {
+bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
+                  const som_type som, const u32 comp_id) {
     const CompileContext &cc = ng.cc;
     assert(hasCorrectlyNumberedVertices(g));
 
     DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
-                 w.expressionIndex, comp_id, num_vertices(g), num_edges(g));
+                 expr.index, comp_id, num_vertices(g), num_edges(g));
 
-    dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey);
+    dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);
 
-    assert(allMatchStatesHaveReports(w));
+    assert(allMatchStatesHaveReports(g));
 
-    reduceGraph(g, som, w.utf8, cc);
+    reduceGraph(g, som, expr.utf8, cc);
 
-    dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey);
+    dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);
 
     // There may be redundant regions that we can remove
     if (cc.grey.performGraphSimplification) {
@@ -231,12 +233,12 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
 
     // Start Of Match handling.
     if (som) {
-        if (addComponentSom(ng, g, w, som, comp_id)) {
+        if (addComponentSom(ng, g, expr, som, comp_id)) {
             return true;
         }
     }
 
-    assert(allMatchStatesHaveReports(w));
+    assert(allMatchStatesHaveReports(g));
 
     if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
         return true;
     }
@@ -251,11 +253,11 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
         return true;
     }
 
-    if (doViolet(*ng.rose, g, w.prefilter, false, ng.rm, cc)) {
+    if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
         return true;
     }
 
-    if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) {
+    if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
         return true;
     }
 
@@ -268,7 +270,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
         return true;
     }
 
-    if (doViolet(*ng.rose, g, w.prefilter, true, ng.rm, cc)) {
+    if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
         return true;
     }
 
@@ -283,7 +285,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
 
 // Returns true if all components have been added.
 static
-bool processComponents(NG &ng, NGWrapper &w,
+bool processComponents(NG &ng, ExpressionInfo &expr,
                        deque<unique_ptr<NGHolder>> &g_comp,
                        const som_type som) {
     const u32 num_components = g_comp.size();
@@ -293,7 +295,7 @@ bool processComponents(NG &ng, NGWrapper &w,
         if (!g_comp[i]) {
             continue;
         }
-        if (addComponent(ng, *g_comp[i], w, som, i)) {
+        if (addComponent(ng, *g_comp[i], expr, som, i)) {
             g_comp[i].reset();
             continue;
         }
@@ -313,48 +315,48 @@ bool processComponents(NG &ng, NGWrapper &w,
     return false;
 }
 
-bool NG::addGraph(NGWrapper &w) {
+bool NG::addGraph(ExpressionInfo &expr, NGHolder &g) {
     // remove reports that aren't on vertices connected to accept.
-    clearReports(w);
+    clearReports(g);
 
-    som_type som = w.som;
-    if (som && isVacuous(w)) {
-        throw CompileError(w.expressionIndex, "Start of match is not "
+    som_type som = expr.som;
+    if (som && isVacuous(g)) {
+        throw CompileError(expr.index, "Start of match is not "
                            "currently supported for patterns which match an "
                            "empty buffer.");
     }
 
-    dumpDotWrapper(w, "01_initial", cc.grey);
-    assert(allMatchStatesHaveReports(w));
+    dumpDotWrapper(g, expr, "01_initial", cc.grey);
+    assert(allMatchStatesHaveReports(g));
 
     /* ensure utf8 starts at cp boundary */
-    ensureCodePointStart(rm, w);
+    ensureCodePointStart(rm, g, expr);
 
-    if (can_never_match(w)) {
-        throw CompileError(w.expressionIndex, "Pattern can never match.");
+    if (can_never_match(g)) {
+        throw CompileError(expr.index, "Pattern can never match.");
     }
 
     // validate graph's suitability for fuzzing before resolving asserts
-    validate_fuzzy_compile(w, w.edit_distance, w.utf8, cc.grey);
+    validate_fuzzy_compile(g, expr.edit_distance, expr.utf8, cc.grey);
 
-    resolveAsserts(rm, w);
-    dumpDotWrapper(w, "02_post_assert_resolve", cc.grey);
-    assert(allMatchStatesHaveReports(w));
+    resolveAsserts(rm, g, expr);
+    dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey);
+    assert(allMatchStatesHaveReports(g));
 
-    make_fuzzy(w, w.edit_distance, cc.grey);
-    dumpDotWrapper(w, "02a_post_fuzz", cc.grey);
+    make_fuzzy(g, expr.edit_distance, cc.grey);
+    dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey);
 
-    pruneUseless(w);
-    pruneEmptyVertices(w);
+    pruneUseless(g);
+    pruneEmptyVertices(g);
 
-    if (can_never_match(w)) {
-        throw CompileError(w.expressionIndex, "Pattern can never match.");
+    if (can_never_match(g)) {
+        throw CompileError(expr.index, "Pattern can never match.");
     }
 
-    optimiseVirtualStarts(w); /* good for som */
+    optimiseVirtualStarts(g); /* good for som */
 
-    handleExtendedParams(rm, w, cc);
-    if (w.min_length) {
+    handleExtendedParams(rm, g, expr, cc);
+    if (expr.min_length) {
         // We have a minimum length constraint, which we currently use SOM to
         // satisfy.
         som = SOM_LEFT;
@@ -368,70 +370,70 @@ bool NG::addGraph(NGWrapper &w) {
 
     // first, we can perform graph work that can be done on an individual
     // expression basis.
 
-    if (w.utf8) {
-        relaxForbiddenUtf8(w);
+    if (expr.utf8) {
+        relaxForbiddenUtf8(g, expr);
     }
 
-    if (w.highlander && !w.min_length && !w.min_offset) {
+    if (expr.highlander && !expr.min_length && !expr.min_offset) {
         // In highlander mode: if we don't have constraints on our reports that
         // may prevent us accepting our first match (i.e. extended params) we
         // can prune the other out-edges of all vertices connected to accept.
-        pruneHighlanderAccepts(w, rm);
+        pruneHighlanderAccepts(g, rm);
     }
 
-    dumpDotWrapper(w, "02b_fairly_early", cc.grey);
+    dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
 
     // If we're a vacuous pattern, we can handle this early.
-    if (splitOffVacuous(boundary, rm, w)) {
+    if (splitOffVacuous(boundary, rm, g, expr)) {
         DEBUG_PRINTF("split off vacuous\n");
     }
 
     // We might be done at this point: if we've run out of vertices, we can
     // stop processing.
-    if (num_vertices(w) == N_SPECIALS) {
+    if (num_vertices(g) == N_SPECIALS) {
        DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
        return true;
     }
 
     // Now that vacuous edges have been removed, update the min width exclusive
     // of boundary reports.
-    minWidth = min(minWidth, findMinWidth(w));
+    minWidth = min(minWidth, findMinWidth(g));
 
     // Add the pattern to the small write builder.
-    smwr->add(w);
+    smwr->add(g, expr);
 
     if (!som) {
-        removeSiblingsOfStartDotStar(w);
+        removeSiblingsOfStartDotStar(g);
     }
 
-    dumpDotWrapper(w, "03_early", cc.grey);
+    dumpDotWrapper(g, expr, "03_early", cc.grey);
 
     // Perform a reduction pass to merge sibling character classes together.
     if (cc.grey.performGraphSimplification) {
-        removeRedundancy(w, som);
-        prunePathsRedundantWithSuccessorOfCyclics(w, som);
+        removeRedundancy(g, som);
+        prunePathsRedundantWithSuccessorOfCyclics(g, som);
     }
 
-    dumpDotWrapper(w, "04_reduced", cc.grey);
+    dumpDotWrapper(g, expr, "04_reduced", cc.grey);
 
     // If we've got some literals that span the graph from start to accept, we
     // can split them off into Rose from here.
     if (!som) {
-        if (splitOffLiterals(*this, w)) {
+        if (splitOffLiterals(*this, g)) {
             DEBUG_PRINTF("some vertices claimed by literals\n");
         }
     }
 
     // We might be done at this point: if we've run out of vertices, we can
     // stop processing.
-    if (num_vertices(w) == N_SPECIALS) {
+    if (num_vertices(g) == N_SPECIALS) {
         DEBUG_PRINTF("all vertices claimed before calc components\n");
         return true;
     }
 
     // Split the graph into a set of connected components.
-    deque<unique_ptr<NGHolder>> g_comp = calcComponents(w);
+    deque<unique_ptr<NGHolder>> g_comp = calcComponents(g);
     assert(!g_comp.empty());
 
     if (!som) {
@@ -443,14 +445,14 @@ bool NG::addGraph(NGWrapper &w) {
         recalcComponents(g_comp);
     }
 
-    if (processComponents(*this, w, g_comp, som)) {
+    if (processComponents(*this, expr, g_comp, som)) {
         return true;
     }
 
     // If we're in prefiltering mode, we can run the prefilter reductions and
     // have another shot at accepting the graph.
-    if (cc.grey.prefilterReductions && w.prefilter) {
+    if (cc.grey.prefilterReductions && expr.prefilter) {
         for (u32 i = 0; i < g_comp.size(); i++) {
             if (!g_comp[i]) {
                 continue;
             }
@@ -459,7 +461,7 @@ bool NG::addGraph(NGWrapper &w) {
             prefilterReductions(*g_comp[i], cc);
         }
 
-        if (processComponents(*this, w, g_comp, som)) {
+        if (processComponents(*this, expr, g_comp, som)) {
             return true;
         }
     }
@@ -469,7 +471,7 @@ bool NG::addGraph(NGWrapper &w) {
         if (g_comp[i]) {
             DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
                          i, num_vertices(*g_comp[i]));
-            throw CompileError(w.expressionIndex, "Pattern is too large.");
+            throw CompileError(expr.index, "Pattern is too large.");
         }
     }
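
Both NG::addGraph() above and NG::addHolder() below funnel a graph through the same decomposition cascade: each specialised consumer either claims the graph (returns true) or declines, and the generic outfix path is the fallback. A condensed sketch of that shape, paraphrasing the surrounding hunks (error handling elided):

    // Order matters: cheaper, more specialised engines are tried first.
    if (splitOffAnchoredAcyclic(*rose, g, cc)) return true;
    if (handleSmallLiteralSets(*rose, g, cc)
        || handleFixedWidth(*rose, g, cc.grey)) return true;
    if (handleDecoratedLiterals(*rose, g, cc)) return true;
    if (doViolet(*rose, g, prefilter, false, rm, cc)) return true;
    if (splitOffPuffs(*rose, rm, g, prefilter, cc)) return true;
    if (doViolet(*rose, g, prefilter, true, rm, cc)) return true;
    return rose->addOutfix(g); // generic fallback
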
@@ -478,60 +480,60 @@ bool NG::addGraph(NGWrapper &w) {
 }
 
 /** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
-bool NG::addHolder(NGHolder &w) {
-    DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(w));
-    assert(allMatchStatesHaveReports(w));
-    assert(hasCorrectlyNumberedVertices(w));
+bool NG::addHolder(NGHolder &g) {
+    DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
+    assert(allMatchStatesHaveReports(g));
+    assert(hasCorrectlyNumberedVertices(g));
 
     /* We don't update the global minWidth here as we care about the min width
      * of the whole pattern - not a just a prefix of it. */
 
     bool prefilter = false;
-    //dumpDotComp(comp, w, *this, 20, "prefix_init");
+    //dumpDotComp(comp, g, *this, 20, "prefix_init");
 
     som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
                                 themselves track som */
     bool utf8 = false; // handling done earlier
 
-    reduceGraph(w, som, utf8, cc);
+    reduceGraph(g, som, utf8, cc);
 
     // There may be redundant regions that we can remove
     if (cc.grey.performGraphSimplification) {
-        removeRegionRedundancy(w, som);
+        removeRegionRedundancy(g, som);
     }
 
     // "Short Exhaustible Passthrough" patterns always become outfixes.
-    if (isSEP(w, rm, cc.grey)) {
+    if (isSEP(g, rm, cc.grey)) {
         DEBUG_PRINTF("graph is SEP\n");
-        if (rose->addOutfix(w)) {
+        if (rose->addOutfix(g)) {
             return true;
         }
     }
 
-    if (splitOffAnchoredAcyclic(*rose, w, cc)) {
+    if (splitOffAnchoredAcyclic(*rose, g, cc)) {
         return true;
     }
 
-    if (handleSmallLiteralSets(*rose, w, cc)
-        || handleFixedWidth(*rose, w, cc.grey)) {
+    if (handleSmallLiteralSets(*rose, g, cc)
+        || handleFixedWidth(*rose, g, cc.grey)) {
         return true;
     }
 
-    if (handleDecoratedLiterals(*rose, w, cc)) {
+    if (handleDecoratedLiterals(*rose, g, cc)) {
        return true;
    }
 
-    if (doViolet(*rose, w, prefilter, false, rm, cc)) {
+    if (doViolet(*rose, g, prefilter, false, rm, cc)) {
        return true;
    }
 
-    if (splitOffPuffs(*rose, rm, w, prefilter, cc)) {
+    if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
        return true;
    }
 
-    if (doViolet(*rose, w, prefilter, true, rm, cc)) {
+    if (doViolet(*rose, g, prefilter, true, rm, cc)) {
        return true;
    }
 
     DEBUG_PRINTF("trying for outfix\n");
-    if (rose->addOutfix(w)) {
+    if (rose->addOutfix(g)) {
         DEBUG_PRINTF("ok\n");
         return true;
     }
@@ -586,26 +588,4 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
     return true;
 }
 
-NGWrapper::NGWrapper(unsigned int ei, bool highlander_in, bool utf8_in,
-                     bool prefilter_in, som_type som_in, ReportID r,
-                     u64a min_offset_in, u64a max_offset_in, u64a min_length_in,
-                     u32 edit_distance_in)
-    : expressionIndex(ei), reportId(r), highlander(highlander_in),
-      utf8(utf8_in), prefilter(prefilter_in), som(som_in),
-      min_offset(min_offset_in), max_offset(max_offset_in),
-      min_length(min_length_in), edit_distance(edit_distance_in) {
-    // All special nodes/edges are added in NGHolder's constructor.
-    DEBUG_PRINTF("built %p: expr=%u report=%u%s%s%s%s "
-                 "min_offset=%llu max_offset=%llu min_length=%llu "
-                 "edit_distance=%u\n",
-                 this, expressionIndex, reportId,
-                 highlander ? " highlander" : "",
-                 utf8 ? " utf8" : "",
-                 prefilter ? " prefilter" : "",
-                 (som != SOM_NONE) ? " som" : "",
-                 min_offset, max_offset, min_length, edit_distance);
-}
-
-NGWrapper::~NGWrapper() {}
-
 } // namespace ue2
diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h
index d6e5d3c0..6693773e 100644
--- a/src/nfagraph/ng.h
+++ b/src/nfagraph/ng.h
@@ -27,7 +27,7 @@
 */
 
 /** \file
- * \brief NG, NGHolder, NGWrapper declarations.
+ * \brief NG declaration.
 */
 
 #ifndef NG_H
@@ -58,31 +58,7 @@
 namespace ue2 {
 
 struct CompileContext;
 struct ue2_literal;
 
-class NGWrapper : public NGHolder {
-public:
-    NGWrapper(unsigned int expressionIndex, bool highlander, bool utf8,
-              bool prefilter, const som_type som, ReportID rid, u64a min_offset,
-              u64a max_offset, u64a min_length, u32 edit_distance);
-
-    ~NGWrapper() override;
-
-    /** index of the expression represented by this graph, used
-     * - down the track in error handling
-     * - identifying parts of an expression in highlander mode
-     */
-    const unsigned int expressionIndex;
-
-    const ReportID reportId; /**< user-visible report id */
-    const bool highlander; /**< user-specified single match only */
-    const bool utf8; /**< UTF-8 mode */
-    const bool prefilter; /**< prefiltering mode */
-    const som_type som; /**< SOM type requested */
-    u64a min_offset; /**< extparam min_offset value */
-    u64a max_offset; /**< extparam max_offset value */
-    u64a min_length; /**< extparam min_length value */
-    u32 edit_distance; /**< extparam edit_distance value */
-};
-
+class ExpressionInfo;
 class RoseBuild;
 class SmallWriteBuild;
 
@@ -94,14 +70,14 @@ public:
     /** \brief Consumes a pattern, returns false or throws a CompileError
      * exception if the graph cannot be consumed. */
-    bool addGraph(NGWrapper &w);
+    bool addGraph(ExpressionInfo &expr, NGHolder &h);
 
     /** \brief Consumes a graph, cut-down version of addGraph for use by SOM
      * processing. */
     bool addHolder(NGHolder &h);
 
-    /** \brief Adds a literal to Rose, used by literal shortcut passes (instead of
-     * using \ref addGraph) */
+    /** \brief Adds a literal to Rose, used by literal shortcut passes (instead
+     * of using \ref addGraph) */
     bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
                     bool highlander, som_type som);
 
@@ -128,7 +104,8 @@ public:
 *
 * Shared with the small write compiler.
 */
-void reduceGraph(NGHolder &g, som_type som, bool utf8, const CompileContext &cc);
+void reduceGraph(NGHolder &g, som_type som, bool utf8,
+                 const CompileContext &cc);
 
 } // namespace ue2
 
diff --git a/src/nfagraph/ng_asserts.cpp b/src/nfagraph/ng_asserts.cpp
index c2f0d68f..8812afad 100644
--- a/src/nfagraph/ng_asserts.cpp
+++ b/src/nfagraph/ng_asserts.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -47,6 +47,7 @@
 #include "ng_prune.h"
 #include "ng_redundancy.h"
 #include "ng_util.h"
+#include "compiler/compiler.h"
 #include "parser/position.h" // for POS flags
 #include "util/bitutils.h" // for findAndClearLSB_32
 #include "util/boundary_reports.h"
@@ -184,43 +185,45 @@ void findSplitters(const NGHolder &g, const vector<NFAVertex> &asserts,
 }
 
 static
-void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
+void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+                 NFAVertex v, s32 adj) {
     // Don't try and set the report ID of a special vertex.
     assert(!is_special(v, g));
     // If there's a report set already, we're replacing it.
     g[v].reports.clear();
 
-    Report ir = rm.getBasicInternalReport(g, adj);
+    Report ir = rm.getBasicInternalReport(expr, adj);
 
     g[v].reports.insert(rm.getInternalId(ir));
     DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
 }
 
 static
-NFAVertex makeClone(ReportManager &rm, NGWrapper &g, NFAVertex v,
-                    const CharReach &cr_mask) {
+NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+                    NFAVertex v, const CharReach &cr_mask) {
     NFAVertex clone = clone_vertex(g, v);
     g[clone].char_reach &= cr_mask;
     clone_out_edges(g, v, clone);
     clone_in_edges(g, v, clone);
 
     if (v == g.startDs) {
-        if (g.utf8) {
+        if (expr.utf8) {
             g[clone].char_reach &= ~UTF_START_CR;
         }
 
         DEBUG_PRINTF("marked as virt\n");
         g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
 
-        setReportId(rm, g, clone, 0);
+        setReportId(rm, g, expr, clone, 0);
     }
 
     return clone;
 }
 
 static
-void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
+void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+                 NFAVertex v, bool ucp) {
     assert(v != g.start);
     assert(v != g.accept);
     assert(v != g.acceptEod);
@@ -232,14 +235,14 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
     auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
 
     // Split v into word/nonword vertices with only asserting out-edges.
-    NFAVertex w_out = makeClone(rm, g, v, cr_word);
-    NFAVertex nw_out = makeClone(rm, g, v, cr_nonword);
+    NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
+    NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
     remove_out_edge_if(w_out, has_no_assert, g);
     remove_out_edge_if(nw_out, has_no_assert, g);
 
     // Split v into word/nonword vertices with only asserting in-edges.
-    NFAVertex w_in = makeClone(rm, g, v, cr_word);
-    NFAVertex nw_in = makeClone(rm, g, v, cr_nonword);
+    NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
+    NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
     remove_in_edge_if(w_in, has_no_assert, g);
     remove_in_edge_if(nw_in, has_no_assert, g);
 
@@ -250,7 +253,8 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
 }
 
 static
-void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
+void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
+                  set<NFAEdge> *dead) {
     for (const auto &e : edges_range(g)) {
         u32 flags = g[e].assert_flags;
         if (!flags) {
@@ -363,7 +367,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
             } else if (v_w) {
                 /* need to add a word byte */
                 NFAVertex vv = add_vertex(g);
-                setReportId(rm, g, vv, -1);
+                setReportId(rm, g, expr, vv, -1);
                 g[vv].char_reach = CHARREACH_WORD;
                 add_edge(vv, g.accept, g);
                 g[e].assert_flags = 0;
@@ -372,7 +376,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
             } else {
                 /* need to add a non word byte or see eod */
                 NFAVertex vv = add_vertex(g);
-                setReportId(rm, g, vv, -1);
+                setReportId(rm, g, expr, vv, -1);
                 g[vv].char_reach = CHARREACH_NONWORD;
                 add_edge(vv, g.accept, g);
                 g[e].assert_flags = 0;
@@ -416,7 +420,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
             } else if (v_w) {
                 /* need to add a word byte */
                 NFAVertex vv = add_vertex(g);
-                setReportId(rm, g, vv, -1);
+                setReportId(rm, g, expr, vv, -1);
                 g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
                 add_edge(vv, g.accept, g);
                 g[e].assert_flags = 0;
@@ -425,7 +429,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
             } else {
                 /* need to add a non word byte or see eod */
                 NFAVertex vv = add_vertex(g);
-                setReportId(rm, g, vv, -1);
+                setReportId(rm, g, expr, vv, -1);
                 g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
                 add_edge(vv, g.accept, g);
                 g[e].assert_flags = 0;
@@ -450,7 +454,8 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
     }
 }
 
-void resolveAsserts(ReportManager &rm, NGWrapper &g) {
+void resolveAsserts(ReportManager &rm, NGHolder &g,
+                    const ExpressionInfo &expr) {
     vector<NFAVertex> asserts = getAsserts(g);
     if (asserts.empty()) {
         return;
     }
@@ -460,20 +465,20 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
     map<u32, NFAVertex> to_split; /* by index, for determinism */
     map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
     findSplitters(g, asserts, &to_split, &to_split_ucp);
     if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
-        throw CompileError(g.expressionIndex, "Pattern is too large.");
+        throw CompileError(expr.index, "Pattern is too large.");
     }
 
     for (const auto &m : to_split) {
         assert(!contains(to_split_ucp, m.first));
-        splitVertex(rm, g, m.second, false);
+        splitVertex(rm, g, expr, m.second, false);
     }
 
     for (const auto &m : to_split_ucp) {
-        splitVertex(rm, g, m.second, true);
+        splitVertex(rm, g, expr, m.second, true);
     }
 
     set<NFAEdge> dead;
-    resolveEdges(rm, g, &dead);
+    resolveEdges(rm, g, expr, &dead);
 
     remove_edges(dead, g);
     renumber_vertices(g);
@@ -485,15 +490,16 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
     clearReports(g);
 }
 
-void ensureCodePointStart(ReportManager &rm, NGWrapper &g) {
+void ensureCodePointStart(ReportManager &rm, NGHolder &g,
+                          const ExpressionInfo &expr) {
     /* In utf8 mode there is an implicit assertion that we start at codepoint
      * boundaries. Assert resolution handles the badness coming from asserts.
      * The only other source of trouble is startDs->accept connections. */
     NFAEdge orig = edge(g.startDs, g.accept, g);
-    if (g.utf8 && orig) {
-        DEBUG_PRINTF("rectifying %u\n", g.reportId);
-        Report ir = rm.getBasicInternalReport(g);
+    if (expr.utf8 && orig) {
+        DEBUG_PRINTF("rectifying %u\n", expr.report);
+        Report ir = rm.getBasicInternalReport(expr);
         ReportID rep = rm.getInternalId(ir);
 
         NFAVertex v_a = add_vertex(g);
diff --git a/src/nfagraph/ng_asserts.h b/src/nfagraph/ng_asserts.h
index 8183490a..2534f571 100644
--- a/src/nfagraph/ng_asserts.h
+++ b/src/nfagraph/ng_asserts.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -36,12 +36,14 @@
 namespace ue2 {
 
 struct BoundaryReports;
-class NGWrapper;
+class ExpressionInfo;
+class NGHolder;
 class ReportManager;
 
-void resolveAsserts(ReportManager &rm, NGWrapper &g);
+void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr);
 
-void ensureCodePointStart(ReportManager &rm, NGWrapper &g);
+void ensureCodePointStart(ReportManager &rm, NGHolder &g,
+                          const ExpressionInfo &expr);
 
 } // namespace ue2
 
diff --git a/src/nfagraph/ng_builder.cpp b/src/nfagraph/ng_builder.cpp
index 385e114f..60f667f4 100644
--- a/src/nfagraph/ng_builder.cpp
+++ b/src/nfagraph/ng_builder.cpp
@@ -28,11 +28,13 @@
 
 /** \file
 * \brief: NFA Graph Builder: used by Glushkov construction to construct an
- * NGWrapper from a parsed expression.
+ * NGHolder from a parsed expression.
 */
+
+#include "ng_builder.h"
+
 #include "grey.h"
 #include "ng.h"
-#include "ng_builder.h"
 #include "ng_util.h"
 #include "ue2common.h"
 #include "compiler/compiler.h" // for ParsedExpression
@@ -79,7 +81,7 @@ public:
     void cloneRegion(Position first, Position last,
                      unsigned posOffset) override;
 
-    unique_ptr<NGWrapper> getGraph() override;
+    BuiltExpression getGraph() override;
 
 private:
     /** fetch a vertex given its Position ID. */
@@ -94,8 +96,11 @@ private:
     /** \brief Greybox: used for resource limits. */
     const Grey &grey;
 
-    /** \brief Underlying NGWrapper graph. */
-    unique_ptr<NGWrapper> graph;
+    /** \brief Underlying graph. */
+    unique_ptr<NGHolder> graph;
+
+    /** \brief Underlying expression info. */
+    ExpressionInfo expr;
 
     /** \brief mapping from position to vertex. Use \ref getVertex for access.
     * */
@@ -108,13 +113,9 @@ private:
 } // namespace
 
 NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
-                               const ParsedExpression &expr)
-    : rm(rm_in), grey(grey_in),
-      graph(ue2::make_unique<NGWrapper>(
-          expr.index, expr.highlander, expr.utf8, expr.prefilter, expr.som,
-          expr.id, expr.min_offset, expr.max_offset, expr.min_length,
-          expr.edit_distance)),
-      vertIdx(N_SPECIALS) {
+                               const ParsedExpression &parsed)
+    : rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()),
+      expr(parsed.expr), vertIdx(N_SPECIALS) {
     // Reserve space for a reasonably-sized NFA
     id2vertex.reserve(64);
 
@@ -151,7 +152,7 @@ void NFABuilderImpl::addVertex(Position pos) {
     (*graph)[v].index = pos;
 }
 
-unique_ptr<NGWrapper> NFABuilderImpl::getGraph() {
+BuiltExpression NFABuilderImpl::getGraph() {
     DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
                  num_vertices(*graph), num_edges(*graph));
 
@@ -162,13 +163,13 @@ unique_ptr<NGWrapper> NFABuilderImpl::getGraph() {
         throw CompileError("Pattern too large.");
     }
 
-    return move(graph);
+    return { expr, move(graph) };
 }
 
 void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
-    Report ir = rm.getBasicInternalReport(*graph, offsetAdjust);
+    Report ir = rm.getBasicInternalReport(expr, offsetAdjust);
     DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
-                 pos, graph->reportId, offsetAdjust, ir.ekey);
+                 pos, expr.report, offsetAdjust, ir.ekey);
 
     NFAVertex v = getVertex(pos);
     auto &reports = (*graph)[v].reports;
diff --git a/src/nfagraph/ng_builder.h b/src/nfagraph/ng_builder.h
index 5bd95ba9..df2e0dd8 100644
--- a/src/nfagraph/ng_builder.h
+++ b/src/nfagraph/ng_builder.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -28,7 +28,7 @@
 
 /** \file
 * \brief: NFA Graph Builder: used by Glushkov construction to construct an
- * NGWrapper from a parsed expression.
+ * NGHolder from a parsed expression.
 */
 
 #ifndef NG_BUILDER_H
@@ -44,8 +44,8 @@
 namespace ue2 {
 
 class CharReach;
-class NGWrapper;
 class ReportManager;
+struct BuiltExpression;
 struct CompileContext;
 class ParsedExpression;
 
@@ -83,10 +83,10 @@ public:
                              unsigned posOffset) = 0;
 
     /**
-     * \brief Returns the built NGWrapper graph.
+     * \brief Returns the built NGHolder graph and ExpressionInfo.
     * Note that this builder cannot be used after this call.
     */
-    virtual std::unique_ptr<NGWrapper> getGraph() = 0;
+    virtual BuiltExpression getGraph() = 0;
 };
 
 /** Construct a usable NFABuilder. */
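
The getGraph() change above means Glushkov construction hands back the graph and its ExpressionInfo as a single value. A sketch of the new contract (the driver code here is hypothetical; makeNFABuilder and BuiltExpression are declared in the hunks above):

    // Sketch: construction now yields both result pieces at once.
    auto builder = makeNFABuilder(rm, cc, parsed);
    // ... Glushkov construction via addVertex()/addEdge()/setNodeReportID() ...
    BuiltExpression built = builder->getGraph(); // builder unusable afterwards
    assert(built.g); // ExpressionInfo travels with the graph from here on
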
diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp
index fc840f25..094d2401 100644
--- a/src/nfagraph/ng_dump.cpp
+++ b/src/nfagraph/ng_dump.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -35,24 +35,25 @@
 
 #include "config.h"
 
-#include "ng_dump.h"
+#include "nfagraph/ng_dump.h"
 
-#include "hwlm/hwlm_build.h"
-#include "ng.h"
-#include "ng_util.h"
-#include "parser/position.h"
+#include "hs_compile.h" /* for HS_MODE_* flags */
 #include "ue2common.h"
+#include "compiler/compiler.h"
+#include "hwlm/hwlm_build.h"
 #include "nfa/accel.h"
 #include "nfa/nfa_internal.h" // for MO_INVALID_IDX
-#include "smallwrite/smallwrite_dump.h"
+#include "nfagraph/ng.h"
+#include "nfagraph/ng_util.h"
+#include "parser/position.h"
 #include "rose/rose_build.h"
 #include "rose/rose_internal.h"
+#include "smallwrite/smallwrite_dump.h"
 #include "util/bitutils.h"
 #include "util/dump_charclass.h"
 #include "util/report.h"
 #include "util/report_manager.h"
 #include "util/ue2string.h"
-#include "hs_compile.h" /* for HS_MODE_* flags */
 
 #include
 #include
@@ -287,13 +288,13 @@ void dumpGraphImpl(const char *name, const GraphT &g,
 // manual instantiation of templated dumpGraph above.
 template void dumpGraphImpl(const char *, const NGHolder &);
 
-void dumpDotWrapperImpl(const NGWrapper &nw, const char *name,
-                        const Grey &grey) {
+void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
+                        const char *name, const Grey &grey) {
     if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
         stringstream ss;
-        ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot";
+        ss << grey.dumpPath << "Expr_" << expr.index << "_" << name << ".dot";
         DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str());
-        dumpGraphImpl(ss.str().c_str(), nw);
+        dumpGraphImpl(ss.str().c_str(), g);
     }
 }
 
diff --git a/src/nfagraph/ng_dump.h b/src/nfagraph/ng_dump.h
index b20d9f1b..077f07ce 100644
--- a/src/nfagraph/ng_dump.h
+++ b/src/nfagraph/ng_dump.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -48,7 +48,7 @@
 namespace ue2 {
 
 class NGHolder;
 class NG;
-class NGWrapper;
+class ExpressionInfo;
 class ReportManager;
 
 // Implementations for stubs below -- all have the suffix "Impl".
@@ -61,7 +61,8 @@ void dumpGraphImpl(const char *name, const GraphT &g);
 template <typename GraphT>
 void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
 
-void dumpDotWrapperImpl(const NGWrapper &w, const char *name, const Grey &grey);
+void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
+                        const char *name, const Grey &grey);
 
 void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
                        const Grey &grey);
@@ -88,10 +89,10 @@ static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) {
 
 // Stubs which call through to dump code if compiled in.
 UNUSED static inline
-void dumpDotWrapper(UNUSED const NGWrapper &w, UNUSED const char *name,
-                    UNUSED const Grey &grey) {
+void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr,
+                    UNUSED const char *name, UNUSED const Grey &grey) {
 #ifdef DUMP_SUPPORT
-    dumpDotWrapperImpl(w, name, grey);
+    dumpDotWrapperImpl(g, expr, name, grey);
 #endif
 }
 
diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp
index 7419609b..1f601c61 100644
--- a/src/nfagraph/ng_expr_info.cpp
+++ b/src/nfagraph/ng_expr_info.cpp
@@ -27,8 +27,8 @@
 */
 
 /** \file
- * \brief Code for discovering properties of an NGWrapper used by
- * hs_expression_info.
+ * \brief Code for discovering properties of an NFA graph used by
+ * hs_expression_info().
 */
 
 #include "ng_expr_info.h"
@@ -58,42 +58,42 @@ namespace ue2 {
 
 /* get rid of leading \b and multiline ^ vertices */
 static
-void removeLeadingVirtualVerticesFromRoot(NGWrapper &w, NFAVertex root) {
+void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) {
     vector<NFAVertex> victims;
 
-    for (auto v : adjacent_vertices_range(root, w)) {
-        if (w[v].assert_flags & POS_FLAG_VIRTUAL_START) {
+    for (auto v : adjacent_vertices_range(root, g)) {
+        if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
             DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
             victims.push_back(v);
         }
     }
 
     for (auto u : victims) {
-        for (auto v : adjacent_vertices_range(u, w)) {
-            add_edge_if_not_present(root, v, w);
+        for (auto v : adjacent_vertices_range(u, g)) {
+            add_edge_if_not_present(root, v, g);
         }
     }
 
-    remove_vertices(victims, w);
+    remove_vertices(victims, g);
 }
 
 static
-void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
+void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
                  const vector<DepthMinMax> &depths, DepthMinMax &info) {
-    if (is_any_accept(v, w)) {
+    if (is_any_accept(v, g)) {
         return;
     }
 
-    if (is_any_start(v, w)) {
+    if (is_any_start(v, g)) {
         info.min = 0;
         info.max = max(info.max, depth(0));
         return;
     }
 
-    u32 idx = w[v].index;
+    u32 idx = g[v].index;
     assert(idx < depths.size());
     const DepthMinMax &d = depths.at(idx);
 
-    for (ReportID report_id : w[v].reports) {
+    for (ReportID report_id : g[v].reports) {
         const Report &report = rm.getReport(report_id);
         assert(report.type == EXTERNAL_CALLBACK);
 
@@ -118,7 +118,7 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
             rd.max = min(rd.max, max_offset);
         }
 
-        DEBUG_PRINTF("vertex %zu report %u: %s\n", w[v].index, report_id,
+        DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id,
                      rd.str().c_str());
 
         info = unionDepthMinMax(info, rd);
     }
 }
 
 static
-bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) {
-    for (const auto &report_id : all_reports(w)) {
+bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) {
+    for (const auto &report_id : all_reports(g)) {
         if (rm.getReport(report_id).offsetAdjust) {
             return true;
         }
     }
     return false;
 }
 
-void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) {
+void fillExpressionInfo(ReportManager &rm, NGHolder &g,
+                        const ExpressionInfo &expr, hs_expr_info *info) {
     assert(info);
 
     /* ensure utf8 starts at cp boundary */
-    ensureCodePointStart(rm, w);
-    resolveAsserts(rm, w);
-    optimiseVirtualStarts(w);
+    ensureCodePointStart(rm, g, expr);
+    resolveAsserts(rm, g, expr);
+    optimiseVirtualStarts(g);
 
-    removeLeadingVirtualVerticesFromRoot(w, w.start);
-    removeLeadingVirtualVerticesFromRoot(w, w.startDs);
+    removeLeadingVirtualVerticesFromRoot(g, g.start);
+    removeLeadingVirtualVerticesFromRoot(g, g.startDs);
 
     vector<DepthMinMax> depths;
-    calcDepthsFrom(w, w.start, depths);
+    calcDepthsFrom(g, g.start, depths);
 
     DepthMinMax d;
 
-    for (auto u : inv_adjacent_vertices_range(w.accept, w)) {
-        checkVertex(rm, w, u, depths, d);
+    for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
+        checkVertex(rm, g, u, depths, d);
     }
 
-    for (auto u : inv_adjacent_vertices_range(w.acceptEod, w)) {
-        checkVertex(rm, w, u, depths, d);
+    for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) {
+        checkVertex(rm, g, u, depths, d);
     }
 
     if (d.max.is_finite()) {
@@ -170,9 +171,9 @@ void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) {
         info->min_width = UINT_MAX;
     }
 
-    info->unordered_matches = hasOffsetAdjust(rm, w);
-    info->matches_at_eod = can_match_at_eod(w);
-    info->matches_only_at_eod = can_only_match_at_eod(w);
+    info->unordered_matches = hasOffsetAdjust(rm, g);
+    info->matches_at_eod = can_match_at_eod(g);
+    info->matches_only_at_eod = can_only_match_at_eod(g);
 }
 
 } // namespace ue2
diff --git a/src/nfagraph/ng_expr_info.h b/src/nfagraph/ng_expr_info.h
index dcc5a419..e518738c 100644
--- a/src/nfagraph/ng_expr_info.h
+++ b/src/nfagraph/ng_expr_info.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -27,7 +27,7 @@
 */
 
 /** \file
- * \brief Code for discovering properties of an NGWrapper used by
+ * \brief Code for discovering properties of an expression used by
 * hs_expression_info.
 */
 
@@ -36,14 +36,14 @@
 
 struct hs_expr_info;
 
-#include "ue2common.h"
-
 namespace ue2 {
 
-class NGWrapper;
+class ExpressionInfo;
+class NGHolder;
 class ReportManager;
 
-void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info);
+void fillExpressionInfo(ReportManager &rm, NGHolder &g,
+                        const ExpressionInfo &expr, hs_expr_info *info);
 
 } // namespace ue2
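
fillExpressionInfo() is the engine behind the public hs_expression_info() call whose plumbing these hunks rework; the user-visible API is unchanged. For reference, typical usage of that public entry point:

    hs_expr_info_t *info = NULL;
    hs_compile_error_t *compile_err = NULL;
    if (hs_expression_info("foo.*bar", 0, &info, &compile_err) == HS_SUCCESS) {
        printf("widths: min=%u max=%u\n", info->min_width, info->max_width);
        free(info); /* allocated by the library */
    } else {
        hs_free_compile_error(compile_err);
    }
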
*/ static -void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept, +void updateReportBounds(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr, NFAVertex accept, set &done) { for (auto v : inv_adjacent_vertices_range(accept, g)) { // Don't operate on g.accept itself. @@ -153,16 +157,16 @@ void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept, // Note that we need to cope with offset adjustment here. - ir.minOffset = g.min_offset - ir.offsetAdjust; - if (g.max_offset == MAX_OFFSET) { + ir.minOffset = expr.min_offset - ir.offsetAdjust; + if (expr.max_offset == MAX_OFFSET) { ir.maxOffset = MAX_OFFSET; } else { - ir.maxOffset = g.max_offset - ir.offsetAdjust; + ir.maxOffset = expr.max_offset - ir.offsetAdjust; } assert(ir.maxOffset >= ir.minOffset); - ir.minLength = g.min_length; - if (g.min_length && !g.som) { + ir.minLength = expr.min_length; + if (expr.min_length && !expr.som) { ir.quashSom = true; } @@ -196,22 +200,23 @@ bool hasVirtualStarts(const NGHolder &g) { * anchored and unanchored paths, but it's too tricky for the moment. */ static -bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, +bool anchorPatternWithBoundedRepeat(NGHolder &g, const ExpressionInfo &expr, + const depth &minWidth, const depth &maxWidth) { - assert(!g.som); - assert(g.max_offset != MAX_OFFSET); + assert(!expr.som); + assert(expr.max_offset != MAX_OFFSET); assert(minWidth <= maxWidth); assert(maxWidth.is_reachable()); DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", - minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset, - g.max_offset); + minWidth.str().c_str(), maxWidth.str().c_str(), + expr.min_offset, expr.max_offset); - if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) { + if (expr.max_offset > MAX_MAXOFFSET_TO_ANCHOR) { return false; } - if (g.max_offset < minWidth) { + if (expr.max_offset < minWidth) { assert(0); return false; } @@ -232,10 +237,10 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, u32 min_bound, max_bound; if (maxWidth.is_infinite()) { min_bound = 0; - max_bound = g.max_offset - minWidth; + max_bound = expr.max_offset - minWidth; } else { - min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0; - max_bound = g.max_offset - minWidth; + min_bound = expr.min_offset > maxWidth ? 
expr.min_offset - maxWidth : 0; + max_bound = expr.max_offset - minWidth; } DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); @@ -315,7 +320,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) { } static -bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g, +bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, int *adjust) { const auto &reports = all_reports(g); if (reports.empty()) { @@ -342,10 +347,11 @@ bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g, * /foo.*bar/{min_length=100} --> /foo.{94,}bar/ */ static -bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { - assert(g.min_length); +bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g, + ExpressionInfo &expr) { + assert(expr.min_length); - if (g.min_length > MAX_MINLENGTH_TO_CONVERT) { + if (expr.min_length > MAX_MINLENGTH_TO_CONVERT) { return false; } @@ -437,10 +443,10 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, g[cyclic].index); - if (width >= g.min_length) { + if (width >= expr.min_length) { DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n", - g.min_length, width); - g.min_length = 0; + expr.min_length, width); + expr.min_length = 0; return true; } @@ -468,7 +474,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { const CharReach &cr = g[cyclic].char_reach; - for (u32 i = 0; i < g.min_length - width - 1; ++i) { + for (u32 i = 0; i < expr.min_length - width - 1; ++i) { v = add_vertex(g); g[v].char_reach = cr; @@ -487,19 +493,19 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { renumber_edges(g); clearReports(g); - g.min_length = 0; + expr.min_length = 0; return true; } static -bool hasExtParams(const NGWrapper &g) { - if (g.min_length != 0) { +bool hasExtParams(const ExpressionInfo &expr) { + if (expr.min_length != 0) { return true; } - if (g.min_offset != 0) { + if (expr.min_offset != 0) { return true; } - if (g.max_offset != MAX_OFFSET) { + if (expr.max_offset != MAX_OFFSET) { return true; } return false; @@ -535,7 +541,7 @@ const depth& minDistToAccept(const NFAVertexBidiDepth &d) { } static -bool isEdgePrunable(const NGWrapper &g, +bool isEdgePrunable(const NGHolder &g, const ExpressionInfo &expr, const vector &depths, const NFAEdge &e) { const NFAVertex u = source(e, g); @@ -564,29 +570,29 @@ bool isEdgePrunable(const NGWrapper &g, const NFAVertexBidiDepth &du = depths.at(u_idx); const NFAVertexBidiDepth &dv = depths.at(v_idx); - if (g.min_offset) { + if (expr.min_offset) { depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv); - if (max_offset.is_finite() && max_offset < g.min_offset) { + if (max_offset.is_finite() && max_offset < expr.min_offset) { DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str()); return true; } } - if (g.max_offset != MAX_OFFSET) { + if (expr.max_offset != MAX_OFFSET) { depth min_offset = minDistFromStart(du) + minDistToAccept(dv); assert(min_offset.is_finite()); - if (min_offset > g.max_offset) { + if (min_offset > expr.max_offset) { DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str()); return true; } } - if (g.min_length && is_any_accept(v, g)) { + if (expr.min_length && is_any_accept(v, g)) { // Simple take on min_length. If we're an edge to accept and our max // dist from start is too small, we can be pruned. 
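         // (Worked example, with made-up numbers: if expr.min_length is 10
         // and this accept edge's max distance from start is depth(8), no
         // match leaving through this edge can ever reach 10 bytes, so the
         // edge is dead.)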
const depth &width = du.fromStart.max; - if (width.is_finite() && width < g.min_length) { + if (width.is_finite() && width < expr.min_length) { DEBUG_PRINTF("max width %s from start too small for min_length\n", width.str().c_str()); return true; @@ -597,14 +603,14 @@ bool isEdgePrunable(const NGWrapper &g, } static -void pruneExtUnreachable(NGWrapper &g) { +void pruneExtUnreachable(NGHolder &g, const ExpressionInfo &expr) { vector depths; calcDepths(g, depths); vector dead; for (const auto &e : edges_range(g)) { - if (isEdgePrunable(g, depths, e)) { + if (isEdgePrunable(g, expr, depths, e)) { DEBUG_PRINTF("pruning\n"); dead.push_back(e); } @@ -621,8 +627,8 @@ void pruneExtUnreachable(NGWrapper &g) { /** Remove vacuous edges in graphs where the min_offset or min_length * constraints dictate that they can never produce a match. */ static -void pruneVacuousEdges(NGWrapper &g) { - if (!g.min_length && !g.min_offset) { +void pruneVacuousEdges(NGHolder &g, const ExpressionInfo &expr) { + if (!expr.min_length && !expr.min_offset) { return; } @@ -634,14 +640,14 @@ void pruneVacuousEdges(NGWrapper &g) { // Special case: Crudely remove vacuous edges from start in graphs with a // min_offset. - if (g.min_offset && u == g.start && is_any_accept(v, g)) { + if (expr.min_offset && u == g.start && is_any_accept(v, g)) { DEBUG_PRINTF("vacuous edge in graph with min_offset!\n"); dead.push_back(e); continue; } // If a min_length is set, vacuous edges can be removed. - if (g.min_length && is_any_start(u, g) && is_any_accept(v, g)) { + if (expr.min_length && is_any_start(u, g) && is_any_accept(v, g)) { DEBUG_PRINTF("vacuous edge in graph with min_length!\n"); dead.push_back(e); continue; @@ -657,7 +663,8 @@ void pruneVacuousEdges(NGWrapper &g) { } static -void pruneUnmatchable(NGWrapper &g, const vector &depths, +void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr, + const vector &depths, const ReportManager &rm, NFAVertex accept) { vector dead; @@ -676,16 +683,16 @@ void pruneUnmatchable(NGWrapper &g, const vector &depths, d.min += adj.first; d.max += adj.second; - if (d.max.is_finite() && d.max < g.min_length) { + if (d.max.is_finite() && d.max < expr.min_length) { DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n", - d.max.str().c_str(), g.min_length); + d.max.str().c_str(), expr.min_length); dead.push_back(e); continue; } - if (g.max_offset != MAX_OFFSET && d.min > g.max_offset) { + if (expr.max_offset != MAX_OFFSET && d.min > expr.max_offset) { DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n", - d.min.str().c_str(), g.max_offset); + d.min.str().c_str(), expr.max_offset); dead.push_back(e); continue; } @@ -697,15 +704,16 @@ void pruneUnmatchable(NGWrapper &g, const vector &depths, /** Remove edges to accepts that can never produce a match long enough to * satisfy our min_length and max_offset constraints. 
*/ static -void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) { - if (!g.min_length) { +void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr, + const ReportManager &rm) { + if (!expr.min_length) { return; } vector depths = getDistancesFromSOM(g); - pruneUnmatchable(g, depths, rm, g.accept); - pruneUnmatchable(g, depths, rm, g.acceptEod); + pruneUnmatchable(g, expr, depths, rm, g.accept); + pruneUnmatchable(g, expr, depths, rm, g.acceptEod); pruneUseless(g); } @@ -732,9 +740,9 @@ bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) { return false; } -void handleExtendedParams(ReportManager &rm, NGWrapper &g, +void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr, UNUSED const CompileContext &cc) { - if (!hasExtParams(g)) { + if (!hasExtParams(expr)) { return; } @@ -751,50 +759,50 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g, DepthMinMax match_depths = findMatchLengths(rm, g); DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); - if (is_anchored && maxWidth.is_finite() && g.min_offset > maxWidth) { + if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) { ostringstream oss; oss << "Expression is anchored and cannot satisfy min_offset=" - << g.min_offset << " as it can only produce matches of length " + << expr.min_offset << " as it can only produce matches of length " << maxWidth << " bytes at most."; - throw CompileError(g.expressionIndex, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (minWidth > g.max_offset) { + if (minWidth > expr.max_offset) { ostringstream oss; - oss << "Expression has max_offset=" << g.max_offset << " but requires " - << minWidth << " bytes to match."; - throw CompileError(g.expressionIndex, oss.str()); + oss << "Expression has max_offset=" << expr.max_offset + << " but requires " << minWidth << " bytes to match."; + throw CompileError(expr.index, oss.str()); } - if (maxWidth.is_finite() && match_depths.max < g.min_length) { + if (maxWidth.is_finite() && match_depths.max < expr.min_length) { ostringstream oss; - oss << "Expression has min_length=" << g.min_length << " but can " + oss << "Expression has min_length=" << expr.min_length << " but can " "only produce matches of length " << match_depths.max << " bytes at most."; - throw CompileError(g.expressionIndex, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (g.min_length && g.min_length <= match_depths.min) { + if (expr.min_length && expr.min_length <= match_depths.min) { DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n", - g.min_length); - g.min_length = 0; + expr.min_length); + expr.min_length = 0; } - if (!hasExtParams(g)) { + if (!hasExtParams(expr)) { return; } - pruneVacuousEdges(g); - pruneUnmatchable(g, rm); + pruneVacuousEdges(g, expr); + pruneUnmatchable(g, expr, rm); if (!has_offset_adj) { - pruneExtUnreachable(g); + pruneExtUnreachable(g, expr); } // We may have removed all the edges to accept, in which case this // expression cannot match. if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { - throw CompileError(g.expressionIndex, "Extended parameter " + throw CompileError(expr.index, "Extended parameter " "constraints can not be satisfied for any match from " "this expression."); } @@ -812,27 +820,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g, // If the pattern is completely anchored and has a min_length set, this can // be converted to a min_offset. 
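     // (Illustrative case, not from the patch itself: an anchored pattern
     // with min_length=100 can only match by ending at offset >= 100, so
     // the constraint is equivalent to min_offset=100.)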
-    if (g.min_length && (g.min_offset <= g.min_length) && is_anchored) {
-        DEBUG_PRINTF("converting min_length to min_offset=%llu for "
-                     "anchored case\n", g.min_length);
-        g.min_offset = g.min_length;
-        g.min_length = 0;
+    if (expr.min_length && (expr.min_offset <= expr.min_length) &&
+        is_anchored) {
+        DEBUG_PRINTF("converting min_length to min_offset=%llu for "
+                     "anchored case\n", expr.min_length);
+        expr.min_offset = expr.min_length;
+        expr.min_length = 0;
     }
 
-    if (g.min_offset && g.min_offset <= minWidth && !has_offset_adj) {
+    if (expr.min_offset && expr.min_offset <= minWidth && !has_offset_adj) {
         DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n",
-                     g.min_offset);
-        g.min_offset = 0;
+                     expr.min_offset);
+        expr.min_offset = 0;
     }
 
-    if (!hasExtParams(g)) {
+    if (!hasExtParams(expr)) {
         return;
     }
 
     // If the pattern has a min_length and is of "ratchet" form with one
     // unbounded repeat, that repeat can become a bounded repeat.
     // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
-    if (g.min_length && transformMinLengthToRepeat(rm, g)) {
+    if (expr.min_length && transformMinLengthToRepeat(rm, g, expr)) {
         DEBUG_PRINTF("converted min_length to bounded repeat\n");
         // recalc
         minWidth = findMinWidth(g);
@@ -846,28 +855,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
 
     // Note that it is possible to handle graphs that have a combination of
     // anchored and unanchored paths, but it's too tricky for the moment.
-    if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length &&
-        !has_offset_adj && isUnanchored(g)) {
-        if (anchorPatternWithBoundedRepeat(g, minWidth, maxWidth)) {
+    if (expr.max_offset != MAX_OFFSET && !expr.som && !expr.min_length &&
+        !has_offset_adj && isUnanchored(g)) {
+        if (anchorPatternWithBoundedRepeat(g, expr, minWidth, maxWidth)) {
             DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(),
                          maxWidth.str().c_str());
             if (minWidth == maxWidth) {
                 // For a fixed width pattern, we can retire the offsets as they
                 // are implicit in the graph now.
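                // (Sketch with a hypothetical pattern: /foo/{min_offset=8,
                // max_offset=8} becomes ^.{5}foo after the rewrite above, and
                // its matches can only end at offset 8, so the explicit
                // bounds carry no extra information.)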
- g.min_offset = 0; - g.max_offset = MAX_OFFSET; + expr.min_offset = 0; + expr.max_offset = MAX_OFFSET; } } } //dumpGraph("final.dot", g); - if (!hasExtParams(g)) { + if (!hasExtParams(expr)) { return; } set done; - updateReportBounds(rm, g, g.accept, done); - updateReportBounds(rm, g, g.acceptEod, done); + updateReportBounds(rm, g, expr, g.accept, done); + updateReportBounds(rm, g, expr, g.acceptEod, done); } } // namespace ue2 diff --git a/src/nfagraph/ng_extparam.h b/src/nfagraph/ng_extparam.h index d5df1cf6..798acd3f 100644 --- a/src/nfagraph/ng_extparam.h +++ b/src/nfagraph/ng_extparam.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,10 +37,11 @@ namespace ue2 { struct CompileContext; -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; -void handleExtendedParams(ReportManager &rm, NGWrapper &g, +void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr, const CompileContext &cc); } // namespace ue2 diff --git a/src/nfagraph/ng_literal_component.cpp b/src/nfagraph/ng_literal_component.cpp index e3cfe867..de05e490 100644 --- a/src/nfagraph/ng_literal_component.cpp +++ b/src/nfagraph/ng_literal_component.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,12 +30,15 @@ * \brief Literal Component Splitting. Identifies literals that span the * graph and moves them into Rose. */ + +#include "ng_literal_component.h" + #include "grey.h" #include "ng.h" -#include "ng_literal_component.h" #include "ng_prune.h" #include "ng_util.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "rose/rose_build.h" #include "util/container.h" #include "util/graph.h" @@ -47,8 +50,8 @@ using namespace std; namespace ue2 { static -bool isLiteralChar(const NGWrapper &g, NFAVertex v, - bool &nocase, bool &casefixed) { +bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase, + bool &casefixed) { const CharReach &cr = g[v].char_reach; const size_t num = cr.count(); if (num > 2) { @@ -93,7 +96,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) { } static -bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, +bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, set &dead) { DEBUG_PRINTF("examine vertex %zu\n", g[v].index); bool nocase = false, casefixed = false; @@ -185,7 +188,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, } /** \brief Split off literals. True if any changes were made to the graph. 
*/ -bool splitOffLiterals(NG &ng, NGWrapper &g) { +bool splitOffLiterals(NG &ng, NGHolder &g) { if (!ng.cc.grey.allowLiteral) { return false; } diff --git a/src/nfagraph/ng_literal_component.h b/src/nfagraph/ng_literal_component.h index dc177c40..1f284ce3 100644 --- a/src/nfagraph/ng_literal_component.h +++ b/src/nfagraph/ng_literal_component.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,10 +37,10 @@ namespace ue2 { class NG; -class NGWrapper; +class NGHolder; /** \brief Split off literals. True if any changes were made to the graph. */ -bool splitOffLiterals(NG &ng, NGWrapper &graph); +bool splitOffLiterals(NG &ng, NGHolder &g); } // namespace ue2 diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 8d3d75a3..5bf52915 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -29,6 +29,9 @@ /** \file * \brief SOM ("Start of Match") analysis. */ + +#include "ng_som.h" + #include "ng.h" #include "ng_dump.h" #include "ng_equivalence.h" @@ -40,7 +43,6 @@ #include "ng_redundancy.h" #include "ng_region.h" #include "ng_reports.h" -#include "ng_som.h" #include "ng_som_add_redundancy.h" #include "ng_som_util.h" #include "ng_split.h" @@ -49,6 +51,7 @@ #include "ng_width.h" #include "grey.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfa/goughcompile.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "parser/position.h" @@ -1584,8 +1587,9 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, * implement the full pattern. */ static -void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, - vector &plan, const u32 first_som_slot) { +void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, + NGHolder &g, vector &plan, + const u32 first_som_slot) { ReportManager &rm = ng.rm; SomSlotManager &ssm = ng.ssm; @@ -1598,14 +1602,14 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, // Root plan, which already has a SOM slot assigned (first_som_slot). 
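     // (Gloss, inferred from the loop below: plan.front() is a prefix that
     // records first_som_slot; every later entry is a midfix that reads its
     // parent's slot and records a fresh one via buildMidfix().)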
dumpSomPlan(g, plan.front(), 0); - dumpSomSubComponent(*plan.front().prefix, "04_som", w.expressionIndex, - comp_id, 0, ng.cc.grey); + dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0, + ng.cc.grey); assert(plan.front().prefix); if (plan.front().escapes.any() && !plan.front().is_reset) { /* setup escaper for first som location */ if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes, first_som_slot)) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } @@ -1617,7 +1621,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, for (++it; it != plan.end(); ++it) { const u32 plan_num = it - plan.begin(); dumpSomPlan(g, *it, plan_num); - dumpSomSubComponent(*it->prefix, "04_som", w.expressionIndex, comp_id, + dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id, plan_num, ng.cc.grey); assert(it->parent < plan_num); @@ -1628,7 +1632,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, assert(!it->no_implement); if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in); updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out); @@ -1639,7 +1643,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, renumber_vertices(*plan.front().prefix); assert(plan.front().prefix->kind == NFA_OUTFIX); if (!ng.addHolder(*plan.front().prefix)) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } } @@ -1852,7 +1856,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) { } static -u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g, +u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, const CompileContext &cc) { depth maxWidth = findMaxWidth(g); @@ -1861,7 +1865,7 @@ u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g, auto nfa = makeBareSomRevNfa(g, cc); if (!nfa) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } if (ng.cc.streaming) { @@ -2055,8 +2059,8 @@ void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr &prefix, } static -sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, - som_type som, +sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, + u32 comp_id, som_type som, const ue2::unordered_map ®ions, const map &info, map::const_iterator lower_bound) { @@ -2077,7 +2081,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, // This is an optimisation: if we can't build a Haig from a portion of // the graph, then we won't be able to manage it as an outfix either // when we fall back. 
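        // (In other words, fail fast: falling back would only rediscover
        // the same "too large" condition after more compile-time work.)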
- throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } while (1) { @@ -2152,7 +2156,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, goto next_try; } - implementSomPlan(ng, w, comp_id, g, plan, som_loc); + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); Report ir = makeCallback(0U, 0); assert(!plan.empty()); @@ -2877,7 +2881,7 @@ unique_ptr makePrefixForChain(NGHolder &g, return prefix; } -sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, +sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, som_type som) { assert(som); DEBUG_PRINTF("som hello\n"); @@ -3001,7 +3005,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, /* create prefix to set the som_loc */ updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET); if (prefix_by_rev) { - u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); + u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } renumber_vertices(*prefix); @@ -3084,18 +3088,18 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET); } if (prefix_by_rev && !plan.front().no_implement) { - u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); + u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } - implementSomPlan(ng, w, comp_id, g, plan, som_loc); + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); DEBUG_PRINTF("success\n"); return SOMBE_HANDLED_INTERNAL; } -sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, - som_type som) { +sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr, + u32 comp_id, som_type som) { assert(som); DEBUG_PRINTF("som+haig hello\n"); @@ -3132,7 +3136,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, buildRegionMapping(g, regions, info, true); sombe_rv rv = - doHaigLitSom(ng, g, w, comp_id, som, regions, info, info.begin()); + doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin()); if (rv == SOMBE_FAIL) { clear_graph(g); cloneHolder(g, g_pristine); diff --git a/src/nfagraph/ng_som.h b/src/nfagraph/ng_som.h index 70710945..ecae4c67 100644 --- a/src/nfagraph/ng_som.h +++ b/src/nfagraph/ng_som.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,12 +34,14 @@ #define NG_SOM_H #include "som/som.h" +#include "ue2common.h" namespace ue2 { +class ExpressionInfo; class NG; class NGHolder; -class NGWrapper; +class ReportManager; struct Grey; enum sombe_rv { @@ -63,14 +65,14 @@ enum sombe_rv { * May throw a "Pattern too large" exception if prefixes of the * pattern are too large to compile. */ -sombe_rv doSom(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, +sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id, som_type som); /** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established. * May also throw pattern too large if prefixes of the pattern are too large to * compile. 
*/ -sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, - som_type som); +sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr, + u32 comp_id, som_type som); void makeReportsSomPass(ReportManager &rm, NGHolder &g); diff --git a/src/nfagraph/ng_utf8.cpp b/src/nfagraph/ng_utf8.cpp index 383aa142..89500fe3 100644 --- a/src/nfagraph/ng_utf8.cpp +++ b/src/nfagraph/ng_utf8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "ng.h" #include "ng_prune.h" #include "ng_util.h" +#include "compiler/compiler.h" #include "util/graph_range.h" #include "util/unicode_def.h" @@ -45,14 +46,14 @@ using namespace std; namespace ue2 { static -void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) { - if (in_degree(v, w) != 1) { +void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) { + if (in_degree(v, g) != 1) { DEBUG_PRINTF("unexpected pred\n"); assert(0); /* should be true due to the early stage of this analysis */ return; } - CharReach &cr = w[v].char_reach; + CharReach &cr = g[v].char_reach; if (pred_char == 0xe0) { assert(cr.isSubsetOf(CharReach(0xa0, 0xbf))); if (cr == CharReach(0xa0, 0xbf)) { @@ -79,8 +80,8 @@ void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) { * above \\x{10ffff} or they represent overlong encodings. As we require valid * UTF-8 input, we have no defined behaviour in these cases, as a result we can * accept them if it simplifies the graph. */ -void relaxForbiddenUtf8(NGWrapper &w) { - if (!w.utf8) { +void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) { + if (!expr.utf8) { return; } @@ -88,12 +89,12 @@ void relaxForbiddenUtf8(NGWrapper &w) { const CharReach f0(0xf0); const CharReach f4(0xf4); - for (auto v : vertices_range(w)) { - const CharReach &cr = w[v].char_reach; + for (auto v : vertices_range(g)) { + const CharReach &cr = g[v].char_reach; if (cr == e0 || cr == f0 || cr == f4) { u8 pred_char = cr.find_first(); - for (auto t : adjacent_vertices_range(v, w)) { - allowIllegal(w, t, pred_char); + for (auto t : adjacent_vertices_range(v, g)) { + allowIllegal(g, t, pred_char); } } } diff --git a/src/nfagraph/ng_utf8.h b/src/nfagraph/ng_utf8.h index e1b08e40..7c428833 100644 --- a/src/nfagraph/ng_utf8.h +++ b/src/nfagraph/ng_utf8.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ namespace ue2 { -class NGWrapper; +class ExpressionInfo; class NGHolder; /** \brief Relax forbidden UTF-8 sequences. @@ -44,7 +44,7 @@ class NGHolder; * above \\x{10ffff} or they represent overlong encodings. As we require valid * UTF-8 input, we have no defined behaviour in these cases, as a result we can * accept them if it simplifies the graph. 
*/ -void relaxForbiddenUtf8(NGWrapper &w); +void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr); /** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex * where possible, based on the assumption that we will always be matching diff --git a/src/nfagraph/ng_vacuous.cpp b/src/nfagraph/ng_vacuous.cpp index 53672a1b..d1123dff 100644 --- a/src/nfagraph/ng_vacuous.cpp +++ b/src/nfagraph/ng_vacuous.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,29 +34,31 @@ #include "grey.h" #include "ng.h" #include "ng_util.h" +#include "compiler/compiler.h" using namespace std; namespace ue2 { static -ReportID getInternalId(ReportManager &rm, const NGWrapper &graph) { - Report ir = rm.getBasicInternalReport(graph); +ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) { + Report ir = rm.getBasicInternalReport(expr); // Apply any extended params. - if (graph.min_offset || graph.max_offset != MAX_OFFSET) { - ir.minOffset = graph.min_offset; - ir.maxOffset = graph.max_offset; + if (expr.min_offset || expr.max_offset != MAX_OFFSET) { + ir.minOffset = expr.min_offset; + ir.maxOffset = expr.max_offset; } - assert(!graph.min_length); // should be handled elsewhere. + assert(!expr.min_length); // should be handled elsewhere. return rm.getInternalId(ir); } static -void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) { - const ReportID r = getInternalId(rm, g); +void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { + const ReportID r = getInternalId(rm, expr); boundary.report_at_0_eod.insert(r); boundary.report_at_0.insert(r); @@ -81,8 +83,8 @@ void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) { static void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { - boundary.report_at_0.insert(getInternalId(rm, g)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_0.insert(getInternalId(rm, expr)); remove_edge(g.start, g.accept, g); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); @@ -90,8 +92,8 @@ void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, static void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { - boundary.report_at_eod.insert(getInternalId(rm, g)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_eod.insert(getInternalId(rm, expr)); remove_edge(g.startDs, g.acceptEod, g); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); @@ -100,18 +102,18 @@ void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, static void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { - boundary.report_at_0_eod.insert(getInternalId(rm, g)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_0_eod.insert(getInternalId(rm, expr)); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); } bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { + NGHolder &g, const ExpressionInfo &expr) { if (edge(g.startDs, g.accept, g).second) { // e.g. 
'.*'; match "between" every byte DEBUG_PRINTF("graph is firehose\n"); - makeFirehose(boundary, rm, g); + makeFirehose(boundary, rm, g, expr); return true; } @@ -119,19 +121,19 @@ bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, if (edge(g.start, g.accept, g).second) { DEBUG_PRINTF("creating anchored acceptor\n"); - makeAnchoredAcceptor(boundary, rm, g); + makeAnchoredAcceptor(boundary, rm, g, expr); work_done = true; } if (edge(g.startDs, g.acceptEod, g).second) { DEBUG_PRINTF("creating end-anchored acceptor\n"); - makeEndAnchoredAcceptor(boundary, rm, g); + makeEndAnchoredAcceptor(boundary, rm, g, expr); work_done = true; } if (edge(g.start, g.acceptEod, g).second) { DEBUG_PRINTF("creating nothing acceptor\n"); - makeNothingAcceptor(boundary, rm, g); + makeNothingAcceptor(boundary, rm, g, expr); work_done = true; } diff --git a/src/nfagraph/ng_vacuous.h b/src/nfagraph/ng_vacuous.h index ebbc9d17..c33cb312 100644 --- a/src/nfagraph/ng_vacuous.h +++ b/src/nfagraph/ng_vacuous.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,12 +36,13 @@ namespace ue2 { struct BoundaryReports; -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; // Returns true if a "vacuous" reporter was created. bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &graph); + NGHolder &g, const ExpressionInfo &expr); } // namespace ue2 diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index a7aa5d06..4539836a 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -159,13 +159,15 @@ public: ConstructLiteralVisitor::~ConstructLiteralVisitor() {} /** \brief True if the literal expression \a expr could be added to Rose. 
*/ -bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { - assert(expr.component); +bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { + assert(pe.component); if (!ng.cc.grey.allowLiteral) { return false; } + const auto &expr = pe.expr; + // XXX: don't shortcut literals with extended params (yet) if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length || expr.edit_distance) { @@ -175,8 +177,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { ConstructLiteralVisitor vis; try { - assert(expr.component); - expr.component->accept(vis); + assert(pe.component); + pe.component->accept(vis); assert(vis.repeat_stack.empty()); } catch (const ConstructLiteralVisitor::NotLiteral&) { DEBUG_PRINTF("not a literal\n"); @@ -196,7 +198,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { } DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str()); - return ng.addLiteral(lit, expr.index, expr.id, expr.highlander, expr.som); + return ng.addLiteral(lit, expr.index, expr.report, expr.highlander, + expr.som); } } // namespace ue2 diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 108bca8a..7d340d79 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "grey.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfa/dfa_min.h" #include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile_util.h" @@ -74,7 +75,7 @@ public: // Construct a runtime implementation. aligned_unique_ptr build(u32 roseQuality) override; - void add(const NGWrapper &w) override; + void add(const NGHolder &g, const ExpressionInfo &expr) override; void add(const ue2_literal &literal, ReportID r) override; set all_reports() const override; @@ -171,26 +172,26 @@ bool pruneOverlong(NGHolder &g, const depth &max_depth, return modified; } -void SmallWriteBuildImpl::add(const NGWrapper &w) { +void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) { // If the graph is poisoned (i.e. we can't build a SmallWrite version), // we don't even try. 
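    // (Note, inferred from this function: "poisoned" is sticky, so once any
    // expression in the set needs SOM, min_length or vacuous matching, the
    // SmallWrite engine is dropped for the whole database.)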
if (poisoned) { return; } - if (w.som || w.min_length || isVacuous(w)) { /* cannot support in smwr */ - poisoned = true; + if (expr.som || expr.min_length || isVacuous(g)) { + poisoned = true; /* cannot support in smwr */ return; } - DEBUG_PRINTF("w=%p\n", &w); + DEBUG_PRINTF("g=%p\n", &g); // make a copy of the graph so that we can modify it for our purposes - unique_ptr h = cloneHolder(w); + unique_ptr h = cloneHolder(g); pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm); - reduceGraph(*h, SOM_NONE, w.utf8, cc); + reduceGraph(*h, SOM_NONE, expr.utf8, cc); if (can_never_match(*h)) { DEBUG_PRINTF("graph can never match in small block\n"); diff --git a/src/smallwrite/smallwrite_build.h b/src/smallwrite/smallwrite_build.h index 84c6df3a..3d7f3cb6 100644 --- a/src/smallwrite/smallwrite_build.h +++ b/src/smallwrite/smallwrite_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,8 +48,9 @@ namespace ue2 { struct CompileContext; struct ue2_literal; -class NGWrapper; -class ReportManager; +class ExpressionInfo; +class NGHolder; +class ReportManager; // Abstract interface intended for callers from elsewhere in the tree, real // underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h. @@ -61,16 +62,16 @@ public: // Construct a runtime implementation. virtual ue2::aligned_unique_ptr build(u32 roseQuality) = 0; - virtual void add(const NGWrapper &w) = 0; + virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0; virtual void add(const ue2_literal &literal, ReportID r) = 0; virtual std::set all_reports() const = 0; }; // Construct a usable SmallWrite builder. -std::unique_ptr makeSmallWriteBuilder(size_t num_patterns, - const ReportManager &rm, - const CompileContext &cc); +std::unique_ptr +makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm, + const CompileContext &cc); size_t smwrSize(const SmallWriteEngine *t); diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp index 8377ea03..9c72da07 100644 --- a/src/util/report_manager.cpp +++ b/src/util/report_manager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,9 +29,12 @@ /** \file * \brief ReportManager: tracks Report structures, exhaustion and dedupe keys. */ -#include "grey.h" + #include "report_manager.h" + +#include "grey.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfagraph/ng.h" #include "rose/rose_build.h" #include "util/compile_error.h" @@ -201,20 +204,21 @@ void ReportManager::registerExtReport(ReportID id, } } -Report ReportManager::getBasicInternalReport(const NGWrapper &g, s32 adj) { +Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr, + s32 adj) { /* validate that we are not violating highlander constraints, this will * throw a CompileError if so. 
*/ - registerExtReport(g.reportId, - external_report_info(g.highlander, g.expressionIndex)); + registerExtReport(expr.report, + external_report_info(expr.highlander, expr.index)); /* create the internal report */ u32 ekey = INVALID_EKEY; - if (g.highlander) { + if (expr.highlander) { /* all patterns with the same report id share an ekey */ - ekey = getExhaustibleKey(g.reportId); + ekey = getExhaustibleKey(expr.report); } - return makeECallback(g.reportId, adj, ekey); + return makeECallback(expr.report, adj, ekey); } void ReportManager::setProgramOffset(ReportID id, u32 programOffset) { diff --git a/src/util/report_manager.h b/src/util/report_manager.h index 0eed2711..4b62e4b5 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,7 +47,7 @@ namespace ue2 { struct Grey; class RoseBuild; -class NGWrapper; +class ExpressionInfo; struct external_report_info { external_report_info(bool h, u32 fpi) @@ -92,13 +92,13 @@ public: const std::vector &reports() const { return reportIds; } /** - * Get a simple internal report corresponding to the wrapper. An ekey will - * be setup as required. + * Get a simple internal report corresponding to the expression. An ekey + * will be setup if required. * * Note: this function may throw a CompileError if constraints on external * match id are violated (mixed highlander status for example). */ - Report getBasicInternalReport(const NGWrapper &g, s32 adj = 0); + Report getBasicInternalReport(const ExpressionInfo &expr, s32 adj = 0); /** \brief Register an external report and validate that we are not * violating highlander constraints (which will cause an exception to be diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index e40bda02..60bf8940 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -96,7 +96,8 @@ protected: const CompileContext cc(true, false, target, grey); ReportManager rm(cc.grey); ParsedExpression parsed(0, pattern.c_str(), flags, 0); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 804fcb1f..333c35f3 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,7 +73,8 @@ protected: CompileContext cc(false, false, target, Grey()); ReportManager rm(cc.grey); ParsedExpression parsed(0, expr.c_str(), flags, 0); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); @@ -306,7 +307,8 @@ protected: CompileContext cc(false, false, get_current_target(), Grey()); ReportManager rm(cc.grey); ParsedExpression 
parsed(0, expr.c_str(), flags, 0); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); @@ -365,7 +367,8 @@ protected: CompileContext cc(true, false, get_current_target(), Grey()); ParsedExpression parsed(0, expr.c_str(), flags, 0); ReportManager rm(cc.grey); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); diff --git a/unit/internal/nfagraph_common.h b/unit/internal/nfagraph_common.h index d3aafc99..ca5554c4 100644 --- a/unit/internal/nfagraph_common.h +++ b/unit/internal/nfagraph_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,18 +40,19 @@ namespace ue2 { // Helper function: construct a graph from an expression, flags and context. inline -std::unique_ptr constructGraphWithCC(const std::string &expr, - CompileContext &cc, - unsigned flags) { +std::unique_ptr constructGraphWithCC(const std::string &expr, + CompileContext &cc, + unsigned flags) { ReportManager rm(cc.grey); ParsedExpression parsed(0, expr.c_str(), flags, 0); - return buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + return std::move(built_expr.g); } // Helper function: construct a graph from an expression and its flags. inline -std::unique_ptr constructGraph(const std::string &expr, - unsigned flags) { +std::unique_ptr constructGraph(const std::string &expr, + unsigned flags) { CompileContext cc(false, false, get_current_target(), Grey()); return constructGraphWithCC(expr, cc, flags); } diff --git a/unit/internal/nfagraph_equivalence.cpp b/unit/internal/nfagraph_equivalence.cpp index 8fda9223..73aec1d7 100644 --- a/unit/internal/nfagraph_equivalence.cpp +++ b/unit/internal/nfagraph_equivalence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -54,7 +54,7 @@ TEST(NFAGraph, RemoveEquivalence1) { // The graph should be merged into: a(b|c) CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("(ab|ac)", cc, 0)); + auto graph(constructGraphWithCC("(ab|ac)", cc, 0)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -115,7 +115,7 @@ TEST(NFAGraph, RemoveEquivalence2) { // The graph should be merged into: (b|c)a CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("(ba|ca)", cc, 0)); + auto graph(constructGraphWithCC("(ba|ca)", cc, 0)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -176,8 +176,7 @@ TEST(NFAGraph, RemoveEquivalence3) { // The graph should be merged into: a(..)+(X|Y) CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, - HS_FLAG_DOTALL)); + auto graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -266,8 +265,7 @@ TEST(NFAGraph, RemoveEquivalence4) { // The graph should be merged into: (X|Y)(..)+a 
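    // (Mirror image of RemoveEquivalence3 above: there a shared prefix
    // "a(..)+" allowed the merge; here it is the shared suffix "(..)+a".)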
CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, - HS_FLAG_DOTALL)); + auto graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -363,8 +361,7 @@ TEST(NFAGraph, RemoveEquivalence5) { // The graph should be merged into: [^\x00]*[\x00] CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", - cc, 0)); + auto graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", cc, 0)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_PREFIX; @@ -420,7 +417,7 @@ TEST(NFAGraph, RemoveEquivalence5) { TEST(NFAGraph, RemoveEquivalence6) { // Build a small graph with two redundant vertices: ^(.*|.*)a // The graph should be merged into: a - unique_ptr graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL)); + auto graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; @@ -458,7 +455,7 @@ TEST(NFAGraph, RemoveEquivalence6) { TEST(NFAGraph, RemoveEquivalence7) { // Build a small graph with no redundant vertices: ^.+a // Make sure we don't merge anything - unique_ptr graph(constructGraph("^.+a", HS_FLAG_DOTALL)); + auto graph(constructGraph("^.+a", HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp index 92c514d8..cd0cd796 100644 --- a/unit/internal/nfagraph_find_matches.cpp +++ b/unit/internal/nfagraph_find_matches.cpp @@ -208,7 +208,8 @@ TEST_P(MatchesTest, Check) { CompileContext cc(false, false, get_current_target(), Grey()); ReportManager rm(cc.grey); ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0); - auto g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; bool utf8 = (t.flags & HS_FLAG_UTF8) > 0; set> matches; diff --git a/unit/internal/nfagraph_redundancy.cpp b/unit/internal/nfagraph_redundancy.cpp index be9527fd..c77045e0 100644 --- a/unit/internal/nfagraph_redundancy.cpp +++ b/unit/internal/nfagraph_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -53,7 +53,7 @@ TEST(NFAGraph, RemoveRedundancy1) { // The character reachability should be merged into: [ab]c CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("(a|b)c", cc, 0)); + auto graph(constructGraphWithCC("(a|b)c", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); NGHolder &g = *graph; @@ -95,8 +95,7 @@ TEST(NFAGraph, RemoveRedundancy2) { // Build a small graph with a redundant vertex: a.*b?c // The dot-star should swallow the 'b?', leaving a.*c CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("a.*b?c", cc, - HS_FLAG_DOTALL)); + auto graph(constructGraphWithCC("a.*b?c", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph.get() != nullptr); NGHolder &g = *graph; @@ -152,8 +151,7 @@ TEST(NFAGraph, RemoveRedundancy2) { TEST(NFAGraph, RemoveRedundancy3) { CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("foobar.*(a|b)?teakettle", - cc, 0)); + auto graph(constructGraphWithCC("foobar.*(a|b)?teakettle", cc, 
0)); ASSERT_TRUE(graph.get() != nullptr); unsigned countBefore = num_vertices(*graph); @@ -166,7 +164,7 @@ TEST(NFAGraph, RemoveRedundancy3) { TEST(NFAGraph, RemoveRedundancy4) { CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0)); + auto graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); unsigned countBefore = num_vertices(*graph); @@ -178,8 +176,7 @@ TEST(NFAGraph, RemoveRedundancy4) { TEST(NFAGraph, RemoveRedundancy5) { CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("[0-9]?badgerbrush", - cc, 0)); + auto graph(constructGraphWithCC("[0-9]?badgerbrush", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); unsigned countBefore = num_vertices(*graph); diff --git a/unit/internal/nfagraph_width.cpp b/unit/internal/nfagraph_width.cpp index 03508ea8..5cfb4c87 100644 --- a/unit/internal/nfagraph_width.cpp +++ b/unit/internal/nfagraph_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -79,10 +79,10 @@ INSTANTIATE_TEST_CASE_P(NFAWidth, NFAWidthTest, ValuesIn(widthTests)); TEST_P(NFAWidthTest, Check) { const WidthTest &t = GetParam(); SCOPED_TRACE(testing::Message() << "Pattern: " << t.pattern); - unique_ptr w(constructGraph(t.pattern, 0)); + auto g = constructGraph(t.pattern, 0); - ASSERT_EQ(t.minWidth, findMinWidth(*w)); - ASSERT_EQ(t.maxWidth, findMaxWidth(*w)); + ASSERT_EQ(t.minWidth, findMinWidth(*g)); + ASSERT_EQ(t.maxWidth, findMaxWidth(*g)); } // for google test diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index 9d75a7ad..19ab7edf 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -35,6 +35,7 @@ #include "ng_corpus_generator.h" #include "ng_corpus_editor.h" +#include "compiler/compiler.h" #include "nfagraph/ng.h" #include "nfagraph/ng_util.h" #include "ue2common.h" @@ -219,8 +220,9 @@ namespace { /** \brief Concrete implementation */ class CorpusGeneratorImpl : public CorpusGenerator { public: - CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props); - ~CorpusGeneratorImpl() {} + CorpusGeneratorImpl(const NGHolder &graph_in, const ExpressionInfo &expr_in, + CorpusProperties &props); + ~CorpusGeneratorImpl() = default; void generateCorpus(vector &data); @@ -237,6 +239,9 @@ private: * bytes in length. */ void addRandom(const min_max &mm, string *out); + /** \brief Info about this expression. */ + const ExpressionInfo &expr; + /** \brief The NFA graph we operate over. 
*/ const NGHolder &graph; @@ -245,12 +250,13 @@ private: CorpusProperties &cProps; }; -CorpusGeneratorImpl::CorpusGeneratorImpl(const NGWrapper &graph_in, +CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in, + const ExpressionInfo &expr_in, CorpusProperties &props) - : graph(graph_in), cProps(props) { + : expr(expr_in), graph(graph_in), cProps(props) { // if this pattern is to be matched approximately - if (graph_in.edit_distance && !props.editDistance) { - props.editDistance = props.rand(0, graph_in.edit_distance + 1); + if (expr.edit_distance && !props.editDistance) { + props.editDistance = props.rand(0, expr.edit_distance + 1); } } @@ -392,8 +398,9 @@ hit_limit: /** \brief Concrete implementation for UTF-8 */ class CorpusGeneratorUtf8 : public CorpusGenerator { public: - CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusProperties &props); - ~CorpusGeneratorUtf8() {} + CorpusGeneratorUtf8(const NGHolder &graph_in, const ExpressionInfo &expr_in, + CorpusProperties &props); + ~CorpusGeneratorUtf8() = default; void generateCorpus(vector &data); @@ -410,19 +417,23 @@ private: * length. */ void addRandom(const min_max &mm, vector *out); + /** \brief Info about this expression. */ + const ExpressionInfo &expr; + /** \brief The NFA graph we operate over. */ - const NGWrapper &graph; + const NGHolder &graph; /** \brief Reference to our corpus generator properties object (stores some * state) */ CorpusProperties &cProps; }; -CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGWrapper &graph_in, +CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in, + const ExpressionInfo &expr_in, CorpusProperties &props) - : graph(graph_in), cProps(props) { + : expr(expr_in), graph(graph_in), cProps(props) { // we do not support Utf8 for approximate matching - if (graph.edit_distance) { + if (expr.edit_distance) { throw CorpusGenerationFailure("UTF-8 for edited patterns is not " "supported."); } @@ -681,11 +692,12 @@ CorpusGenerator::~CorpusGenerator() { } // External entry point -unique_ptr makeCorpusGenerator(const NGWrapper &graph, +unique_ptr makeCorpusGenerator(const NGHolder &graph, + const ExpressionInfo &expr, CorpusProperties &props) { - if (graph.utf8) { - return ue2::make_unique(graph, props); + if (expr.utf8) { + return ue2::make_unique(graph, expr, props); } else { - return ue2::make_unique(graph, props); + return ue2::make_unique(graph, expr, props); } } diff --git a/util/ng_corpus_generator.h b/util/ng_corpus_generator.h index a02721bd..f230a10d 100644 --- a/util/ng_corpus_generator.h +++ b/util/ng_corpus_generator.h @@ -41,7 +41,8 @@ namespace ue2 { -class NGWrapper; +class ExpressionInfo; +class NGHolder; } // namespace ue2 @@ -68,6 +69,7 @@ public: /** \brief Build a concrete impl conforming to the \ref CorpusGenerator * interface. */ std::unique_ptr -makeCorpusGenerator(const ue2::NGWrapper &graph, CorpusProperties &props); +makeCorpusGenerator(const ue2::NGHolder &g, const ue2::ExpressionInfo &expr, + CorpusProperties &props); #endif From ba867ebaff7e43f2ae63c9374be125e3a8e8994b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 21 Mar 2017 10:27:14 +1100 Subject: [PATCH 168/326] ng_calc_components: rework to move graphs Rather than cloning graphs for output, rework calc components so that it moves them. 
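In essence (a sketch of the ownership change only; the exact signatures are
in the diff below):

    // before: borrow the graph and deep-copy every component out
    std::deque<std::unique_ptr<NGHolder>> calcComponents(const NGHolder &g);

    // after: take ownership, so trivial cases (single component,
    // alternation of classes) can hand the same holder straight back
    std::deque<std::unique_ptr<NGHolder>>
    calcComponents(std::unique_ptr<NGHolder> g);

    auto comps = calcComponents(std::move(g_ptr)); // g_ptr is now empty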
--- src/compiler/compiler.cpp | 5 +-- src/nfagraph/ng.cpp | 23 +++++++----- src/nfagraph/ng.h | 2 +- src/nfagraph/ng_calc_components.cpp | 57 +++++++++++++++-------------- src/nfagraph/ng_calc_components.h | 5 ++- unit/internal/nfagraph_comp.cpp | 11 ++---- 6 files changed, 51 insertions(+), 52 deletions(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 49ed88f4..47bf514c 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -276,13 +276,12 @@ void addExpression(NG &ng, unsigned index, const char *expression, throw CompileError("Internal error."); } - auto &g = *built_expr.g; - if (!pe.expr.allow_vacuous && matches_everywhere(g)) { + if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) { throw CompileError("Pattern matches empty buffer; use " "HS_FLAG_ALLOWEMPTY to enable support."); } - if (!ng.addGraph(built_expr.expr, g)) { + if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) { DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report); throw CompileError("Error compiling expression."); } diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index eded7af2..bdd767e9 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -315,7 +315,10 @@ bool processComponents(NG &ng, ExpressionInfo &expr, return false; } -bool NG::addGraph(ExpressionInfo &expr, NGHolder &g) { +bool NG::addGraph(ExpressionInfo &expr, unique_ptr g_ptr) { + assert(g_ptr); + NGHolder &g = *g_ptr; + // remove reports that aren't on vertices connected to accept. clearReports(g); @@ -431,15 +434,16 @@ bool NG::addGraph(ExpressionInfo &expr, NGHolder &g) { return true; } - // Split the graph into a set of connected components. + // Split the graph into a set of connected components and process those. + // Note: this invalidates g_ptr. - deque> g_comp = calcComponents(g); + auto g_comp = calcComponents(std::move(g_ptr)); assert(!g_comp.empty()); if (!som) { - for (u32 i = 0; i < g_comp.size(); i++) { - assert(g_comp[i]); - reformLeadingDots(*g_comp[i]); + for (auto &gc : g_comp) { + assert(gc); + reformLeadingDots(*gc); } recalcComponents(g_comp); @@ -453,12 +457,11 @@ bool NG::addGraph(ExpressionInfo &expr, NGHolder &g) { // have another shot at accepting the graph. if (cc.grey.prefilterReductions && expr.prefilter) { - for (u32 i = 0; i < g_comp.size(); i++) { - if (!g_comp[i]) { + for (auto &gc : g_comp) { + if (!gc) { continue; } - - prefilterReductions(*g_comp[i], cc); + prefilterReductions(*gc, cc); } if (processComponents(*this, expr, g_comp, som)) { diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index 6693773e..dc797acf 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -70,7 +70,7 @@ public: /** \brief Consumes a pattern, returns false or throws a CompileError * exception if the graph cannot be consumed. */ - bool addGraph(ExpressionInfo &expr, NGHolder &h); + bool addGraph(ExpressionInfo &expr, std::unique_ptr g_ptr); /** \brief Consumes a graph, cut-down version of addGraph for use by SOM * processing. 
*/ diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index da6775e4..ff0d0fe1 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -245,31 +245,33 @@ void renumberVertices(NFAUndirectedGraph &ug) { * one or more connected components, adding them to the comps deque. */ static -void splitIntoComponents(const NGHolder &g, deque> &comps, +void splitIntoComponents(unique_ptr g, + deque> &comps, const depth &max_head_depth, const depth &max_tail_depth, bool *shell_comp) { - DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(g)); + DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(*g)); assert(shell_comp); *shell_comp = false; // Compute "shell" head and tail subgraphs. vector depths; - calcDepths(g, depths); - auto head_shell = findHeadShell(g, depths, max_head_depth); - auto tail_shell = findTailShell(g, depths, max_tail_depth); + calcDepths(*g, depths); + auto head_shell = findHeadShell(*g, depths, max_head_depth); + auto tail_shell = findTailShell(*g, depths, max_tail_depth); for (auto v : head_shell) { tail_shell.erase(v); } - if (head_shell.size() + tail_shell.size() + N_SPECIALS >= num_vertices(g)) { + if (head_shell.size() + tail_shell.size() + N_SPECIALS >= + num_vertices(*g)) { DEBUG_PRINTF("all in shell component\n"); - comps.push_back(cloneHolder(g)); + comps.push_back(std::move(g)); *shell_comp = true; return; } - vector shell_edges = findShellEdges(g, head_shell, tail_shell); + vector shell_edges = findShellEdges(*g, head_shell, tail_shell); DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n", head_shell.size(), tail_shell.size(), shell_edges.size()); @@ -277,7 +279,7 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, NFAUndirectedGraph ug; ue2::unordered_map old2new; - createUnGraph(g, true, true, ug, old2new); + createUnGraph(*g, true, true, ug, old2new); // Construct reverse mapping. ue2::unordered_map new2old; @@ -298,7 +300,7 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, assert(num > 0); if (num == 1 && shell_edges.empty()) { DEBUG_PRINTF("single component\n"); - comps.push_back(cloneHolder(g)); + comps.push_back(std::move(g)); return; } @@ -313,7 +315,7 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, assert(contains(new2old, uv)); NFAVertex v = new2old.at(uv); verts[c].push_back(v); - DEBUG_PRINTF("vertex %zu is in comp %u\n", g[v].index, c); + DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c); } ue2::unordered_map v_map; // temp map for fillHolder @@ -328,12 +330,12 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, auto gc = ue2::make_unique(); v_map.clear(); - fillHolder(gc.get(), g, vv, &v_map); + fillHolder(gc.get(), *g, vv, &v_map); // Remove shell edges, which will get their own component. 
for (const auto &e : shell_edges) { - auto cu = v_map.at(source(e, g)); - auto cv = v_map.at(target(e, g)); + auto cu = v_map.at(source(e, *g)); + auto cv = v_map.at(target(e, *g)); assert(edge(cu, cv, *gc).second); remove_edge(cu, cv, *gc); } @@ -352,7 +354,7 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, auto gc = ue2::make_unique(); v_map.clear(); - fillHolder(gc.get(), g, vv, &v_map); + fillHolder(gc.get(), *g, vv, &v_map); pruneUseless(*gc); DEBUG_PRINTF("shell edge component %zu has %zu vertices\n", @@ -374,26 +376,26 @@ void splitIntoComponents(const NGHolder &g, deque> &comps, })); } -deque> calcComponents(const NGHolder &g) { +deque> calcComponents(unique_ptr g) { deque> comps; // For trivial cases, we needn't bother running the full // connected_components algorithm. - if (isAlternationOfClasses(g)) { - comps.push_back(cloneHolder(g)); + if (isAlternationOfClasses(*g)) { + comps.push_back(std::move(g)); return comps; } bool shell_comp = false; - splitIntoComponents(g, comps, MAX_HEAD_SHELL_DEPTH, MAX_TAIL_SHELL_DEPTH, - &shell_comp); + splitIntoComponents(std::move(g), comps, MAX_HEAD_SHELL_DEPTH, + MAX_TAIL_SHELL_DEPTH, &shell_comp); if (shell_comp) { DEBUG_PRINTF("re-running on shell comp\n"); assert(!comps.empty()); - auto sc = move(comps.back()); + auto sc = std::move(comps.back()); comps.pop_back(); - splitIntoComponents(*sc, comps, 0, 0, &shell_comp); + splitIntoComponents(std::move(sc), comps, 0, 0, &shell_comp); } DEBUG_PRINTF("finished; split into %zu components\n", comps.size()); @@ -409,14 +411,13 @@ void recalcComponents(deque> &comps) { } if (isAlternationOfClasses(*gc)) { - out.push_back(move(gc)); + out.push_back(std::move(gc)); continue; } - auto gc_comps = calcComponents(*gc); - for (auto &elem : gc_comps) { - out.push_back(move(elem)); - } + auto gc_comps = calcComponents(std::move(gc)); + out.insert(end(out), std::make_move_iterator(begin(gc_comps)), + std::make_move_iterator(end(gc_comps))); } // Replace comps with our recalculated list. 
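An aside on the splice used in recalcComponents() above: make_move_iterator
is the standard idiom for appending a whole container of move-only elements
to another. A self-contained sketch of just that idiom (unique_ptr<int>
rather than graphs; nothing Hyperscan-specific):

    #include <cassert>
    #include <deque>
    #include <iterator>
    #include <memory>

    int main() {
        std::deque<std::unique_ptr<int>> src, dst;
        src.push_back(std::make_unique<int>(1));
        src.push_back(std::make_unique<int>(2));

        // unique_ptr is move-only, so copying via plain iterators would not
        // compile; wrapping them moves each element into dst instead.
        dst.insert(dst.end(), std::make_move_iterator(src.begin()),
                   std::make_move_iterator(src.end()));

        assert(dst.size() == 2 && *dst[0] == 1 && *dst[1] == 2);
        assert(src.size() == 2 && !src[0] && !src[1]); // moved-from: null
        return 0;
    }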
diff --git a/src/nfagraph/ng_calc_components.h b/src/nfagraph/ng_calc_components.h index e68c81fc..0359f4a0 100644 --- a/src/nfagraph/ng_calc_components.h +++ b/src/nfagraph/ng_calc_components.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,7 +42,8 @@ class NGHolder; bool isAlternationOfClasses(const NGHolder &g); -std::deque> calcComponents(const NGHolder &g); +std::deque> +calcComponents(std::unique_ptr g); void recalcComponents(std::deque> &comps); diff --git a/unit/internal/nfagraph_comp.cpp b/unit/internal/nfagraph_comp.cpp index 41af3f0c..8aae9519 100644 --- a/unit/internal/nfagraph_comp.cpp +++ b/unit/internal/nfagraph_comp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,13 +33,8 @@ #include "config.h" #include "gtest/gtest.h" #include "nfagraph_common.h" -#include "grey.h" -#include "hs.h" -#include "compiler/compiler.h" #include "nfagraph/ng.h" -#include "nfagraph/ng_builder.h" #include "nfagraph/ng_calc_components.h" -#include "util/target_info.h" using namespace std; using namespace ue2; @@ -48,7 +43,7 @@ TEST(NFAGraph, CalcComp1) { auto graph = constructGraph("abc|def|ghi", 0); ASSERT_TRUE(graph != nullptr); - deque> comps = calcComponents(*graph); + auto comps = calcComponents(std::move(graph)); ASSERT_EQ(3, comps.size()); } @@ -56,7 +51,7 @@ TEST(NFAGraph, CalcComp2) { auto graph = constructGraph("a|b|c|d|e|f|g|h|i", 0); ASSERT_TRUE(graph != nullptr); - deque> comps = calcComponents(*graph); + auto comps = calcComponents(std::move(graph)); // We should be identifying this as a trivial case and not splitting it. 
ASSERT_EQ(1, comps.size()); From 560e522457ecb643acef5a55a982e10b365ba620 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 21 Mar 2017 10:58:26 +1100 Subject: [PATCH 169/326] ng_calc_components: add Grey box control --- src/grey.cpp | 2 ++ src/grey.h | 1 + src/nfagraph/ng.cpp | 4 ++-- src/nfagraph/ng_calc_components.cpp | 14 ++++++++++---- src/nfagraph/ng_calc_components.h | 6 ++++-- unit/internal/nfagraph_comp.cpp | 12 +++++++++--- 6 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/grey.cpp b/src/grey.cpp index ea92fdb5..24140c05 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -42,6 +42,7 @@ namespace ue2 { Grey::Grey(void) : optimiseComponentTree(true), + calcComponents(true), performGraphSimplification(true), prefilterReductions(true), removeEdgeRedundancy(true), @@ -209,6 +210,7 @@ void applyGreyOverrides(Grey *g, const string &s) { } while (0) G_UPDATE(optimiseComponentTree); + G_UPDATE(calcComponents); G_UPDATE(performGraphSimplification); G_UPDATE(prefilterReductions); G_UPDATE(removeEdgeRedundancy); diff --git a/src/grey.h b/src/grey.h index 5fde7b4b..50519418 100644 --- a/src/grey.h +++ b/src/grey.h @@ -41,6 +41,7 @@ struct Grey { bool optimiseComponentTree; + bool calcComponents; bool performGraphSimplification; bool prefilterReductions; bool removeEdgeRedundancy; diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index bdd767e9..dc74dcee 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -437,7 +437,7 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr g_ptr) { // Split the graph into a set of connected components and process those. // Note: this invalidates g_ptr. - auto g_comp = calcComponents(std::move(g_ptr)); + auto g_comp = calcComponents(std::move(g_ptr), cc.grey); assert(!g_comp.empty()); if (!som) { @@ -446,7 +446,7 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr g_ptr) { reformLeadingDots(*gc); } - recalcComponents(g_comp); + recalcComponents(g_comp, cc.grey); } if (processComponents(*this, expr, g_comp, som)) { diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index ff0d0fe1..2c1dbcdb 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -55,6 +55,7 @@ #include "ng_prune.h" #include "ng_undirected.h" #include "ng_util.h" +#include "grey.h" #include "ue2common.h" #include "util/graph_range.h" #include "util/make_unique.h" @@ -376,12 +377,13 @@ void splitIntoComponents(unique_ptr g, })); } -deque> calcComponents(unique_ptr g) { +deque> calcComponents(unique_ptr g, + const Grey &grey) { deque> comps; // For trivial cases, we needn't bother running the full // connected_components algorithm. 
- if (isAlternationOfClasses(*g)) { + if (!grey.calcComponents || isAlternationOfClasses(*g)) { comps.push_back(std::move(g)); return comps; } @@ -402,7 +404,11 @@ deque> calcComponents(unique_ptr g) { return comps; } -void recalcComponents(deque> &comps) { +void recalcComponents(deque> &comps, const Grey &grey) { + if (!grey.calcComponents) { + return; + } + deque> out; for (auto &gc : comps) { @@ -415,7 +421,7 @@ void recalcComponents(deque> &comps) { continue; } - auto gc_comps = calcComponents(std::move(gc)); + auto gc_comps = calcComponents(std::move(gc), grey); out.insert(end(out), std::make_move_iterator(begin(gc_comps)), std::make_move_iterator(end(gc_comps))); } diff --git a/src/nfagraph/ng_calc_components.h b/src/nfagraph/ng_calc_components.h index 0359f4a0..1bcdc5f8 100644 --- a/src/nfagraph/ng_calc_components.h +++ b/src/nfagraph/ng_calc_components.h @@ -39,13 +39,15 @@ namespace ue2 { class NGHolder; +struct Grey; bool isAlternationOfClasses(const NGHolder &g); std::deque> -calcComponents(std::unique_ptr g); +calcComponents(std::unique_ptr g, const Grey &grey); -void recalcComponents(std::deque> &comps); +void recalcComponents(std::deque> &comps, + const Grey &grey); } // namespace ue2 diff --git a/unit/internal/nfagraph_comp.cpp b/unit/internal/nfagraph_comp.cpp index 8aae9519..61b05a46 100644 --- a/unit/internal/nfagraph_comp.cpp +++ b/unit/internal/nfagraph_comp.cpp @@ -43,7 +43,9 @@ TEST(NFAGraph, CalcComp1) { auto graph = constructGraph("abc|def|ghi", 0); ASSERT_TRUE(graph != nullptr); - auto comps = calcComponents(std::move(graph)); + Grey grey; + grey.calcComponents = true; + auto comps = calcComponents(std::move(graph), grey); ASSERT_EQ(3, comps.size()); } @@ -51,7 +53,9 @@ TEST(NFAGraph, CalcComp2) { auto graph = constructGraph("a|b|c|d|e|f|g|h|i", 0); ASSERT_TRUE(graph != nullptr); - auto comps = calcComponents(std::move(graph)); + Grey grey; + grey.calcComponents = true; + auto comps = calcComponents(std::move(graph), grey); // We should be identifying this as a trivial case and not splitting it. ASSERT_EQ(1, comps.size()); @@ -62,7 +66,9 @@ TEST(NFAGraph, RecalcComp1) { comps.push_back(constructGraph("abc|def|ghi", 0)); ASSERT_TRUE(comps.back() != nullptr); - recalcComponents(comps); + Grey grey; + grey.calcComponents = true; + recalcComponents(comps, grey); ASSERT_EQ(3, comps.size()); } From 9724f8c3cc92282fd9576c6eab38d449aecc991d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 21 Mar 2017 12:31:08 +1100 Subject: [PATCH 170/326] ng_undirected: modernize code --- src/nfagraph/ng_calc_components.cpp | 4 +- src/nfagraph/ng_repeat.cpp | 8 ++-- src/nfagraph/ng_undirected.h | 59 ++++++++++++++++------------- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index 2c1dbcdb..a7d8dd69 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -277,10 +277,8 @@ void splitIntoComponents(unique_ptr g, DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n", head_shell.size(), tail_shell.size(), shell_edges.size()); - NFAUndirectedGraph ug; ue2::unordered_map old2new; - - createUnGraph(*g, true, true, ug, old2new); + auto ug = createUnGraph(*g, true, true, old2new); // Construct reverse mapping. 
ue2::unordered_map new2old; diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index a16e2715..96e3266f 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -296,9 +296,8 @@ void splitSubgraph(const NGHolder &g, const deque &verts, ue2::unordered_map verts_map; // in g -> in verts_g fillHolder(&verts_g, g, verts, &verts_map); - NFAUndirectedGraph ug; ue2::unordered_map old2new; - createUnGraph(verts_g, true, true, ug, old2new); + auto ug = createUnGraph(verts_g, true, true, old2new); ue2::unordered_map repeatMap; @@ -1020,9 +1019,8 @@ void buildReachSubgraphs(const NGHolder &g, vector &rs, return; } - NFAUndirectedGraph ug; unordered_map old2new; - createUnGraph(rg, true, true, ug, old2new); + auto ug = createUnGraph(rg, true, true, old2new); unordered_map repeatMap; diff --git a/src/nfagraph/ng_undirected.h b/src/nfagraph/ng_undirected.h index 7df6c7dc..b9fbd58d 100644 --- a/src/nfagraph/ng_undirected.h +++ b/src/nfagraph/ng_undirected.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,8 +30,8 @@ * \brief Create an undirected graph from an NFAGraph. */ -#ifndef NG_UNDIRECTED_H_CB42C71CF38E3D -#define NG_UNDIRECTED_H_CB42C71CF38E3D +#ifndef NG_UNDIRECTED_H +#define NG_UNDIRECTED_H #include "ng_holder.h" #include "ng_util.h" @@ -52,13 +52,13 @@ namespace ue2 { * of parallel edges. The only vertex property constructed is \a * vertex_index_t. */ -typedef boost::adjacency_list > -NFAUndirectedGraph; +using NFAUndirectedGraph = + boost::adjacency_list>; -typedef NFAUndirectedGraph::vertex_descriptor NFAUndirectedVertex; +using NFAUndirectedVertex = NFAUndirectedGraph::vertex_descriptor; /** * Make a copy of an NFAGraph with undirected edges, optionally without start @@ -67,15 +67,17 @@ typedef NFAUndirectedGraph::vertex_descriptor NFAUndirectedVertex; * Note that new vertex indices are assigned contiguously in \a vertices(g) * order. 
*/ -template -void createUnGraph(const GraphT &g, +template +NFAUndirectedGraph createUnGraph(const Graph &g, bool excludeStarts, bool excludeAccepts, - NFAUndirectedGraph &ug, - ue2::unordered_map &old2new) { + unordered_map &old2new) { + NFAUndirectedGraph ug; size_t idx = 0; - typedef typename GraphT::vertex_descriptor VertexT; + + assert(old2new.empty()); + old2new.reserve(num_vertices(g)); for (auto v : ue2::vertices_range(g)) { // skip all accept nodes @@ -88,32 +90,35 @@ void createUnGraph(const GraphT &g, continue; } - NFAUndirectedVertex nuv = boost::add_vertex(ug); - old2new[v] = nuv; + auto nuv = boost::add_vertex(ug); + old2new.emplace(v, nuv); boost::put(boost::vertex_index, ug, nuv, idx++); } for (const auto &e : ue2::edges_range(g)) { - VertexT src = source(e, g); - VertexT targ = target(e, g); + auto u = source(e, g); + auto v = target(e, g); - if ((excludeAccepts && is_any_accept(src, g)) - || (excludeStarts && is_any_start(src, g))) { + if ((excludeAccepts && is_any_accept(u, g)) + || (excludeStarts && is_any_start(u, g))) { continue; } - if ((excludeAccepts && is_any_accept(targ, g)) - || (excludeStarts && is_any_start(targ, g))) { + if ((excludeAccepts && is_any_accept(v, g)) + || (excludeStarts && is_any_start(v, g))) { continue; } - NFAUndirectedVertex new_src = old2new[src]; - NFAUndirectedVertex new_targ = old2new[targ]; + NFAUndirectedVertex new_u = old2new.at(u); + NFAUndirectedVertex new_v = old2new.at(v); - boost::add_edge(new_src, new_targ, ug); + boost::add_edge(new_u, new_v, ug); } + + assert(!has_parallel_edge(ug)); + return ug; } } // namespace ue2 -#endif /* NG_UNDIRECTED_H_CB42C71CF38E3D */ +#endif /* NG_UNDIRECTED_H */ From 4b3ff085d350f702e7ff5ea11ac4afb6c1917755 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 21 Mar 2017 12:37:52 +1100 Subject: [PATCH 171/326] ng_undirected: avoid parallel edge at construction (Rather than using setS for out edges) --- src/nfagraph/ng_undirected.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/nfagraph/ng_undirected.h b/src/nfagraph/ng_undirected.h index b9fbd58d..1e27ad79 100644 --- a/src/nfagraph/ng_undirected.h +++ b/src/nfagraph/ng_undirected.h @@ -52,11 +52,11 @@ namespace ue2 { * of parallel edges. The only vertex property constructed is \a * vertex_index_t. */ -using NFAUndirectedGraph = - boost::adjacency_list>; +using NFAUndirectedGraph = boost::adjacency_list< + boost::listS, // out edges + boost::listS, // vertices + boost::undirectedS, // graph is undirected + boost::property>; // vertex properties using NFAUndirectedVertex = NFAUndirectedGraph::vertex_descriptor; @@ -95,6 +95,14 @@ NFAUndirectedGraph createUnGraph(const Graph &g, boost::put(boost::vertex_index, ug, nuv, idx++); } + // Track seen edges so that we don't insert parallel edges. + using Vertex = typename Graph::vertex_descriptor; + unordered_set> seen; + seen.reserve(num_edges(g)); + auto make_ordered_edge = [](Vertex a, Vertex b) { + return std::make_pair(std::min(a, b), std::max(a, b)); + }; + for (const auto &e : ue2::edges_range(g)) { auto u = source(e, g); auto v = target(e, g); @@ -109,6 +117,10 @@ NFAUndirectedGraph createUnGraph(const Graph &g, continue; } + if (!seen.emplace(make_ordered_edge(u, v)).second) { + continue; // skip parallel edge. 
+ } + NFAUndirectedVertex new_u = old2new.at(u); NFAUndirectedVertex new_v = old2new.at(v); From 546091f81933ca12cd28e4b15cbd3db5ee66ed0a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 21 Mar 2017 13:09:53 +1100 Subject: [PATCH 172/326] ng_calc_components: filter vertices from ug --- src/nfagraph/ng_calc_components.cpp | 41 +++++++++-------------------- src/nfagraph/ng_util.h | 16 +++++++++++ 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index a7d8dd69..e0689366 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -64,6 +64,7 @@ #include #include +#include using namespace std; @@ -219,28 +220,6 @@ vector findShellEdges(const NGHolder &g, return shell_edges; } -static -void removeVertices(const flat_set &verts, NFAUndirectedGraph &ug, - ue2::unordered_map &old2new, - ue2::unordered_map &new2old) { - for (auto v : verts) { - assert(contains(old2new, v)); - auto uv = old2new.at(v); - clear_vertex(uv, ug); - remove_vertex(uv, ug); - old2new.erase(v); - new2old.erase(uv); - } -} - -static -void renumberVertices(NFAUndirectedGraph &ug) { - u32 vertexIndex = 0; - for (auto uv : vertices_range(ug)) { - put(boost::vertex_index, ug, uv, vertexIndex++); - } -} - /** * Common code called by calc- and recalc- below. Splits the given holder into * one or more connected components, adding them to the comps deque. @@ -286,15 +265,21 @@ void splitIntoComponents(unique_ptr g, new2old.emplace(m.second, m.first); } - // Remove shells from undirected graph and renumber so we have dense - // vertex indices. - removeVertices(head_shell, ug, old2new, new2old); - removeVertices(tail_shell, ug, old2new, new2old); - renumberVertices(ug); + // Filter shell vertices from undirected graph. + unordered_set shell_undir_vertices; + for (auto v : head_shell) { + shell_undir_vertices.insert(old2new.at(v)); + } + for (auto v : tail_shell) { + shell_undir_vertices.insert(old2new.at(v)); + } + auto filtered_ug = boost::make_filtered_graph( + ug, boost::keep_all(), make_bad_vertex_filter(&shell_undir_vertices)); + // Actually run the connected components algorithm. map split_components; const u32 num = connected_components( - ug, boost::make_assoc_property_map(split_components)); + filtered_ug, boost::make_assoc_property_map(split_components)); assert(num > 0); if (num == 1 && shell_edges.empty()) { diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index f3fa1354..1d3a6f32 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -124,6 +124,22 @@ bad_edge_filter make_bad_edge_filter(const EdgeSet *e) { return bad_edge_filter(e); } +/** \brief vertex graph filter. 
*/ +template +struct bad_vertex_filter { + bad_vertex_filter() = default; + explicit bad_vertex_filter(const VertexSet *bad_v) : bad_vertices(bad_v) {} + bool operator()(const typename VertexSet::value_type &v) const { + return !contains(*bad_vertices, v); /* keep vertices not in bad set */ + } + const VertexSet *bad_vertices = nullptr; +}; + +template +bad_vertex_filter make_bad_vertex_filter(const VertexSet *v) { + return bad_vertex_filter(v); +} + /** Visitor that records back edges */ template class BackEdges : public boost::default_dfs_visitor { From 8823a8fbfdaeff4a9e2d7f17d2f5063391b9013b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 14 Mar 2017 18:43:42 +1100 Subject: [PATCH 173/326] ng_region: use flat_sets in exit_info --- src/nfagraph/ng_region.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 0ecd7bd6..39c0f683 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -79,7 +79,7 @@ struct exit_info { explicit exit_info(NFAVertex v) : exit(v) {} NFAVertex exit; - ue2::unordered_set open; + flat_set open; }; } @@ -88,7 +88,7 @@ void checkAndAddExitCandidate(const AcyclicGraph &g, const ue2::unordered_set &r, NFAVertex v, vector *exits) { // set when we find our first candidate. - ue2::unordered_set *open = nullptr; + decltype(exit_info::open) *open = nullptr; /* find the set of vertices reachable from v which are not in r */ for (auto w : adjacent_vertices_range(v, g)) { @@ -136,7 +136,7 @@ void refineExits(const AcyclicGraph &g, const ue2::unordered_set &r, */ static bool exitValid(UNUSED const AcyclicGraph &g, const vector &exits, - const ue2::unordered_set &open_jumps) { + const flat_set &open_jumps) { if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) { return true; } @@ -180,7 +180,7 @@ void buildInitialCandidate(const AcyclicGraph &g, /* in exits of prev region; * out exits from candidate */ vector *exits, - ue2::unordered_set *open_jumps) { + flat_set *open_jumps) { if (it == ite) { candidate->clear(); exits->clear(); @@ -198,7 +198,7 @@ void buildInitialCandidate(const AcyclicGraph &g, return; } - ue2::unordered_set enters = (*exits)[0].open; + auto enters = (*exits)[0].open; // copy candidate->clear(); for (; it != ite; ++it) { @@ -211,7 +211,7 @@ void buildInitialCandidate(const AcyclicGraph &g, if (it != ite) { enters.erase(*it); - open_jumps->swap(enters); + *open_jumps = move(enters); DEBUG_PRINTF("oj size = %zu\n", open_jumps->size()); ++it; } else { @@ -230,7 +230,7 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, vector::const_reverse_iterator t_it = topo.rbegin(); vector exits; ue2::unordered_set candidate; - ue2::unordered_set open_jumps; + flat_set open_jumps; DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); assert(t_it != topo.rend()); candidate.insert(*t_it++); From 7396c939907e34fb3f650f953c5294c7512f2825 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 15 Mar 2017 09:29:44 +1100 Subject: [PATCH 174/326] ng_region: clean up refineExits --- src/nfagraph/ng_region.cpp | 44 +++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 
39c0f683..8adecfcf 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -86,7 +86,7 @@ struct exit_info { static void checkAndAddExitCandidate(const AcyclicGraph &g, const ue2::unordered_set &r, - NFAVertex v, vector *exits) { + NFAVertex v, vector &exits) { // set when we find our first candidate. decltype(exit_info::open) *open = nullptr; @@ -94,8 +94,8 @@ void checkAndAddExitCandidate(const AcyclicGraph &g, for (auto w : adjacent_vertices_range(v, g)) { if (!contains(r, NFAVertex(w))) { if (!open) { - exits->push_back(exit_info(NFAVertex(v))); - open = &exits->back().open; + exits.emplace_back(NFAVertex(v)); + open = &exits.back().open; } open->insert(NFAVertex(w)); } @@ -107,28 +107,31 @@ void checkAndAddExitCandidate(const AcyclicGraph &g, } static -void findExits(const AcyclicGraph &g, const ue2::unordered_set &r, - vector *exits) { - exits->clear(); +vector findExits(const AcyclicGraph &g, + const ue2::unordered_set &r) { + vector exits; for (auto v : r) { checkAndAddExitCandidate(g, r, v, exits); } + + return exits; } static void refineExits(const AcyclicGraph &g, const ue2::unordered_set &r, - NFAVertex new_v, vector *exits) { - for (u32 i = 0; i < exits->size(); i++) { - (*exits)[i].open.erase(new_v); /* new_v is no long an open edge */ - if ((*exits)[i].open.empty()) { /* no open edges: no longer an exit */ - /* shuffle to back and kill */ - (*exits)[i] = exits->back(); - exits->pop_back(); - i--; - } + NFAVertex new_v, vector &exits) { + /* new_v is no long an open edge */ + for (auto &exit : exits) { + exit.open.erase(new_v); } + /* no open edges: no longer an exit */ + exits.erase( + remove_if(exits.begin(), exits.end(), + [&](const exit_info &exit) { return exit.open.empty(); }), + exits.end()); + checkAndAddExitCandidate(g, r, new_v, exits); } @@ -193,7 +196,7 @@ void buildInitialCandidate(const AcyclicGraph &g, DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); candidate->insert(*it); open_jumps->erase(*it); - checkAndAddExitCandidate(g, *candidate, *it, exits); + checkAndAddExitCandidate(g, *candidate, *it, *exits); ++it; return; } @@ -218,7 +221,7 @@ void buildInitialCandidate(const AcyclicGraph &g, open_jumps->clear(); } - findExits(g, *candidate, exits); + *exits = findExits(g, *candidate); } static @@ -228,7 +231,6 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, assert(!topo.empty()); u32 curr_id = 0; vector::const_reverse_iterator t_it = topo.rbegin(); - vector exits; ue2::unordered_set candidate; flat_set open_jumps; DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); @@ -237,7 +239,8 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); assert(t_it != topo.rend()); candidate.insert(*t_it++); - findExits(g, candidate, &exits); + + auto exits = findExits(g, candidate); while (t_it != topo.rend()) { assert(!candidate.empty()); @@ -260,7 +263,7 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, DEBUG_PRINTF("adding %zu to current\n", g[curr].index); candidate.insert(curr); open_jumps.erase(curr); - refineExits(g, candidate, *t_it, &exits); + refineExits(g, candidate, *t_it, exits); DEBUG_PRINTF(" open jumps %zu exits %zu\n", open_jumps.size(), exits.size()); ++t_it; @@ -416,6 +419,7 @@ vector buildTopoOrder(const NGHolder &w, const AcyclicGraph &acyclic_g, vector &colours) { vector topoOrder; + topoOrder.reserve(num_vertices(w)); topological_sort(acyclic_g, back_inserter(topoOrder), color_map(make_iterator_property_map(colours.begin(), From 
d63fdcd860713243489317bed54e2d7e2e7afb51 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 15 Mar 2017 09:37:21 +1100 Subject: [PATCH 175/326] ng_region: simplify checkAndAddExitCandidate --- src/nfagraph/ng_region.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 8adecfcf..39a8a504 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -87,22 +87,19 @@ static void checkAndAddExitCandidate(const AcyclicGraph &g, const ue2::unordered_set &r, NFAVertex v, vector &exits) { - // set when we find our first candidate. - decltype(exit_info::open) *open = nullptr; + exit_info v_exit(v); + auto &open = v_exit.open; /* find the set of vertices reachable from v which are not in r */ for (auto w : adjacent_vertices_range(v, g)) { - if (!contains(r, NFAVertex(w))) { - if (!open) { - exits.emplace_back(NFAVertex(v)); - open = &exits.back().open; - } - open->insert(NFAVertex(w)); + if (!contains(r, w)) { + open.insert(w); } } - if (open) { + if (!open.empty()) { DEBUG_PRINTF("exit %zu\n", g[v].index); + exits.push_back(move(v_exit)); } } From ca22edc9d36298dc83edc70720be1b15a05367a7 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 21 Mar 2017 17:19:56 +1100 Subject: [PATCH 176/326] ng_region: don't realloc exits --- src/nfagraph/ng_region.cpp | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 39a8a504..2391082a 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -104,15 +104,12 @@ void checkAndAddExitCandidate(const AcyclicGraph &g, } static -vector findExits(const AcyclicGraph &g, - const ue2::unordered_set &r) { - vector exits; - +void findExits(const AcyclicGraph &g, const ue2::unordered_set &r, + vector &exits) { + exits.clear(); for (auto v : r) { checkAndAddExitCandidate(g, r, v, exits); } - - return exits; } static @@ -179,26 +176,26 @@ void buildInitialCandidate(const AcyclicGraph &g, ue2::unordered_set *candidate, /* in exits of prev region; * out exits from candidate */ - vector *exits, + vector &exits, flat_set *open_jumps) { if (it == ite) { candidate->clear(); - exits->clear(); + exits.clear(); return; } - if (exits->empty()) { + if (exits.empty()) { DEBUG_PRINTF("odd\n"); candidate->clear(); DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); candidate->insert(*it); open_jumps->erase(*it); - checkAndAddExitCandidate(g, *candidate, *it, *exits); + checkAndAddExitCandidate(g, *candidate, *it, exits); ++it; return; } - auto enters = (*exits)[0].open; // copy + auto enters = exits.front().open; // copy candidate->clear(); for (; it != ite; ++it) { @@ -218,7 +215,7 @@ void buildInitialCandidate(const AcyclicGraph &g, open_jumps->clear(); } - *exits = findExits(g, *candidate); + findExits(g, *candidate, exits); } static @@ -237,7 +234,8 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, assert(t_it != topo.rend()); candidate.insert(*t_it++); - auto exits = findExits(g, candidate); + vector exits; + findExits(g, candidate, exits); while (t_it != topo.rend()) { assert(!candidate.empty()); @@ -253,7 +251,7 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, DEBUG_PRINTF("setting region %u\n", curr_id); } setRegion(candidate, curr_id++, regions); - buildInitialCandidate(g, t_it, topo.rend(), &candidate, &exits, + buildInitialCandidate(g, t_it, topo.rend(), &candidate, exits, &open_jumps); } else { NFAVertex curr = 
*t_it; From 699ab4190a50a396ef07ef66d1c4e2a8c0c69ffc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Mar 2017 09:43:50 +1100 Subject: [PATCH 177/326] ng_region: clean up and modernise --- src/nfagraph/ng_region.cpp | 69 +++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 2391082a..6fef9aa9 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -70,9 +70,9 @@ using namespace std; namespace ue2 { -typedef ue2::unordered_set BackEdgeSet; -typedef boost::filtered_graph> - AcyclicGraph; +using BackEdgeSet = unordered_set; +using AcyclicGraph = + boost::filtered_graph>; namespace { struct exit_info { @@ -85,8 +85,8 @@ struct exit_info { static void checkAndAddExitCandidate(const AcyclicGraph &g, - const ue2::unordered_set &r, - NFAVertex v, vector &exits) { + const unordered_set &r, NFAVertex v, + vector &exits) { exit_info v_exit(v); auto &open = v_exit.open; @@ -104,7 +104,7 @@ void checkAndAddExitCandidate(const AcyclicGraph &g, } static -void findExits(const AcyclicGraph &g, const ue2::unordered_set &r, +void findExits(const AcyclicGraph &g, const unordered_set &r, vector &exits) { exits.clear(); for (auto v : r) { @@ -113,7 +113,7 @@ void findExits(const AcyclicGraph &g, const ue2::unordered_set &r, } static -void refineExits(const AcyclicGraph &g, const ue2::unordered_set &r, +void refineExits(const AcyclicGraph &g, const unordered_set &r, NFAVertex new_v, vector &exits) { /* new_v is no long an open edge */ for (auto &exit : exits) { @@ -121,10 +121,9 @@ void refineExits(const AcyclicGraph &g, const ue2::unordered_set &r, } /* no open edges: no longer an exit */ - exits.erase( - remove_if(exits.begin(), exits.end(), + exits.erase(remove_if(exits.begin(), exits.end(), [&](const exit_info &exit) { return exit.open.empty(); }), - exits.end()); + exits.end()); checkAndAddExitCandidate(g, r, new_v, exits); } @@ -162,8 +161,8 @@ bool exitValid(UNUSED const AcyclicGraph &g, const vector &exits, } static -void setRegion(const ue2::unordered_set &r, u32 rid, - ue2::unordered_map ®ions) { +void setRegion(const unordered_set &r, u32 rid, + unordered_map ®ions) { for (auto v : r) { regions[v] = rid; } @@ -173,34 +172,34 @@ static void buildInitialCandidate(const AcyclicGraph &g, vector::const_reverse_iterator &it, const vector::const_reverse_iterator &ite, - ue2::unordered_set *candidate, + unordered_set &candidate, /* in exits of prev region; * out exits from candidate */ vector &exits, - flat_set *open_jumps) { + flat_set &open_jumps) { if (it == ite) { - candidate->clear(); + candidate.clear(); exits.clear(); return; } if (exits.empty()) { DEBUG_PRINTF("odd\n"); - candidate->clear(); + candidate.clear(); DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); - candidate->insert(*it); - open_jumps->erase(*it); - checkAndAddExitCandidate(g, *candidate, *it, exits); + candidate.insert(*it); + open_jumps.erase(*it); + checkAndAddExitCandidate(g, candidate, *it, exits); ++it; return; } auto enters = exits.front().open; // copy - candidate->clear(); + candidate.clear(); for (; it != ite; ++it) { DEBUG_PRINTF("adding %zu to initial\n", g[*it].index); - candidate->insert(*it); + candidate.insert(*it); if (contains(enters, *it)) { break; } @@ -208,24 +207,24 @@ void buildInitialCandidate(const AcyclicGraph &g, if (it != ite) { enters.erase(*it); - *open_jumps = move(enters); - DEBUG_PRINTF("oj size = %zu\n", open_jumps->size()); + open_jumps = move(enters); + 
DEBUG_PRINTF("oj size = %zu\n", open_jumps.size()); ++it; } else { - open_jumps->clear(); + open_jumps.clear(); } - findExits(g, *candidate, exits); + findExits(g, candidate, exits); } static void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, const vector &topo, - ue2::unordered_map ®ions) { + unordered_map ®ions) { assert(!topo.empty()); u32 curr_id = 0; - vector::const_reverse_iterator t_it = topo.rbegin(); - ue2::unordered_set candidate; + auto t_it = topo.rbegin(); + unordered_set candidate; flat_set open_jumps; DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index); assert(t_it != topo.rend()); @@ -251,8 +250,8 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, DEBUG_PRINTF("setting region %u\n", curr_id); } setRegion(candidate, curr_id++, regions); - buildInitialCandidate(g, t_it, topo.rend(), &candidate, exits, - &open_jumps); + buildInitialCandidate(g, t_it, topo.rend(), candidate, exits, + open_jumps); } else { NFAVertex curr = *t_it; DEBUG_PRINTF("adding %zu to current\n", g[curr].index); @@ -271,7 +270,7 @@ void findDagLeaders(const NGHolder &h, const AcyclicGraph &g, static void mergeUnderBackEdges(const NGHolder &g, const vector &topo, const BackEdgeSet &backEdges, - ue2::unordered_map ®ions) { + unordered_map ®ions) { for (const auto &e : backEdges) { NFAVertex u = source(e, g); NFAVertex v = target(e, g); @@ -341,7 +340,7 @@ void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g, static void liftSinks(const AcyclicGraph &acyclic_g, vector &topoOrder) { - ue2::unordered_set sinks; + unordered_set sinks; for (auto v : vertices_range(acyclic_g)) { if (is_special(v, acyclic_g)) { continue; @@ -386,7 +385,7 @@ void liftSinks(const AcyclicGraph &acyclic_g, vector &topoOrder) { } NFAVertex s = *ri; DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index); - ue2::unordered_set parents; + unordered_set parents; for (const auto &e : in_edges_range(s, acyclic_g)) { parents.insert(NFAVertex(source(e, acyclic_g))); } @@ -437,7 +436,7 @@ vector buildTopoOrder(const NGHolder &w, return topoOrder; } -ue2::unordered_map assignRegions(const NGHolder &g) { +unordered_map assignRegions(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const u32 numVertices = num_vertices(g); DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices); @@ -459,7 +458,7 @@ ue2::unordered_map assignRegions(const NGHolder &g) { vector topoOrder = buildTopoOrder(g, acyclic_g, colours); // Everybody starts in region 0. - ue2::unordered_map regions; + unordered_map regions; regions.reserve(numVertices); for (auto v : vertices_range(g)) { regions.emplace(v, 0); From 1200f33116ae393d199ea318043951bbceea64b9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 22 Mar 2017 10:56:59 +1100 Subject: [PATCH 178/326] ng_region: no need to copy enters --- src/nfagraph/ng_region.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 6fef9aa9..91904b46 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -194,7 +194,9 @@ void buildInitialCandidate(const AcyclicGraph &g, return; } - auto enters = exits.front().open; // copy + // Note: findExits() will clear exits, so it's safe to mutate/move its + // elements here. 
+ auto &enters = exits.front().open; candidate.clear(); for (; it != ite; ++it) { From 0cbec2c1c3b67001b30426ec46ddbfd993d53d16 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 22 Mar 2017 12:43:42 +1100 Subject: [PATCH 179/326] mmbBuildInitRangePlan: correct offset if initial block is not block 0 --- src/util/multibit_build.cpp | 4 +-- unit/internal/multi_bit.cpp | 61 +++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/src/util/multibit_build.cpp b/src/util/multibit_build.cpp index 5fe2d617..c726bdf9 100644 --- a/src/util/multibit_build.cpp +++ b/src/util/multibit_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -272,7 +272,7 @@ void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end, } // Partial block to deal with beginning. - block_offset += k1 / MMB_KEY_BITS; + block_offset += (k1 / MMB_KEY_BITS) * sizeof(MMB_TYPE); if (k1 % MMB_KEY_BITS) { u32 idx = k1 / MMB_KEY_BITS; u32 block_end = (idx + 1) * MMB_KEY_BITS; diff --git a/unit/internal/multi_bit.cpp b/unit/internal/multi_bit.cpp index 38da1d8a..30dce493 100644 --- a/unit/internal/multi_bit.cpp +++ b/unit/internal/multi_bit.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "gtest/gtest.h" #include "ue2common.h" +#include "rose/rose_build_scatter.h" #include "util/compile_error.h" #include "util/make_unique.h" #include "util/multibit.h" @@ -698,7 +699,9 @@ TEST_P(MultiBitTest, InitRangeChunked) { for (u32 n = 2; n <= 10; n++) { u32 chunk_size = test_size / n; - if (chunk_size == 0) break; + if (chunk_size == 0) { + break; + } for (u32 k = 0; k < n; k++) { u32 chunk_begin = k * chunk_size; @@ -723,6 +726,60 @@ TEST_P(MultiBitTest, InitRangeChunked) { } } +static +void apply(const scatter_plan_raw &sp, u8 *out) { + for (const auto &e : sp.p_u64a) { + memcpy(out + e.offset, &e.val, sizeof(e.val)); + } + for (const auto &e : sp.p_u32) { + memcpy(out + e.offset, &e.val, sizeof(e.val)); + } + for (const auto &e : sp.p_u16) { + memcpy(out + e.offset, &e.val, sizeof(e.val)); + } + for (const auto &e : sp.p_u8) { + memcpy(out + e.offset, &e.val, sizeof(e.val)); + } +} + +TEST_P(MultiBitTest, InitRangePlanChunked) { + SCOPED_TRACE(test_size); + ASSERT_TRUE(ba != nullptr); + + // Init ranges chunk by chunk. + + for (u32 n = 2; n <= 10; n++) { + u32 chunk_size = test_size / n; + if (chunk_size == 0) { + break; + } + + for (u32 k = 0; k < n; k++) { + u32 chunk_begin = k * chunk_size; + u32 chunk_end = min(test_size, (k + 1) * chunk_size); + + scatter_plan_raw sp; + mmbBuildInitRangePlan(test_size, chunk_begin, chunk_end, &sp); + memset(ba, 0xaa, mmbit_size(test_size)); + apply(sp, ba); + + // First bit set should be chunk_begin. + ASSERT_EQ(chunk_begin, mmbit_iterate(ba, test_size, MMB_INVALID)); + + // All bits in the chunk should be on. + for (u64a i = chunk_begin; i < chunk_end; i += stride) { + SCOPED_TRACE(i); + ASSERT_TRUE(mmbit_isset(ba, test_size, i)); + } + + // Last bit on is chunk_end - 1. 
+ if (chunk_end) { + ASSERT_EQ(MMB_INVALID, mmbit_iterate(ba, test_size, chunk_end - 1)); + } + } + } +} + TEST(MultiBit, SparseIteratorBegin1) { const u32 test_size = 100; vector it; From 1d9a5421cef61beb837ffed7df3dacbdb1af31f7 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 28 Mar 2017 10:51:41 +1100 Subject: [PATCH 180/326] ICC doesn't have a __BMI2__ macro --- src/util/bitutils.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/util/bitutils.h b/src/util/bitutils.h index d144e879..f9e8d151 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -471,7 +471,8 @@ u32 rank_in_mask64(u64a mask, u32 bit) { return popcount64(mask); } -#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) +#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) || \ + (defined(__INTEL_COMPILER) && defined(__AVX2__)) #define HAVE_PEXT #endif From a1bc69f3dd4229e78c82882b5ebd9af1b5a88bc5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 23 Mar 2017 10:12:12 +1100 Subject: [PATCH 181/326] rose_build_groups: allow simple-exh lits to squash --- src/rose/rose_build_groups.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/rose/rose_build_groups.cpp b/src/rose/rose_build_groups.cpp index a253ef04..f17e1ee4 100644 --- a/src/rose/rose_build_groups.cpp +++ b/src/rose/rose_build_groups.cpp @@ -35,6 +35,7 @@ #include "util/boundary_reports.h" #include "util/compile_context.h" +#include "util/report_manager.h" #include #include @@ -597,8 +598,12 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, /* Case 1 */ - // Can't squash cases with accepts - if (!g[v].reports.empty()) { + // Can't squash cases with accepts unless they are all + // simple-exhaustible. + if (any_of_in(g[v].reports, [&](ReportID report) { + return !isSimpleExhaustible(build.rm.getReport(report)); + })) { + DEBUG_PRINTF("can't squash reporter\n"); return false; } From 0a163b553543397c0e6693c6f50e674443f57a55 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 28 Mar 2017 11:26:40 +1100 Subject: [PATCH 182/326] rose: only use live reports for dedupe assignment --- src/rose/rose_build_misc.cpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 5173596f..c9403896 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -590,6 +590,8 @@ private: map> suffix_map; map> outfix_map; map> puff_map; + + unordered_set live_reports; //!< all live internal reports. }; unique_ptr RoseBuildImpl::generateDedupeAux() const { @@ -606,6 +608,8 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) set suffixes; for (auto v : vertices_range(g)) { + insert(&live_reports, g[v].reports); + // Literals in the small block table are "shadow" copies of literals in // the other tables that do not run in the same runtime invocation. // Dedupe key assignment will be taken care of by the real literals. 
@@ -629,12 +633,14 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) for (const auto &suffix : suffixes) { for (const auto &report_id : all_reports(suffix)) { suffix_map[report_id].insert(suffix); + live_reports.insert(report_id); } } for (const auto &outfix : tbi.outfixes) { for (const auto &report_id : all_reports(outfix)) { outfix_map[report_id].insert(&outfix); + live_reports.insert(report_id); } } @@ -642,11 +648,21 @@ RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) auto *mpv = tbi.mpv_outfix->mpv(); for (const auto &puff : mpv->puffettes) { puff_map[puff.report].insert(&puff); + live_reports.insert(puff.report); } for (const auto &puff : mpv->triggered_puffettes) { puff_map[puff.report].insert(&puff); + live_reports.insert(puff.report); } } + + // Collect live reports from boundary reports. + insert(&live_reports, tbi.boundary.report_at_0); + insert(&live_reports, tbi.boundary.report_at_0_eod); + insert(&live_reports, tbi.boundary.report_at_eod); + + DEBUG_PRINTF("%zu of %zu reports are live\n", live_reports.size(), + tbi.rm.numReports()); } static @@ -716,11 +732,20 @@ bool RoseDedupeAuxImpl::hasSafeMultiReports( } bool RoseDedupeAuxImpl::requiresDedupeSupport( - const ue2::flat_set &reports) const { + const flat_set &reports_in) const { /* TODO: this could be expanded to check for offset or character constraints */ - DEBUG_PRINTF("reports: %s\n", as_string_list(reports).c_str()); + // We don't want to consider dead reports (tracked by ReportManager but no + // longer used) for the purposes of assigning dupe keys. + flat_set reports; + for (auto id : reports_in) { + if (contains(live_reports, id)) { + reports.insert(id); + } + } + + DEBUG_PRINTF("live reports: %s\n", as_string_list(reports).c_str()); const RoseGraph &g = tbi.g; From a871f70c2566f696c530b1ac2e65e26ce0f8bf6f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 23 Mar 2017 14:10:14 +1100 Subject: [PATCH 183/326] ng_extparam: split up work and do per-comp reduce This change breaks extparam processing up into: - propagateExtendedParams: propagates min_length, min_offset and max_offset into the reports on the graph - reduceExtendedParams: runs graph reductions based on extparams Then, we apply the reduce pass to the whole graph, and later as well to each component after calc_components. 
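Schematically, the compile path after this change looks like the sketch
below. The two pass names match this patch; everything else (Graph, Expr,
ReportManager, the driver) is a simplified stand-in, not the real compiler
code:

    #include <deque>
    #include <memory>

    struct Graph {};
    struct Expr {};
    struct ReportManager {};
    enum som_type { SOM_NONE, SOM_LEFT };

    // Pass 1: fold the expression's min_length/min_offset/max_offset into
    // the reports on the graph. Runs once, before any splitting.
    static void propagateExtendedParams(Graph &, const Expr &,
                                        ReportManager &) {}

    // Pass 2: reductions driven purely by the bounds now carried in the
    // reports (edge pruning, anchoring, min_length -> bounded repeat).
    static void reduceExtendedParams(Graph &, ReportManager &, som_type) {}

    static void addGraphSketch(std::unique_ptr<Graph> g, const Expr &expr,
                               ReportManager &rm, som_type som) {
        propagateExtendedParams(*g, expr, rm);
        reduceExtendedParams(*g, rm, som);        // whole-graph shot
        std::deque<std::unique_ptr<Graph>> comps;
        comps.push_back(std::move(g));            // calcComponents() elided
        for (auto &comp : comps) {
            reduceExtendedParams(*comp, rm, som); // per-component shot
        }
    }

    int main() {
        ReportManager rm;
        addGraphSketch(std::make_unique<Graph>(), Expr{}, rm, SOM_NONE);
        return 0;
    }

Because propagation happens exactly once, the per-component reduce can be
re-run safely: it reads only the bounds already stored in the reports rather
than consulting the original expression again.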
--- src/hs.cpp | 2 +- src/nfagraph/ng.cpp | 36 +- src/nfagraph/ng_extparam.cpp | 564 ++++++++++++++++++---------- src/nfagraph/ng_extparam.h | 21 +- src/smallwrite/smallwrite_build.cpp | 20 +- unit/hyperscan/expr_info.cpp | 6 +- 6 files changed, 435 insertions(+), 214 deletions(-) diff --git a/src/hs.cpp b/src/hs.cpp index c1e1cdce..b9d3b356 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -388,7 +388,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, // fuzz graph - this must happen before any transformations are made make_fuzzy(*g, expr.edit_distance, cc.grey); - handleExtendedParams(rm, *g, expr, cc); + propagateExtendedParams(*g, expr, rm); fillExpressionInfo(rm, *g, expr, &local_info); } catch (const CompileError &e) { diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index dc74dcee..8b247c74 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -214,6 +214,7 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, assert(allMatchStatesHaveReports(g)); + reduceExtendedParams(g, ng.rm, som); reduceGraph(g, som, expr.utf8, cc); dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey); @@ -223,6 +224,13 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, removeRegionRedundancy(g, som); } + // We might be done at this point: if we've run out of vertices, we can + // stop processing. + if (num_vertices(g) == N_SPECIALS) { + DEBUG_PRINTF("all vertices claimed\n"); + return true; + } + // "Short Exhaustible Passthrough" patterns always become outfixes. if (!som && isSEP(g, ng.rm, cc.grey)) { DEBUG_PRINTF("graph is SEP\n"); @@ -358,10 +366,22 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr g_ptr) { optimiseVirtualStarts(g); /* good for som */ - handleExtendedParams(rm, g, expr, cc); - if (expr.min_length) { - // We have a minimum length constraint, which we currently use SOM to - // satisfy. + propagateExtendedParams(g, expr, rm); + reduceExtendedParams(g, rm, som); + + // We may have removed all the edges to accept, in which case this + // expression cannot match. + if (can_never_match(g)) { + throw CompileError(expr.index, "Extended parameter constraints can not " + "be satisfied for any match from this " + "expression."); + } + + if (any_of_in(all_reports(g), [&](ReportID id) { + return rm.getReport(id).minLength; + })) { + // We have at least one report with a minimum length constraint, which + // we currently use SOM to satisfy. som = SOM_LEFT; ssm.somPrecision(8); } @@ -377,10 +397,16 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr g_ptr) { relaxForbiddenUtf8(g, expr); } - if (expr.highlander && !expr.min_length && !expr.min_offset) { + if (all_of_in(all_reports(g), [&](ReportID id) { + const auto &report = rm.getReport(id); + return report.ekey != INVALID_EKEY && !report.minLength && + !report.minOffset; + })) { // In highlander mode: if we don't have constraints on our reports that // may prevent us accepting our first match (i.e. extended params) we // can prune the other out-edges of all vertices connected to accept. + // TODO: shift the report checking down into pruneHighlanderAccepts() + // to allow us to handle the parts we can in mixed cases. pruneHighlanderAccepts(g, rm); } diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index 31a1f81b..19fa2295 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -26,12 +26,13 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -/** \file +/** + * \file * \brief Propagate extended parameters to vertex reports and reduce graph if * possible. * * This code handles the propagation of the extension parameters specified by - * the user with the hs_expr_ext structure into the reports on the graph's + * the user with the \ref hs_expr_ext structure into the reports on the graph's * vertices. * * There are also some analyses that prune edges that cannot contribute to a @@ -68,8 +69,28 @@ namespace ue2 { static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000; static const u32 MAX_MINLENGTH_TO_CONVERT = 2000; -/** \brief Find the (min, max) offset adjustment for the reports on a given - * vertex. */ +/** True if all the given reports have the same extparam bounds. */ +template +bool hasSameBounds(const Container &reports, const ReportManager &rm) { + assert(!reports.empty()); + + const auto &first = rm.getReport(*reports.begin()); + for (auto id : reports) { + const auto &report = rm.getReport(id); + if (report.minOffset != first.minOffset || + report.maxOffset != first.maxOffset || + report.minLength != first.minLength) { + return false; + } + } + + return true; +} + +/** + * \brief Find the (min, max) offset adjustment for the reports on a given + * vertex. + */ static pair getMinMaxOffsetAdjust(const ReportManager &rm, const NGHolder &g, NFAVertex v) { @@ -130,55 +151,76 @@ DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) { return match_depths; } +template +void replaceReports(NGHolder &g, NFAVertex accept, flat_set &seen, + Function func) { + for (auto v : inv_adjacent_vertices_range(accept, g)) { + if (v == g.accept) { + // Don't operate on accept: the accept->acceptEod edge is stylised. + assert(accept == g.acceptEod); + assert(g[v].reports.empty()); + continue; + } + + if (!seen.insert(v).second) { + continue; // We have already processed v. + } + + auto &reports = g[v].reports; + if (reports.empty()) { + continue; + } + decltype(g[v].reports) new_reports; + for (auto id : g[v].reports) { + new_reports.insert(func(v, id)); + } + reports = std::move(new_reports); + } +} + +/** + * Generic function for replacing all the reports in the graph. + * + * Pass this a function that takes a vertex and a ReportID returns another + * ReportID (or the same one) to replace it with. + */ +template +void replaceReports(NGHolder &g, Function func) { + flat_set seen; + replaceReports(g, g.accept, seen, func); + replaceReports(g, g.acceptEod, seen, func); +} + /** \brief Replace the graph's reports with new reports that specify bounds. */ static void updateReportBounds(ReportManager &rm, NGHolder &g, - const ExpressionInfo &expr, NFAVertex accept, - set &done) { - for (auto v : inv_adjacent_vertices_range(accept, g)) { - // Don't operate on g.accept itself. - if (v == g.accept) { - assert(accept == g.acceptEod); - continue; + const ExpressionInfo &expr) { + DEBUG_PRINTF("updating report bounds\n"); + replaceReports(g, [&](NFAVertex, ReportID id) { + Report report = rm.getReport(id); // make a copy + assert(!report.hasBounds()); + + // Note that we need to cope with offset adjustment here. + + report.minOffset = expr.min_offset - report.offsetAdjust; + if (expr.max_offset == MAX_OFFSET) { + report.maxOffset = MAX_OFFSET; + } else { + report.maxOffset = expr.max_offset - report.offsetAdjust; + } + assert(report.maxOffset >= report.minOffset); + + report.minLength = expr.min_length; + if (expr.min_length && !expr.som) { + report.quashSom = true; } - // Don't operate on a vertex we've already done. 
- if (contains(done, v)) { - continue; - } - done.insert(v); + DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, " + "min_length=%llu\n", id, report.minOffset, + report.maxOffset, report.minLength); - flat_set new_reports; - auto &reports = g[v].reports; - - for (auto id : reports) { - Report ir = rm.getReport(id); // make a copy - assert(!ir.hasBounds()); - - // Note that we need to cope with offset adjustment here. - - ir.minOffset = expr.min_offset - ir.offsetAdjust; - if (expr.max_offset == MAX_OFFSET) { - ir.maxOffset = MAX_OFFSET; - } else { - ir.maxOffset = expr.max_offset - ir.offsetAdjust; - } - assert(ir.maxOffset >= ir.minOffset); - - ir.minLength = expr.min_length; - if (expr.min_length && !expr.som) { - ir.quashSom = true; - } - - DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, " - "min_length=%llu\n", - id, ir.minOffset, ir.maxOffset, ir.minLength); - new_reports.insert(rm.getInternalId(ir)); - } - - DEBUG_PRINTF("swapping reports on vertex %zu\n", g[v].index); - reports.swap(new_reports); - } + return rm.getInternalId(report); + }); } static @@ -191,32 +233,93 @@ bool hasVirtualStarts(const NGHolder &g) { return false; } -/** If the pattern is unanchored, has a max_offset and has not asked for SOM, - * we can use that knowledge to anchor it which will limit its lifespan. Note - * that we can't use this transformation if there's a min_length, as it's - * currently handled using "sly SOM". +/** Set the min_length param for all reports to zero. */ +static +void clearMinLengthParam(NGHolder &g, ReportManager &rm) { + DEBUG_PRINTF("clearing min length\n"); + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + new_report.minLength = 0; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * Set the min_offset param to zero and the max_offset param to MAX_OFFSET for + * all reports. + */ +static +void clearOffsetParams(NGHolder &g, ReportManager &rm) { + DEBUG_PRINTF("clearing min and max offset\n"); + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + new_report.minOffset = 0; + new_report.maxOffset = MAX_OFFSET; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * If the pattern is unanchored, has a max_offset and has not asked for SOM, we + * can use that knowledge to anchor it which will limit its lifespan. Note that + * we can't use this transformation if there's a min_length, as it's currently + * handled using "sly SOM". * * Note that it is possible to handle graphs that have a combination of * anchored and unanchored paths, but it's too tricky for the moment. 
*/ static -bool anchorPatternWithBoundedRepeat(NGHolder &g, const ExpressionInfo &expr, - const depth &minWidth, - const depth &maxWidth) { - assert(!expr.som); - assert(expr.max_offset != MAX_OFFSET); - assert(minWidth <= maxWidth); - assert(maxWidth.is_reachable()); - - DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", - minWidth.str().c_str(), maxWidth.str().c_str(), - expr.min_offset, expr.max_offset); - - if (expr.max_offset > MAX_MAXOFFSET_TO_ANCHOR) { +bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) { + if (!isFloating(g)) { return false; } - if (expr.max_offset < minWidth) { + const auto &reports = all_reports(g); + if (reports.empty()) { + return false; + } + + if (any_of_in(reports, [&](ReportID id) { + const auto &report = rm.getReport(id); + return report.maxOffset == MAX_OFFSET || report.minLength || + report.offsetAdjust; + })) { + return false; + } + + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("mixed report bounds\n"); + return false; + } + + const depth minWidth = findMinWidth(g); + const depth maxWidth = findMaxWidth(g); + + assert(minWidth <= maxWidth); + assert(maxWidth.is_reachable()); + + const auto &first_report = rm.getReport(*reports.begin()); + const auto min_offset = first_report.minOffset; + const auto max_offset = first_report.maxOffset; + assert(max_offset < MAX_OFFSET); + + DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", + minWidth.str().c_str(), maxWidth.str().c_str(), + min_offset, max_offset); + + if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) { + return false; + } + + if (max_offset < minWidth) { assert(0); return false; } @@ -237,10 +340,10 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, const ExpressionInfo &expr, u32 min_bound, max_bound; if (maxWidth.is_infinite()) { min_bound = 0; - max_bound = expr.max_offset - minWidth; + max_bound = max_offset - minWidth; } else { - min_bound = expr.min_offset > maxWidth ? expr.min_offset - maxWidth : 0; - max_bound = expr.max_offset - minWidth; + min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0; + max_bound = max_offset - minWidth; } DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); @@ -293,6 +396,13 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, const ExpressionInfo &expr, renumber_vertices(g); renumber_edges(g); + if (minWidth == maxWidth) { + // For a fixed width pattern, we can retire the offsets as + // they are implicit in the graph now. + clearOffsetParams(g, rm); + } + + clearReports(g); return true; } @@ -341,17 +451,27 @@ bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, return true; } -/** If the pattern has a min_length and is of "ratchet" form with one unbounded +/** + * If the pattern has a min_length and is of "ratchet" form with one unbounded * repeat, that repeat can become a bounded repeat. 
* * /foo.*bar/{min_length=100} --> /foo.{94,}bar/ */ static -bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g, - ExpressionInfo &expr) { - assert(expr.min_length); +bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) { + const auto &reports = all_reports(g); - if (expr.min_length > MAX_MINLENGTH_TO_CONVERT) { + if (reports.empty()) { + return false; + } + + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("mixed report bounds\n"); + return false; + } + + const auto &min_length = rm.getReport(*reports.begin()).minLength; + if (!min_length || min_length > MAX_MINLENGTH_TO_CONVERT) { return false; } @@ -381,7 +501,6 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g, u32 width = 0; - // Walk from the start vertex to the cyclic state and ensure we have a // chain of vertices. while (v != cyclic) { @@ -443,10 +562,10 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g, DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, g[cyclic].index); - if (width >= expr.min_length) { + if (width >= min_length) { DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n", - expr.min_length, width); - expr.min_length = 0; + min_length, width); + clearMinLengthParam(g, rm); return true; } @@ -474,7 +593,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g, const CharReach &cr = g[cyclic].char_reach; - for (u32 i = 0; i < expr.min_length - width - 1; ++i) { + for (u32 i = 0; i < min_length - width - 1; ++i) { v = add_vertex(g); g[v].char_reach = cr; @@ -491,9 +610,8 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g, renumber_vertices(g); renumber_edges(g); + clearMinLengthParam(g, rm); clearReports(g); - - expr.min_length = 0; return true; } @@ -511,8 +629,8 @@ bool hasExtParams(const ExpressionInfo &expr) { return false; } -static -depth maxDistFromStart(const NFAVertexBidiDepth &d) { +template +depth maxDistFromStart(const VertexDepth &d) { if (!d.fromStartDotStar.max.is_unreachable()) { // A path from startDs, any path, implies we can match at any offset. return depth::infinity(); @@ -541,7 +659,7 @@ const depth& minDistToAccept(const NFAVertexBidiDepth &d) { } static -bool isEdgePrunable(const NGHolder &g, const ExpressionInfo &expr, +bool isEdgePrunable(const NGHolder &g, const Report &report, const vector &depths, const NFAEdge &e) { const NFAVertex u = source(e, g); @@ -570,29 +688,29 @@ bool isEdgePrunable(const NGHolder &g, const ExpressionInfo &expr, const NFAVertexBidiDepth &du = depths.at(u_idx); const NFAVertexBidiDepth &dv = depths.at(v_idx); - if (expr.min_offset) { + if (report.minOffset) { depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv); - if (max_offset.is_finite() && max_offset < expr.min_offset) { + if (max_offset.is_finite() && max_offset < report.minOffset) { DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str()); return true; } } - if (expr.max_offset != MAX_OFFSET) { + if (report.maxOffset != MAX_OFFSET) { depth min_offset = minDistFromStart(du) + minDistToAccept(dv); assert(min_offset.is_finite()); - if (min_offset > expr.max_offset) { + if (min_offset > report.maxOffset) { DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str()); return true; } } - if (expr.min_length && is_any_accept(v, g)) { + if (report.minLength && is_any_accept(v, g)) { // Simple take on min_length. If we're an edge to accept and our max // dist from start is too small, we can be pruned. 
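         // (e.g. with min_length=10, an edge to accept whose source vertex
         // lies at most 4 bytes from start can never produce a long enough
         // match)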
const depth &width = du.fromStart.max; - if (width.is_finite() && width < expr.min_length) { + if (width.is_finite() && width < report.minLength) { DEBUG_PRINTF("max width %s from start too small for min_length\n", width.str().c_str()); return true; @@ -603,14 +721,26 @@ bool isEdgePrunable(const NGHolder &g, const ExpressionInfo &expr, } static -void pruneExtUnreachable(NGHolder &g, const ExpressionInfo &expr) { +void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { + const auto &reports = all_reports(g); + if (reports.empty()) { + return; + } + + if (!hasSameBounds(reports, rm)) { + DEBUG_PRINTF("report bounds vary\n"); + return; + } + + const auto &report = rm.getReport(*reports.begin()); + vector depths; calcDepths(g, depths); vector dead; for (const auto &e : edges_range(g)) { - if (isEdgePrunable(g, expr, depths, e)) { + if (isEdgePrunable(g, report, depths, e)) { DEBUG_PRINTF("pruning\n"); dead.push_back(e); } @@ -622,32 +752,45 @@ void pruneExtUnreachable(NGHolder &g, const ExpressionInfo &expr) { remove_edges(dead, g); pruneUseless(g); + clearReports(g); } -/** Remove vacuous edges in graphs where the min_offset or min_length - * constraints dictate that they can never produce a match. */ +/** + * Remove vacuous edges in graphs where the min_offset or min_length + * constraints dictate that they can never produce a match. + */ static -void pruneVacuousEdges(NGHolder &g, const ExpressionInfo &expr) { - if (!expr.min_length && !expr.min_offset) { - return; - } - +void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) { vector dead; + auto has_min_offset = [&](NFAVertex v) { + assert(!g[v].reports.empty()); // must be reporter + return all_of_in(g[v].reports, [&](ReportID id) { + return rm.getReport(id).minOffset > 0; + }); + }; + + auto has_min_length = [&](NFAVertex v) { + assert(!g[v].reports.empty()); // must be reporter + return all_of_in(g[v].reports, [&](ReportID id) { + return rm.getReport(id).minLength > 0; + }); + }; + for (const auto &e : edges_range(g)) { const NFAVertex u = source(e, g); const NFAVertex v = target(e, g); - // Special case: Crudely remove vacuous edges from start in graphs with a - // min_offset. - if (expr.min_offset && u == g.start && is_any_accept(v, g)) { + // Special case: Crudely remove vacuous edges from start in graphs with + // a min_offset. + if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) { DEBUG_PRINTF("vacuous edge in graph with min_offset!\n"); dead.push_back(e); continue; } // If a min_length is set, vacuous edges can be removed. 
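         // (a vacuous start->accept edge corresponds to an empty match,
         // which has length zero and can never satisfy min_length > 0)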
- if (expr.min_length && is_any_start(u, g) && is_any_accept(v, g)) { + if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) { DEBUG_PRINTF("vacuous edge in graph with min_length!\n"); dead.push_back(e); continue; @@ -658,13 +801,14 @@ void pruneVacuousEdges(NGHolder &g, const ExpressionInfo &expr) { return; } + DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size()); remove_edges(dead, g); pruneUseless(g); + clearReports(g); } static -void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr, - const vector &depths, +void pruneUnmatchable(NGHolder &g, const vector &depths, const ReportManager &rm, NFAVertex accept) { vector dead; @@ -675,6 +819,11 @@ void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr, continue; } + if (!hasSameBounds(g[v].reports, rm)) { + continue; + } + const auto &report = rm.getReport(*g[v].reports.begin()); + u32 idx = g[v].index; DepthMinMax d = depths[idx]; // copy pair adj = getMinMaxOffsetAdjust(rm, g, v); @@ -683,16 +832,16 @@ void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr, d.min += adj.first; d.max += adj.second; - if (d.max.is_finite() && d.max < expr.min_length) { + if (d.max.is_finite() && d.max < report.minLength) { DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n", - d.max.str().c_str(), expr.min_length); + d.max.str().c_str(), report.minLength); dead.push_back(e); continue; } - if (expr.max_offset != MAX_OFFSET && d.min > expr.max_offset) { + if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) { DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n", - d.min.str().c_str(), expr.max_offset); + d.min.str().c_str(), report.maxOffset); dead.push_back(e); continue; } @@ -701,47 +850,36 @@ void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr, remove_edges(dead, g); } -/** Remove edges to accepts that can never produce a match long enough to - * satisfy our min_length and max_offset constraints. */ +/** + * Remove edges to accepts that can never produce a match long enough to + * satisfy our min_length and max_offset constraints. 
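+ *
+ * For example (illustrative): in /foo|foobar/ with {min_length=5}, the
+ * accept edge for the /foo/ alternative can produce at most a 3-byte match
+ * and can be pruned; the /foobar/ path remains.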
+ */ static -void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr, - const ReportManager &rm) { - if (!expr.min_length) { +void pruneUnmatchable(NGHolder &g, const ReportManager &rm) { + if (!any_of_in(all_reports(g), [&](ReportID id) { + return rm.getReport(id).minLength > 0; + })) { return; } vector depths = getDistancesFromSOM(g); - pruneUnmatchable(g, expr, depths, rm, g.accept); - pruneUnmatchable(g, expr, depths, rm, g.acceptEod); + pruneUnmatchable(g, depths, rm, g.accept); + pruneUnmatchable(g, depths, rm, g.acceptEod); pruneUseless(g); -} - -static -bool isUnanchored(const NGHolder &g) { - for (auto v : adjacent_vertices_range(g.start, g)) { - if (!edge(g.startDs, v, g).second) { - DEBUG_PRINTF("fail, %zu is anchored vertex\n", g[v].index); - return false; - } - } - return true; + clearReports(g); } static bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) { - for (auto report : all_reports(g)) { - const Report &ir = rm.getReport(report); - if (ir.offsetAdjust) { - return true; - } - } - return false; + return any_of_in(all_reports(g), [&rm](ReportID id) { + return rm.getReport(id).offsetAdjust != 0; + }); } -void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr, - UNUSED const CompileContext &cc) { +void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, + ReportManager &rm) { if (!hasExtParams(expr)) { return; } @@ -750,11 +888,6 @@ void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr, depth maxWidth = findMaxWidth(g); bool is_anchored = !has_proper_successor(g.startDs, g) && out_degree(g.start, g); - bool has_offset_adj = hasOffsetAdjustments(rm, g); - - DEBUG_PRINTF("minWidth=%s, maxWidth=%s, anchored=%d, offset_adj=%d\n", - minWidth.str().c_str(), maxWidth.str().c_str(), is_anchored, - has_offset_adj); DepthMinMax match_depths = findMatchLengths(rm, g); DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); @@ -792,91 +925,122 @@ void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr, return; } - pruneVacuousEdges(g, expr); - pruneUnmatchable(g, expr, rm); + updateReportBounds(rm, g, expr); +} - if (!has_offset_adj) { - pruneExtUnreachable(g, expr); +/** + * If the pattern is completely anchored and has a min_length set, this can + * be converted to a min_offset. + */ +static +void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) { + if (has_proper_successor(g.startDs, g)) { + return; // not wholly anchored } - // We may have removed all the edges to accept, in which case this - // expression cannot match. - if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { - throw CompileError(expr.index, "Extended parameter " - "constraints can not be satisfied for any match from " - "this expression."); + replaceReports(g, [&rm](NFAVertex, ReportID id) { + const auto &report = rm.getReport(id); + if (report.minLength) { + Report new_report = report; + u64a min_len_offset = report.minLength - report.offsetAdjust; + new_report.minOffset = max(report.minOffset, min_len_offset); + new_report.minLength = 0; + return rm.getInternalId(new_report); + } + return id; + }); +} + +/** + * Clear offset bounds on reports that are not needed because they're satisfied + * by vertex depth. 
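+ *
+ * For example (illustrative): if every path to a reporting vertex consumes
+ * at least 10 bytes, a min_offset of 10 on its reports is always satisfied
+ * and can be dropped; likewise a max_offset no smaller than the vertex's
+ * maximum depth can be reset to MAX_OFFSET.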
+ */ +static +void removeUnneededOffsetBounds(NGHolder &g, ReportManager &rm) { + vector depths; + calcDepths(g, depths); + + replaceReports(g, [&](NFAVertex v, ReportID id) { + const auto &d = depths.at(g[v].index); + const depth &min_depth = min(d.fromStartDotStar.min, d.fromStart.min); + const depth &max_depth = maxDistFromStart(d); + + DEBUG_PRINTF("vertex %zu has min_depth=%s, max_depth=%s\n", g[v].index, + min_depth.str().c_str(), max_depth.str().c_str()); + + Report report = rm.getReport(id); // copy + bool modified = false; + if (report.minOffset && !report.offsetAdjust && + report.minOffset <= min_depth) { + report.minOffset = 0; + modified = true; + } + if (report.maxOffset != MAX_OFFSET && max_depth.is_finite() && + report.maxOffset >= max_depth) { + report.maxOffset = MAX_OFFSET; + modified = true; + } + if (modified) { + DEBUG_PRINTF("vertex %zu, changed bounds to [%llu,%llu]\n", + g[v].index, report.minOffset, report.maxOffset); + return rm.getInternalId(report); + } + + return id; + }); +} + +void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) { + if (!any_of_in(all_reports(g), + [&](ReportID id) { return rm.getReport(id).hasBounds(); })) { + DEBUG_PRINTF("no extparam bounds\n"); + return; } - // Remove reports on vertices without an edge to accept (which have been - // pruned above). - clearReports(g); + DEBUG_PRINTF("graph has extparam bounds\n"); - // Recalc. - minWidth = findMinWidth(g); - maxWidth = findMaxWidth(g); - is_anchored = proper_out_degree(g.startDs, g) == 0 && - out_degree(g.start, g); - has_offset_adj = hasOffsetAdjustments(rm, g); - - // If the pattern is completely anchored and has a min_length set, this can - // be converted to a min_offset. - if (expr.min_length && (expr.min_offset <= expr.min_length) && - is_anchored) { - DEBUG_PRINTF("convertinexpr.min_length to min_offset=%llu for " - "anchored case\n", expr.min_length); - expr.min_offset = expr.min_length; - expr.min_length = 0; + pruneVacuousEdges(g, rm); + if (can_never_match(g)) { + return; } - if (expr.min_offset && expr.min_offset <= minWidth && !has_offset_adj) { - DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n", - expr.min_offset); - expr.min_offset = 0; + pruneUnmatchable(g, rm); + if (can_never_match(g)) { + return; } - if (!hasExtParams(expr)) { + if (!hasOffsetAdjustments(rm, g)) { + pruneExtUnreachable(g, rm); + if (can_never_match(g)) { + return; + } + } + + replaceMinLengthWithOffset(g, rm); + if (can_never_match(g)) { return; } // If the pattern has a min_length and is of "ratchet" form with one // unbounded repeat, that repeat can become a bounded repeat. // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/ - if (expr.min_length && transformMinLengthToRepeat(rm, g, expr)) { - DEBUG_PRINTF("converted min_length to bounded repeat\n"); - // recalc - minWidth = findMinWidth(g); + transformMinLengthToRepeat(g, rm); + if (can_never_match(g)) { + return; } // If the pattern is unanchored, has a max_offset and has not asked for // SOM, we can use that knowledge to anchor it which will limit its // lifespan. Note that we can't use this transformation if there's a // min_length, as it's currently handled using "sly SOM". - - // Note that it is possible to handle graphs that have a combination of - // anchored and unanchored paths, but it's too tricky for the moment. 
- - if (expr.max_offset != MAX_OFFSET && !expr.som && !expr.min_length && - !has_offset_adj && isUnanchored(g)) { - if (anchorPatternWithBoundedRepeat(g, expr, minWidth, maxWidth)) { - DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(), - maxWidth.str().c_str()); - if (minWidth == maxWidth) { - // For a fixed width pattern, we can retire the offsets as they - // are implicit in the graph now. - expr.min_offset = 0; - expr.max_offset = MAX_OFFSET; - } + if (som == SOM_NONE) { + anchorPatternWithBoundedRepeat(g, rm); + if (can_never_match(g)) { + return; } } - //dumpGraph("final.dot", g); - if (!hasExtParams(expr)) { - return; - } - - set done; - updateReportBounds(rm, g, expr, g.accept, done); - updateReportBounds(rm, g, expr, g.acceptEod, done); + removeUnneededOffsetBounds(g, rm); } } // namespace ue2 diff --git a/src/nfagraph/ng_extparam.h b/src/nfagraph/ng_extparam.h index 798acd3f..ae818075 100644 --- a/src/nfagraph/ng_extparam.h +++ b/src/nfagraph/ng_extparam.h @@ -34,15 +34,30 @@ #ifndef NG_EXTPARAM_H #define NG_EXTPARAM_H +#include "som/som.h" + namespace ue2 { -struct CompileContext; class ExpressionInfo; class NGHolder; class ReportManager; -void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr, - const CompileContext &cc); +/** + * \brief Propagate extended parameter information to vertex reports. Will + * throw CompileError if this expression's extended parameters are not + * satisfiable. + * + * This will also remove extended parameter constraints that are guaranteed to + * be satisfied from ExpressionInfo. + */ +void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr, + ReportManager &rm); + +/** + * \brief Perform graph reductions (if possible) to do with extended parameter + * constraints on reports. + */ +void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som); } // namespace ue2 diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 7d340d79..43a502f7 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -41,6 +41,7 @@ #include "nfagraph/ng_depth.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_mcclellan.h" +#include "nfagraph/ng_reports.h" #include "nfagraph/ng_prune.h" #include "nfagraph/ng_util.h" #include "smallwrite/smallwrite_internal.h" @@ -179,8 +180,23 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) { return; } - if (expr.som || expr.min_length || isVacuous(g)) { - poisoned = true; /* cannot support in smwr */ + if (expr.som) { + DEBUG_PRINTF("no SOM support in small-write engine\n"); + poisoned = true; + return; + } + + if (isVacuous(g)) { + DEBUG_PRINTF("no vacuous graph support in small-write engine\n"); + poisoned = true; + return; + } + + if (any_of_in(::ue2::all_reports(g), [&](ReportID id) { + return rm.getReport(id).minLength > 0; + })) { + DEBUG_PRINTF("no min_length extparam support in small-write engine\n"); + poisoned = true; return; } diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index e6ffa9ea..d2383479 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -168,7 +168,7 @@ static const expected_info ei_test[] = { // Some cases with extended parameters. 
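     // (row layout follows expected_info: pattern, ext params, expected min
     // and max match widths, then the unordered_matches / matches_at_eod /
     // matches_only_at_eod flags)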
{"^abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, - {"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 6, UINT_MAX, 0, 0, 0}, + {"^abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MAX_OFFSET, 0, 10, 0, 0}, 6, 10, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 100, 0}, 100, UINT_MAX, 0, 0, 0}, {"abc.*def", {HS_EXT_FLAG_MIN_LENGTH, 0, 0, 5, 0}, 6, UINT_MAX, 0, 0, 0}, @@ -185,7 +185,7 @@ static const expected_info ei_test[] = { {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, UINT_MAX, 0, 0, 0}, {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0}, {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 10, 2}, - 4, UINT_MAX, 0, 0, 0}, + 10, UINT_MAX, 0, 0, 0}, {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, 4, UINT_MAX, 0, 0, 0}, {"^abc.*def", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, @@ -194,7 +194,7 @@ static const expected_info ei_test[] = { {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 1}, 5, 7, 0, 0, 0}, {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE, 0, 0, 0, 2}, 4, 8, 0, 0, 0}, {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_LENGTH, 0, 0, 8, 2}, - 4, 8, 0, 0, 0}, + 8, 8, 0, 0, 0}, {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MIN_OFFSET, 6, 0, 0, 2}, 4, 8, 0, 0, 0}, {"^abcdef", {HS_EXT_FLAG_EDIT_DISTANCE | HS_EXT_FLAG_MAX_OFFSET, 0, 6, 0, 2}, From a4df49dd667581de2302bc7df48a1d3b9c821bde Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 24 Mar 2017 10:41:16 +1100 Subject: [PATCH 184/326] unit: better output from expr_info unit tests --- unit/hyperscan/expr_info.cpp | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/unit/hyperscan/expr_info.cpp b/unit/hyperscan/expr_info.cpp index d2383479..7cc6abd7 100644 --- a/unit/hyperscan/expr_info.cpp +++ b/unit/hyperscan/expr_info.cpp @@ -51,6 +51,53 @@ struct expected_info { char matches_only_at_eod; }; +ostream& operator<<(ostream &os, const hs_expr_ext &ext) { + if (!ext.flags) { + return os; + } + bool first = true; + if (ext.flags & HS_EXT_FLAG_MIN_OFFSET) { + if (!first) { + os << ", "; + } + os << "min_offset=" << ext.min_offset; + first = false; + } + if (ext.flags & HS_EXT_FLAG_MAX_OFFSET) { + if (!first) { + os << ", "; + } + os << "max_offset=" << ext.max_offset; + first = false; + } + if (ext.flags & HS_EXT_FLAG_MIN_LENGTH) { + if (!first) { + os << ", "; + } + os << "min_length=" << ext.min_length; + first = false; + } + if (ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) { + if (!first) { + os << ", "; + } + os << "edit_distance=" << ext.edit_distance; + first = false; + } + return os; +} + +// For Google Test. +void PrintTo(const expected_info &ei, ostream *os) { + *os << "expected_info: " + << "pattern=\"" << ei.pattern << "\"" + << ", ext={" << ei.ext << "}" + << ", min=" << ei.min << ", max=" << ei.max + << ", unordered_matches=" << (ei.unordered_matches ? 1 : 0) + << ", matches_at_eod=" << (ei.matches_at_eod ? 1 : 0) + << ", matches_only_at_eod=" << (ei.matches_only_at_eod ? 
1 : 0); +} + class ExprInfop : public TestWithParam { }; From 7d23a7e2d35b3cda89bbe9a017e2c44003dd9db9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 29 Mar 2017 14:33:37 +1100 Subject: [PATCH 185/326] som: move som_type into namespace ue2 --- src/som/som.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/som/som.h b/src/som/som.h index 4a380984..e759cf0a 100644 --- a/src/som/som.h +++ b/src/som/som.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,17 +26,22 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Common SOM definitions. */ #ifndef UE2_SOM_H #define UE2_SOM_H +namespace ue2 { + /** \brief Enumeration specifying a start of match behaviour. */ enum som_type { SOM_NONE, //!< No SOM required SOM_LEFT //!< Exact leftmost SOM }; +} // namespace ue2 + #endif // UE2_SOM_H From 1f3a000bfa440bbbad404a5eeff992099b588acd Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 29 Mar 2017 15:08:16 +1100 Subject: [PATCH 186/326] dump_util: move into namespace ue2 --- src/util/dump_util.cpp | 6 +++++- src/util/dump_util.h | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/util/dump_util.cpp b/src/util/dump_util.cpp index 5b961367..782cba7a 100644 --- a/src/util/dump_util.cpp +++ b/src/util/dump_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,8 @@ using namespace std; +namespace ue2 { + FILE *fopen_or_throw(const char *path, const char *mode) { FILE *f = fopen(path, mode); if (!f) { @@ -40,3 +42,5 @@ FILE *fopen_or_throw(const char *path, const char *mode) { } return f; } + +} // namespace ue2 diff --git a/src/util/dump_util.h b/src/util/dump_util.h index 487d2e7c..f5ebe94a 100644 --- a/src/util/dump_util.h +++ b/src/util/dump_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,9 +31,13 @@ #include +namespace ue2 { + /** * Same as fopen(), but on error throws an exception rather than returning NULL. 
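 *
 * A sketch of typical use (illustrative only):
 *
 *     FILE *f = fopen_or_throw("rose_dump.txt", "w");
 *     // ... write dump output ...
 *     fclose(f);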
*/ FILE *fopen_or_throw(const char *path, const char *mode); +} // namespace ue2 + #endif From 803f61d818d135441dd8a0363e5bb614febf8492 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 29 Mar 2017 13:04:27 +1100 Subject: [PATCH 187/326] ICC doesn't have a __POPCNT__ macro --- src/util/popcount.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/util/popcount.h b/src/util/popcount.h index d882a672..15361380 100644 --- a/src/util/popcount.h +++ b/src/util/popcount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,8 +30,8 @@ * \brief Platform specific popcount functions */ -#ifndef POPCOUNT_H_075D843B4545B6 -#define POPCOUNT_H_075D843B4545B6 +#ifndef UTIL_POPCOUNT_H_ +#define UTIL_POPCOUNT_H_ #include "ue2common.h" @@ -40,6 +40,8 @@ #define HAVE_POPCOUNT_INSTR #elif defined(_WIN32) && defined(__AVX__) // TODO: fix win preproc #define HAVE_POPCOUNT_INSTR +#elif defined(__INTEL_COMPILER) && defined(__SSE4_2__) +#define HAVE_POPCOUNT_INSTR #endif static really_inline @@ -76,5 +78,5 @@ u32 popcount64(u64a x) { #endif } -#endif /* POPCOUNT_H_075D843B4545B6 */ +#endif /* UTIL_POPCOUNT_H_ */ From 18f843bcc16501f3d9586f59baaced78b8bd86ad Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 29 Mar 2017 13:06:26 +1100 Subject: [PATCH 188/326] rose: add CLEAR_WORK_DONE instruction Preparatory work for allowing fragments to be shared between literals that squash groups and those that don't. --- src/rose/program_runtime.h | 6 ++++++ src/rose/rose_build_bytecode.cpp | 17 +++++++++++++++++ src/rose/rose_build_dump.cpp | 3 +++ src/rose/rose_build_program.cpp | 1 + src/rose/rose_build_program.h | 8 ++++++++ src/rose/rose_program.h | 11 ++++++++++- 6 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index d67c307f..30ff8527 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -2166,6 +2166,12 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } } PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CLEAR_WORK_DONE) { + DEBUG_PRINTF("clear work_done flag\n"); + work_done = 0; + } + PROGRAM_NEXT_INSTRUCTION } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 32a1d075..f51e0449 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4517,6 +4517,18 @@ u32 writeLiteralProgram(const RoseBuildImpl &build, build_context &bc, bool is_anchored_program) { assert(!lit_ids.empty()); + // If we have multiple literals and any of them squash groups, we will have + // to add a CLEAR_WORK_DONE instruction to each literal program block to + // clear the work_done flags so that it's only set if a state has been + // switched on for that literal. + + // Note that we add it to every lit program, as they may be + // reordered/uniquified by assembleProgramBlocks() above. 
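+    // (The SQUASH_GROUPS instruction only squashes when the work_done flag
+    // is set, so without the clear, work done by an earlier literal's block
+    // could squash groups for a literal that switched nothing on.)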
+    const bool needs_clear_work = lit_ids.size() > 1 &&
+        any_of_in(lit_ids, [&](u32 lit_id) {
+            return build.literal_info.at(lit_id).squash_group;
+        });
+
     vector<RoseProgram> blocks;

     const vector<RoseEdge> no_edges;
@@ -4531,6 +4543,11 @@ u32 writeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
         }
         auto prog = buildLiteralProgram(build, bc, prog_build, lit_id,
                                         *edges_ptr, is_anchored_program);
+        if (needs_clear_work) {
+            RoseProgram clear_block;
+            clear_block.add_before_end(make_unique<RoseInstrClearWorkDone>());
+            prog.add_block(move(clear_block));
+        }
         blocks.push_back(move(prog));
     }

diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp
index 0d05e8ac..0e53d59d 100644
--- a/src/rose/rose_build_dump.cpp
+++ b/src/rose/rose_build_dump.cpp
@@ -1100,6 +1100,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION

+            PROGRAM_CASE(CLEAR_WORK_DONE) {}
+            PROGRAM_NEXT_INSTRUCTION
+
             default:
                 os << "  UNKNOWN (code " << int{code} << ")" << endl;
                 os << "  <stopping>" << endl;
diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
index 112b93f9..1c0fd2ab 100644
--- a/src/rose/rose_build_program.cpp
+++ b/src/rose/rose_build_program.cpp
@@ -48,6 +48,7 @@ RoseInstrSomZero::~RoseInstrSomZero() = default;
 RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default;
 RoseInstrMatcherEod::~RoseInstrMatcherEod() = default;
 RoseInstrEnd::~RoseInstrEnd() = default;
+RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default;

 using OffsetMap = RoseInstruction::OffsetMap;

diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
index fd966a8d..a63f03c8 100644
--- a/src/rose/rose_build_program.h
+++ b/src/rose/rose_build_program.h
@@ -1851,6 +1851,14 @@ public:
     }
 };

+class RoseInstrClearWorkDone
+    : public RoseInstrBaseTrivial<ROSE_INSTR_CLEAR_WORK_DONE,
+                                  ROSE_STRUCT_CLEAR_WORK_DONE,
+                                  RoseInstrClearWorkDone> {
+public:
+    ~RoseInstrClearWorkDone() override;
+};
+
 class RoseInstrEnd
     : public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
                                   RoseInstrEnd> {
diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h
index 652b9109..cf1a9eb6 100644
--- a/src/rose/rose_program.h
+++ b/src/rose/rose_program.h
@@ -141,7 +141,12 @@ enum RoseInstructionCode {
      */
     ROSE_INSTR_CHECK_MED_LIT_NOCASE,

-    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MED_LIT_NOCASE //!< Sentinel.
+    /**
+     * \brief Clear the "work done" flag used by the SQUASH_GROUPS instruction.
+     */
+    ROSE_INSTR_CLEAR_WORK_DONE,
+
+    LAST_ROSE_INSTRUCTION = ROSE_INSTR_CLEAR_WORK_DONE //!< Sentinel.
 };

 struct ROSE_STRUCT_END {
@@ -517,4 +522,8 @@ struct ROSE_STRUCT_CHECK_MED_LIT_NOCASE {
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };

+struct ROSE_STRUCT_CLEAR_WORK_DONE {
+    u8 code; //!< From enum RoseInstructionCode.
+};
+
 #endif // ROSE_ROSE_PROGRAM_H

From 5fe524fbb3510e5c05c8b8c67ffce546a746d011 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 29 Mar 2017 13:10:07 +1100
Subject: [PATCH 189/326] rose: allow lits that squash to share fragments

---
 src/rose/rose_build_bytecode.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index f51e0449..ea898408 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -4699,14 +4699,6 @@ void groupByFragment(RoseBuildImpl &build) {
             continue;
         }

-        // Combining fragments that squash their groups is unsafe.
- if (info.squash_group) { - lit_to_frag.emplace(lit_id, frag_id); - fragments.emplace_back(frag_id, groups); - frag_id++; - continue; - } - DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id, dumpString(lit.s).c_str()); auto &fi = frag_info[getFragment(lit)]; From 82011831387b5eac995f9945ee7a1125509476e8 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 29 Mar 2017 16:39:16 +1100 Subject: [PATCH 190/326] Check compiler architecture flags in one place --- src/crc32.c | 9 +++-- src/database.h | 5 ++- src/fdr/fdr.c | 5 ++- src/fdr/teddy.h | 5 ++- src/fdr/teddy_avx2.c | 5 ++- src/hwlm/noodle_engine.c | 5 ++- src/nfa/limex_accel.c | 7 ++-- src/nfa/limex_shuffle.h | 5 ++- src/nfa/mcsheng.c | 7 ++-- src/nfa/multishufti.c | 5 ++- src/nfa/multitruffle.c | 5 ++- src/nfa/multivermicelli.c | 5 ++- src/nfa/shufti.c | 5 ++- src/nfa/shufti_common.h | 5 ++- src/nfa/truffle.c | 5 ++- src/nfa/truffle_common.h | 5 ++- src/util/arch.h | 75 +++++++++++++++++++++++++++++++++++ src/util/bitutils.h | 20 ++++------ src/util/cpuid_flags.c | 5 ++- src/util/masked_move.c | 3 +- src/util/masked_move.h | 6 ++- src/util/math.h | 4 +- src/util/popcount.h | 10 +---- src/util/simd_types.h | 7 ++-- src/util/simd_utils.h | 58 ++++++++++++++------------- src/util/state_compress.c | 11 ++--- unit/internal/bitutils.cpp | 5 ++- unit/internal/database.cpp | 5 ++- unit/internal/masked_move.cpp | 5 ++- unit/internal/shuffle.cpp | 5 ++- unit/internal/simd_utils.cpp | 5 ++- 31 files changed, 203 insertions(+), 109 deletions(-) create mode 100644 src/util/arch.h diff --git a/src/crc32.c b/src/crc32.c index b85acc7f..9a9b6f26 100644 --- a/src/crc32.c +++ b/src/crc32.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,6 +29,7 @@ #include "crc32.h" #include "config.h" #include "ue2common.h" +#include "util/arch.h" #if defined(HAVE_C_X86INTRIN_H) #include @@ -36,7 +37,7 @@ #include #endif -#ifndef __SSE4_2__ +#if !defined(HAVE_SSE42) /*** *** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD @@ -582,7 +583,7 @@ u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf, return crc; } -#else // __SSE4_2__ +#else // HAVE_SSE42 #ifdef ARCH_64_BIT #define CRC_WORD 8 @@ -638,7 +639,7 @@ u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf, // Externally visible function u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) { -#ifdef __SSE4_2__ +#if defined(HAVE_SSE42) u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen); #else u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen); diff --git a/src/database.h b/src/database.h index 399513fc..9b24abd4 100644 --- a/src/database.h +++ b/src/database.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,6 +41,7 @@ extern "C" #include "hs_compile.h" // for HS_MODE_ flags #include "hs_version.h" #include "ue2common.h" +#include "util/arch.h" #define HS_DB_VERSION HS_VERSION_32BIT #define HS_DB_MAGIC (0xdbdbdbdbU) @@ -59,7 +60,7 @@ typedef u64a platform_t; static UNUSED const platform_t hs_current_platform = { -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) HS_PLATFORM_NOAVX2 | 
#endif 0, diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index d5d40c38..74e6c577 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -34,6 +34,7 @@ #include "flood_runtime.h" #include "teddy.h" #include "teddy_internal.h" +#include "util/arch.h" #include "util/simd_utils.h" #include "util/uniform_ops.h" @@ -123,7 +124,7 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = { static really_inline u64a andn(const u32 a, const u8 *b) { u64a r; -#if defined(__BMI__) +#if defined(HAVE_BMI) __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b)); #else r = unaligned_load_u32(b) & ~a; @@ -783,7 +784,7 @@ hwlm_error_t fdr_engine_exec(const struct FDR *fdr, return HWLM_SUCCESS; } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) #define ONLY_AVX2(func) func #else #define ONLY_AVX2(func) NULL diff --git a/src/fdr/teddy.h b/src/fdr/teddy.h index 78cba847..35756c53 100644 --- a/src/fdr/teddy.h +++ b/src/fdr/teddy.h @@ -34,6 +34,7 @@ #define TEDDY_H_ #include "hwlm/hwlm.h" // for hwlm_group_t +#include "util/arch.h" struct FDR; // forward declaration from fdr_internal.h struct FDR_Runtime_Args; @@ -70,7 +71,7 @@ hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control); -#if defined(__AVX2__) +#if defined(HAVE_AVX2) hwlm_error_t fdr_exec_teddy_avx2_msks1_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, @@ -104,6 +105,6 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, const struct FDR_Runtime_Args *a, hwlm_group_t control); -#endif /* __AVX2__ */ +#endif /* HAVE_AVX2 */ #endif /* TEDDY_H_ */ diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index 22b74408..ebc1362d 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -35,9 +35,10 @@ #include "teddy.h" #include "teddy_internal.h" #include "teddy_runtime_common.h" +#include "util/arch.h" #include "util/simd_utils.h" -#if defined(__AVX2__) +#if defined(HAVE_AVX2) #ifdef ARCH_64_BIT #define CONFIRM_FAT_TEDDY(var, bucket, offset, reason, conf_fn) \ @@ -687,4 +688,4 @@ hwlm_error_t fdr_exec_teddy_avx2_msks4_pck_fat(const struct FDR *fdr, return HWLM_SUCCESS; } -#endif // __AVX2__ +#endif // HAVE_AVX2 diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index 1d1ab4e6..a30a59a5 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "noodle_engine.h" #include "noodle_internal.h" #include "ue2common.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/compare.h" #include "util/masked_move.h" @@ -109,7 +110,7 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen, return HWLM_SUCCESS; } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) #define CHUNKSIZE 32 #define MASK_TYPE m256 #include "noodle_engine_avx2.c" diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index c74c7079..a96dea43 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,6 +44,7 @@ #include "multivermicelli.h" #include "ue2common.h" 
#include "vermicelli.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" @@ -118,7 +119,7 @@ size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex, DEBUG_PRINTF("using PSHUFB for 256-bit shuffle\n"); m256 accelPerm = limex->accelPermute; m256 accelComp = limex->accelCompare; -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); assert((idx1 & idx2) == 0); // should be no shared bits @@ -153,7 +154,7 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex, DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n"); m512 accelPerm = limex->accelPermute; m512 accelComp = limex->accelCompare; -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo); u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi); u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo); diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index 5ca8fce0..5d9b3ef8 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +38,7 @@ #define LIMEX_SHUFFLE_H #include "ue2common.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" @@ -49,7 +50,7 @@ u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { return (u32)rv; } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline u32 packedExtract256(m256 s, const m256 permute, const m256 compare) { // vpshufb doesn't cross lanes, so this is a bit of a cheat diff --git a/src/nfa/mcsheng.c b/src/nfa/mcsheng.c index 98db3f0a..322cde0a 100644 --- a/src/nfa/mcsheng.c +++ b/src/nfa/mcsheng.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "nfa_api.h" #include "nfa_api_queue.h" #include "nfa_internal.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/compare.h" #include "util/simd_utils.h" @@ -168,7 +169,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, * extract a single copy of the state from the u32 for checking. 
*/ u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101; -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) u32 sheng_limit_x4 = sheng_limit * 0x01010101; m128 simd_stop_limit = set4x32(sheng_stop_limit_x4); m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit); @@ -189,7 +190,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, u8 s_gpr; while (c < c_end) { -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) /* This version uses pext for efficently bitbashing out scaled * versions of the bytes to process from a u64a */ diff --git a/src/nfa/multishufti.c b/src/nfa/multishufti.c index cb85b718..80a2bcd0 100644 --- a/src/nfa/multishufti.c +++ b/src/nfa/multishufti.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,12 +34,13 @@ #include "config.h" #include "ue2common.h" +#include "util/arch.h" #include "multishufti.h" #include "multiaccel_common.h" -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) #define MATCH_ALGO long_ #include "multiaccel_long.h" diff --git a/src/nfa/multitruffle.c b/src/nfa/multitruffle.c index 381bda93..c333414c 100644 --- a/src/nfa/multitruffle.c +++ b/src/nfa/multitruffle.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,6 +28,7 @@ #include "config.h" #include "ue2common.h" +#include "util/arch.h" #include "multitruffle.h" #include "util/bitutils.h" @@ -35,7 +36,7 @@ #include "multiaccel_common.h" -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) #define MATCH_ALGO long_ #include "multiaccel_long.h" diff --git a/src/nfa/multivermicelli.c b/src/nfa/multivermicelli.c index ab6d2cf2..fe6cbdb5 100644 --- a/src/nfa/multivermicelli.c +++ b/src/nfa/multivermicelli.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,12 +28,13 @@ #include "config.h" #include "ue2common.h" +#include "util/arch.h" #include "multivermicelli.h" #include "multiaccel_common.h" -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) #define MATCH_ALGO long_ #include "multiaccel_long.h" diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index d68b1b04..f7b4403e 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "shufti.h" #include "ue2common.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" #include "util/unaligned.h" @@ -55,7 +56,7 @@ const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, return buf_end; } -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) /* Normal SSSE3 shufti */ static really_inline diff --git a/src/nfa/shufti_common.h b/src/nfa/shufti_common.h index e63ad27a..7048a8b1 100644 --- 
a/src/nfa/shufti_common.h +++ b/src/nfa/shufti_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,7 @@ #include "ue2common.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" #include "util/unaligned.h" @@ -86,7 +87,7 @@ void dumpMsk##_t##AsChars(m##_t msk) { \ #endif -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) #ifdef DEBUG DUMP_MSK(128) diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c index 1eff269a..6d82f8e1 100644 --- a/src/nfa/truffle.c +++ b/src/nfa/truffle.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,12 +33,13 @@ #include "ue2common.h" #include "truffle.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" #include "truffle_common.h" -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) static really_inline const u8 *lastMatch(const u8 *buf, u32 z) { diff --git a/src/nfa/truffle_common.h b/src/nfa/truffle_common.h index 7368e550..dc9c726c 100644 --- a/src/nfa/truffle_common.h +++ b/src/nfa/truffle_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,13 +29,14 @@ #ifndef TRUFFLE_COMMON_H_ #define TRUFFLE_COMMON_H_ +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" /* * Common stuff for all versions of truffle (single, multi and multidouble) */ -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { diff --git a/src/util/arch.h b/src/util/arch.h new file mode 100644 index 00000000..8584ee65 --- /dev/null +++ b/src/util/arch.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Per-platform architecture definitions + */ + +#ifndef UTIL_ARCH_H_ +#define UTIL_ARCH_H_ + +#if defined(__SSE2__) || defined(_M_X64) || (_M_IX86_FP >= 2) +#define HAVE_SSE2 +#endif + +#if defined(__SSE4_1__) || (defined(_WIN32) && defined(__AVX__)) +#define HAVE_SSE41 +#endif + +#if defined(__SSE4_2__) || (defined(_WIN32) && defined(__AVX__)) +#define HAVE_SSE42 +#endif + +#if defined(__AVX__) +#define HAVE_AVX +#endif + +#if defined(__AVX2__) +#define HAVE_AVX2 +#endif + +/* + * ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros + */ +#if defined(__POPCNT__) || \ + (defined(__INTEL_COMPILER) && defined(__SSE4_2__)) || \ + (defined(_WIN32) && defined(__AVX__)) +#define HAVE_POPCOUNT_INSTR +#endif + +#if defined(__BMI__) || (defined(_WIN32) && defined(__AVX2__)) || \ + (defined(__INTEL_COMPILER) && defined(__AVX2__)) +#define HAVE_BMI +#endif + +#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) || \ + (defined(__INTEL_COMPILER) && defined(__AVX2__)) +#define HAVE_BMI2 +#endif + +#endif // UTIL_ARCH_H_ diff --git a/src/util/bitutils.h b/src/util/bitutils.h index f9e8d151..66a07571 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -35,6 +35,7 @@ #include "ue2common.h" #include "popcount.h" +#include "util/arch.h" #ifdef __cplusplus # if defined(HAVE_CXX_X86INTRIN_H) @@ -269,7 +270,7 @@ u32 findAndClearMSB_64(u64a *v) { static really_inline u32 compress32(u32 x, u32 m) { -#if defined(__BMI2__) +#if defined(HAVE_BMI2) // BMI2 has a single instruction for this operation. return _pext_u32(x, m); #else @@ -304,7 +305,7 @@ u32 compress32(u32 x, u32 m) { static really_inline u64a compress64(u64a x, u64a m) { -#if defined(ARCH_X86_64) && defined(__BMI2__) +#if defined(ARCH_X86_64) && defined(HAVE_BMI2) // BMI2 has a single instruction for this operation. return _pext_u64(x, m); #else @@ -340,7 +341,7 @@ u64a compress64(u64a x, u64a m) { static really_inline u32 expand32(u32 x, u32 m) { -#if defined(__BMI2__) +#if defined(HAVE_BMI2) // BMI2 has a single instruction for this operation. return _pdep_u32(x, m); #else @@ -380,7 +381,7 @@ u32 expand32(u32 x, u32 m) { static really_inline u64a expand64(u64a x, u64a m) { -#if defined(ARCH_X86_64) && defined(__BMI2__) +#if defined(ARCH_X86_64) && defined(HAVE_BMI2) // BMI2 has a single instruction for this operation. return _pdep_u64(x, m); #else @@ -471,14 +472,9 @@ u32 rank_in_mask64(u64a mask, u32 bit) { return popcount64(mask); } -#if defined(__BMI2__) || (defined(_WIN32) && defined(__AVX2__)) || \ - (defined(__INTEL_COMPILER) && defined(__AVX2__)) -#define HAVE_PEXT -#endif - static really_inline u32 pext32(u32 x, u32 mask) { -#if defined(HAVE_PEXT) +#if defined(HAVE_BMI2) // Intel BMI2 can do this operation in one instruction. 
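     // (pext gathers the bits of x selected by mask and packs them into the
     // low-order bits of the result, e.g. pext(0xabcd, 0xff00) == 0xab.)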
return _pext_u32(x, mask); #else @@ -498,7 +494,7 @@ u32 pext32(u32 x, u32 mask) { static really_inline u64a pext64(u64a x, u64a mask) { -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) // Intel BMI2 can do this operation in one instruction. return _pext_u64(x, mask); #else @@ -516,7 +512,7 @@ u64a pext64(u64a x, u64a mask) { #endif } -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) static really_inline u64a pdep64(u64a x, u64a mask) { return _pdep_u64(x, mask); diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index dba147ee..8ac0e63c 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "ue2common.h" #include "hs_compile.h" // for HS_MODE_ flags #include "hs_internal.h" +#include "util/arch.h" #ifndef _WIN32 #include @@ -131,7 +132,7 @@ u64a cpuid_flags(void) { cap |= HS_CPU_FEATURES_AVX2; } -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) cap &= ~HS_CPU_FEATURES_AVX2; #endif diff --git a/src/util/masked_move.c b/src/util/masked_move.c index ec788db7..001cd49f 100644 --- a/src/util/masked_move.c +++ b/src/util/masked_move.c @@ -29,8 +29,9 @@ #include "ue2common.h" #include "masked_move.h" +#include "util/arch.h" -#if defined(__AVX2__) +#if defined(HAVE_AVX2) /* masks for masked moves */ /* magic mask for maskload (vmmaskmovq) - described in UE-2424 */ diff --git a/src/util/masked_move.h b/src/util/masked_move.h index 09276e80..b51ff632 100644 --- a/src/util/masked_move.h +++ b/src/util/masked_move.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,7 +29,9 @@ #ifndef MASKED_MOVE_H #define MASKED_MOVE_H -#if defined(__AVX2__) +#include "arch.h" + +#if defined(HAVE_AVX2) #include "unaligned.h" #include "simd_utils.h" diff --git a/src/util/math.h b/src/util/math.h index 80ad4927..3fd69dba 100644 --- a/src/util/math.h +++ b/src/util/math.h @@ -29,6 +29,8 @@ #ifndef UTIL_MATH_H_ #define UTIL_MATH_H_ +#include "arch.h" + #include #ifdef __cplusplus @@ -59,7 +61,7 @@ static really_inline double our_pow(double x, double y) { -#if defined(__AVX__) +#if defined(HAVE_AVX) /* * Clear the upper half of AVX registers before calling into the math lib. * On some versions of glibc this can save thousands of AVX-to-SSE diff --git a/src/util/popcount.h b/src/util/popcount.h index 15361380..eb08f6b1 100644 --- a/src/util/popcount.h +++ b/src/util/popcount.h @@ -34,15 +34,7 @@ #define UTIL_POPCOUNT_H_ #include "ue2common.h" - -// We have a native popcount where the compiler has defined __POPCNT__. 
-#if defined(__POPCNT__) -#define HAVE_POPCOUNT_INSTR -#elif defined(_WIN32) && defined(__AVX__) // TODO: fix win preproc -#define HAVE_POPCOUNT_INSTR -#elif defined(__INTEL_COMPILER) && defined(__SSE4_2__) -#define HAVE_POPCOUNT_INSTR -#endif +#include "util/arch.h" static really_inline u32 popcount32(u32 x) { diff --git a/src/util/simd_types.h b/src/util/simd_types.h index d6e5d6a3..ec86d4a9 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #define SIMD_TYPES_H #include "config.h" +#include "util/arch.h" #include "ue2common.h" // more recent headers are bestest, but only if we can use them @@ -61,13 +62,13 @@ #error no intrinsics! #endif -#if defined(__SSE2__) || defined(_M_X64) || (_M_IX86_FP >= 2) +#if defined(HAVE_SSE2) typedef __m128i m128; #else typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128; #endif -#if defined(__AVX2__) +#if defined(HAVE_AVX2) typedef __m256i m256; #else typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256; diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 484b47c0..ccbcabbb 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -38,6 +38,8 @@ #endif #include "config.h" +#include "util/arch.h" + #include // for memcpy // more recent headers are bestest, but only if we can use them @@ -141,7 +143,7 @@ static really_inline u32 diffrich128(m128 a, m128 b) { * returns a 4-bit mask indicating which 64-bit words contain differences. */ static really_inline u32 diffrich64_128(m128 a, m128 b) { -#if defined(__SSE_41__) +#if defined(HAVE_SSE41) a = _mm_cmpeq_epi64(a, b); return ~(_mm_movemask_ps(_mm_castsi128_ps(a))) & 0x5; #else @@ -186,11 +188,11 @@ m128 load_m128_from_u64a(const u64a *p) { #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed) #define lshiftbyte_m128(a, count_immed) _mm_slli_si128(a, count_immed) -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) // TODO: this entire file needs restructuring - this carveout is awful #define extractlow64from256(a) movq(a.lo) #define extractlow32from256(a) movd(a.lo) -#if defined(__SSE4_1__) +#if defined(HAVE_SSE41) #define extract32from256(a, imm) _mm_extract_epi32((imm >> 2) ? a.hi : a.lo, imm % 4) #define extract64from256(a, imm) _mm_extract_epi64((imm >> 2) ? 
a.hi : a.lo, imm % 2) #else @@ -288,7 +290,7 @@ void clearbit128(m128 *ptr, unsigned int n) { static really_inline char testbit128(m128 val, unsigned int n) { const m128 mask = mask1bit128(n); -#if defined(__SSE4_1__) +#if defined(HAVE_SSE41) return !_mm_testz_si128(mask, val); #else return isnonzero128(and128(mask, val)); @@ -307,7 +309,7 @@ m128 pshufb(m128 a, m128 b) { static really_inline m256 vpshufb(m256 a, m256 b) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return _mm256_shuffle_epi8(a, b); #else m256 rv; @@ -348,7 +350,7 @@ m128 sub_u8_m128(m128 a, m128 b) { **** 256-bit Primitives ****/ -#if defined(__AVX2__) +#if defined(HAVE_AVX2) #define lshift64_m256(a, b) _mm256_slli_epi64((a), (b)) #define rshift64_m256(a, b) _mm256_srli_epi64((a), (b)) @@ -413,7 +415,7 @@ m256 set2x128(m128 a) { #endif static really_inline m256 zeroes256(void) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return _mm256_setzero_si256(); #else m256 rv = {zeroes128(), zeroes128()}; @@ -422,7 +424,7 @@ static really_inline m256 zeroes256(void) { } static really_inline m256 ones256(void) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) m256 rv = _mm256_set1_epi8(0xFF); #else m256 rv = {ones128(), ones128()}; @@ -430,7 +432,7 @@ static really_inline m256 ones256(void) { return rv; } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline m256 and256(m256 a, m256 b) { return _mm256_and_si256(a, b); } @@ -443,7 +445,7 @@ static really_inline m256 and256(m256 a, m256 b) { } #endif -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline m256 or256(m256 a, m256 b) { return _mm256_or_si256(a, b); } @@ -456,7 +458,7 @@ static really_inline m256 or256(m256 a, m256 b) { } #endif -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline m256 xor256(m256 a, m256 b) { return _mm256_xor_si256(a, b); } @@ -469,7 +471,7 @@ static really_inline m256 xor256(m256 a, m256 b) { } #endif -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline m256 not256(m256 a) { return _mm256_xor_si256(a, ones256()); } @@ -482,7 +484,7 @@ static really_inline m256 not256(m256 a) { } #endif -#if defined(__AVX2__) +#if defined(HAVE_AVX2) static really_inline m256 andnot256(m256 a, m256 b) { return _mm256_andnot_si256(a, b); } @@ -496,7 +498,7 @@ static really_inline m256 andnot256(m256 a, m256 b) { #endif static really_inline int diff256(m256 a, m256 b) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return !!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(a, b)) ^ (int)-1); #else return diff128(a.lo, b.lo) || diff128(a.hi, b.hi); @@ -504,7 +506,7 @@ static really_inline int diff256(m256 a, m256 b) { } static really_inline int isnonzero256(m256 a) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return !!diff256(a, zeroes256()); #else return isnonzero128(or128(a.lo, a.hi)); @@ -516,7 +518,7 @@ static really_inline int isnonzero256(m256 a) { * mask indicating which 32-bit words contain differences. 
*/ static really_inline u32 diffrich256(m256 a, m256 b) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) a = _mm256_cmpeq_epi32(a, b); return ~(_mm256_movemask_ps(_mm256_castsi256_ps(a))) & 0xFF; #else @@ -540,7 +542,7 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) { // aligned load static really_inline m256 load256(const void *ptr) { assert(ISALIGNED_N(ptr, alignof(m256))); -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return _mm256_load_si256((const m256 *)ptr); #else m256 rv = { load128(ptr), load128((const char *)ptr + 16) }; @@ -550,7 +552,7 @@ static really_inline m256 load256(const void *ptr) { // aligned load of 128-bit value to low and high part of 256-bit value static really_inline m256 load2x128(const void *ptr) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return set2x128(load128(ptr)); #else assert(ISALIGNED_N(ptr, alignof(m128))); @@ -567,7 +569,7 @@ static really_inline m256 loadu2x128(const void *ptr) { // aligned store static really_inline void store256(void *ptr, m256 a) { assert(ISALIGNED_N(ptr, alignof(m256))); -#if defined(__AVX2__) +#if defined(HAVE_AVX2) _mm256_store_si256((m256 *)ptr, a); #else ptr = assume_aligned(ptr, 16); @@ -577,7 +579,7 @@ static really_inline void store256(void *ptr, m256 a) { // unaligned load static really_inline m256 loadu256(const void *ptr) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return _mm256_loadu_si256((const m256 *)ptr); #else m256 rv = { loadu128(ptr), loadu128((const char *)ptr + 16) }; @@ -587,7 +589,7 @@ static really_inline m256 loadu256(const void *ptr) { // unaligned store static really_inline void storeu256(void *ptr, m256 a) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) _mm256_storeu_si256((m256 *)ptr, a); #else storeu128(ptr, a.lo); @@ -619,7 +621,7 @@ m256 mask1bit256(unsigned int n) { return loadu256(&simd_onebit_masks[mask_idx]); } -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) // switches on bit N in the given vector. static really_inline void setbit256(m256 *ptr, unsigned int n) { @@ -971,7 +973,7 @@ static really_inline int diff512(m512 a, m512 b) { } static really_inline int isnonzero512(m512 a) { -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m128 x = or128(a.lo.lo, a.lo.hi); m128 y = or128(a.hi.lo, a.hi.hi); return isnonzero128(or128(x, y)); @@ -986,7 +988,7 @@ static really_inline int isnonzero512(m512 a) { * mask indicating which 32-bit words contain differences. 
*/ static really_inline u32 diffrich512(m512 a, m512 b) { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) return diffrich256(a.lo, b.lo) | (diffrich256(a.hi, b.hi) << 8); #else a.lo.lo = _mm_cmpeq_epi32(a.lo.lo, b.lo.lo); @@ -1018,7 +1020,7 @@ static really_inline m512 load512(const void *ptr) { // aligned store static really_inline void store512(void *ptr, m512 a) { assert(ISALIGNED_N(ptr, alignof(m256))); -#if defined(__AVX2__) +#if defined(HAVE_AVX2) m512 *x = (m512 *)ptr; store256(&x->lo, a.lo); store256(&x->hi, a.hi); @@ -1054,7 +1056,7 @@ m512 loadbytes512(const void *ptr, unsigned int n) { static really_inline void setbit512(m512 *ptr, unsigned int n) { assert(n < sizeof(*ptr) * 8); -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m128 *sub; if (n < 128) { sub = &ptr->lo.lo; @@ -1082,7 +1084,7 @@ void setbit512(m512 *ptr, unsigned int n) { static really_inline void clearbit512(m512 *ptr, unsigned int n) { assert(n < sizeof(*ptr) * 8); -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m128 *sub; if (n < 128) { sub = &ptr->lo.lo; @@ -1110,7 +1112,7 @@ void clearbit512(m512 *ptr, unsigned int n) { static really_inline char testbit512(m512 val, unsigned int n) { assert(n < sizeof(val) * 8); -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m128 sub; if (n < 128) { sub = val.lo.lo; diff --git a/src/util/state_compress.c b/src/util/state_compress.c index 2a821dad..87e62429 100644 --- a/src/util/state_compress.c +++ b/src/util/state_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,7 @@ */ #include "config.h" #include "ue2common.h" +#include "arch.h" #include "bitutils.h" #include "unaligned.h" #include "pack_bits.h" @@ -262,7 +263,7 @@ m256 loadcompressed256_32bit(const void *ptr, m256 mvec) { expand32(v[4], m[4]), expand32(v[5], m[5]), expand32(v[6], m[6]), expand32(v[7], m[7]) }; -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m256 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]), .hi = _mm_set_epi32(x[7], x[6], x[5], x[4]) }; #else @@ -289,7 +290,7 @@ m256 loadcompressed256_64bit(const void *ptr, m256 mvec) { u64a x[4] = { expand64(v[0], m[0]), expand64(v[1], m[1]), expand64(v[2], m[2]), expand64(v[3], m[3]) }; -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m256 xvec = { .lo = _mm_set_epi64x(x[1], x[0]), .hi = _mm_set_epi64x(x[3], x[2]) }; #else @@ -546,7 +547,7 @@ m512 loadcompressed512_32bit(const void *ptr, m512 mvec) { expand32(v[14], m[14]), expand32(v[15], m[15]) }; m512 xvec; -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) xvec.lo.lo = _mm_set_epi32(x[3], x[2], x[1], x[0]); xvec.lo.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]); xvec.hi.lo = _mm_set_epi32(x[11], x[10], x[9], x[8]); @@ -581,7 +582,7 @@ m512 loadcompressed512_64bit(const void *ptr, m512 mvec) { expand64(v[4], m[4]), expand64(v[5], m[5]), expand64(v[6], m[6]), expand64(v[7], m[7]) }; -#if !defined(__AVX2__) +#if !defined(HAVE_AVX2) m512 xvec = { .lo = { _mm_set_epi64x(x[1], x[0]), _mm_set_epi64x(x[3], x[2]) }, .hi = { _mm_set_epi64x(x[5], x[4]), diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp index 31aaf17f..7241c0b8 100644 --- a/unit/internal/bitutils.cpp +++ b/unit/internal/bitutils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or 
without * modification, are permitted provided that the following conditions are met: @@ -29,6 +29,7 @@ #include "config.h" #include "gtest/gtest.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/popcount.h" @@ -437,7 +438,7 @@ TEST(BitUtils, rank_in_mask64) { ASSERT_EQ(31, rank_in_mask64(0xf0f0f0f0f0f0f0f0ULL, 63)); } -#if defined(HAVE_PEXT) && defined(ARCH_64_BIT) +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) TEST(BitUtils, pdep64) { u64a data = 0xF123456789ABCDEF; ASSERT_EQ(0xfULL, pdep64(data, 0xf)); diff --git a/unit/internal/database.cpp b/unit/internal/database.cpp index cb3e76b5..fa34ead2 100644 --- a/unit/internal/database.cpp +++ b/unit/internal/database.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "crc32.h" #include "database.h" #include "ue2common.h" +#include "util/arch.h" #include "util/target_info.h" #include "gtest/gtest.h" @@ -47,7 +48,7 @@ TEST(DB, flagsToPlatform) { p.cpu_features = 0; -#if defined(__AVX2__) +#if defined(HAVE_AVX2) p.cpu_features |= HS_CPU_FEATURES_AVX2; #endif diff --git a/unit/internal/masked_move.cpp b/unit/internal/masked_move.cpp index 6a2d742d..7bd78c50 100644 --- a/unit/internal/masked_move.cpp +++ b/unit/internal/masked_move.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,11 +31,12 @@ #include #include "gtest/gtest.h" +#include "util/arch.h" #include "util/masked_move.h" namespace { -#if defined(__AVX2__) +#if defined(HAVE_AVX2) bool try_mask_len(const u8 *buf, u8 *target, size_t len) { memset(target, 0, 32); diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp index a4632c36..fcf337f2 100644 --- a/unit/internal/shuffle.cpp +++ b/unit/internal/shuffle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "gtest/gtest.h" +#include "util/arch.h" #include "util/simd_utils.h" #include "nfa/limex_shuffle.h" @@ -194,7 +195,7 @@ TEST(Shuffle, PackedExtract128_1) { } } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) TEST(Shuffle, PackedExtract256_1) { // Try all possible one-bit masks for (unsigned int i = 0; i < 256; i++) { diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 7b34d92e..31b72648 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "gtest/gtest.h" #include "util/alloc.h" +#include "util/arch.h" #include "util/make_unique.h" #include "util/simd_utils.h" @@ -620,7 +621,7 @@ TEST(SimdUtilsTest, set4x32) { ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd))); } -#if defined(__AVX2__) +#if defined(HAVE_AVX2) TEST(SimdUtilsTest, set32x8) { char cmp[sizeof(m256)]; From 
cd418ea6a889a02316bcd912e9b765fcdc86fd89 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 30 Mar 2017 13:40:52 +1100 Subject: [PATCH 191/326] Wrapper for system intrin header --- src/crc32.c | 7 +---- src/fdr/fdr.c | 2 +- src/util/arch.h | 7 +++++ src/util/bitutils.h | 32 +-------------------- src/util/intrinsics.h | 66 +++++++++++++++++++++++++++++++++++++++++++ src/util/math.h | 27 +----------------- src/util/simd_types.h | 30 +------------------- src/util/simd_utils.h | 37 +++--------------------- 8 files changed, 82 insertions(+), 126 deletions(-) create mode 100644 src/util/intrinsics.h diff --git a/src/crc32.c b/src/crc32.c index 9a9b6f26..1dae47b4 100644 --- a/src/crc32.c +++ b/src/crc32.c @@ -30,12 +30,7 @@ #include "config.h" #include "ue2common.h" #include "util/arch.h" - -#if defined(HAVE_C_X86INTRIN_H) -#include <x86intrin.h> -#elif defined(HAVE_C_INTRIN_H) -#include <intrin.h> -#endif +#include "util/intrinsics.h" #if !defined(HAVE_SSE42) diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c index 74e6c577..92e75aaa 100644 --- a/src/fdr/fdr.c +++ b/src/fdr/fdr.c @@ -124,7 +124,7 @@ const ALIGN_CL_DIRECTIVE u8 zone_or_mask[ITER_BYTES+1][ITER_BYTES] = { static really_inline u64a andn(const u32 a, const u8 *b) { u64a r; -#if defined(HAVE_BMI) +#if defined(HAVE_BMI) && !defined(NO_ASM) __asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b)); #else r = unaligned_load_u32(b) & ~a; diff --git a/src/util/arch.h b/src/util/arch.h index 8584ee65..2ed1793a 100644 --- a/src/util/arch.h +++ b/src/util/arch.h @@ -72,4 +72,11 @@ #define HAVE_BMI2 #endif +/* + * MSVC uses a different form of inline asm + */ +#if defined(_WIN32) && defined(_MSC_VER) +#define NO_ASM +#endif + #endif // UTIL_ARCH_H_ diff --git a/src/util/bitutils.h b/src/util/bitutils.h index 66a07571..c545ee18 100644 --- a/src/util/bitutils.h +++ b/src/util/bitutils.h @@ -36,37 +36,7 @@ #include "ue2common.h" #include "popcount.h" #include "util/arch.h" - -#ifdef __cplusplus -# if defined(HAVE_CXX_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#else // C, baby -# if defined(HAVE_C_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#endif - -#ifdef __cplusplus -# if defined(HAVE_CXX_INTRIN_H) -# define USE_INTRIN_H -# endif -#else // C, baby -# if defined(HAVE_C_INTRIN_H) -# define USE_INTRIN_H -# endif -#endif - -#if defined(USE_X86INTRIN_H) -#include <x86intrin.h> -#elif defined(USE_INTRIN_H) -#include <intrin.h> -#endif - -// MSVC has a different form of inline asm -#ifdef _WIN32 -#define NO_ASM -#endif +#include "util/intrinsics.h" #define CASE_BIT 0x20 #define CASE_CLEAR 0xdf diff --git a/src/util/intrinsics.h b/src/util/intrinsics.h new file mode 100644 index 00000000..edc4f6ef --- /dev/null +++ b/src/util/intrinsics.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Wrapper around the compiler supplied intrinsic header + */ + +#ifndef INTRINSICS_H +#define INTRINSICS_H + +#include "config.h" + +#ifdef __cplusplus +# if defined(HAVE_CXX_X86INTRIN_H) +# define USE_X86INTRIN_H +# endif +#else // C +# if defined(HAVE_C_X86INTRIN_H) +# define USE_X86INTRIN_H +# endif +#endif + +#ifdef __cplusplus +# if defined(HAVE_CXX_INTRIN_H) +# define USE_INTRIN_H +# endif +#else // C +# if defined(HAVE_C_INTRIN_H) +# define USE_INTRIN_H +# endif +#endif + +#if defined(USE_X86INTRIN_H) +#include <x86intrin.h> +#elif defined(USE_INTRIN_H) +#include <intrin.h> +#else +#error no intrinsics file +#endif + +#endif // INTRINSICS_H diff --git a/src/util/math.h b/src/util/math.h index 3fd69dba..e18c5027 100644 --- a/src/util/math.h +++ b/src/util/math.h @@ -30,35 +30,10 @@ #define UTIL_MATH_H_ #include "arch.h" +#include "intrinsics.h" #include <math.h> -#ifdef __cplusplus -# if defined(HAVE_CXX_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#else // C -# if defined(HAVE_C_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#endif - -#ifdef __cplusplus -# if defined(HAVE_CXX_INTRIN_H) -# define USE_INTRIN_H -# endif -#else // C -# if defined(HAVE_C_INTRIN_H) -# define USE_INTRIN_H -# endif -#endif - -#if defined(USE_X86INTRIN_H) -#include <x86intrin.h> -#elif defined(USE_INTRIN_H) -#include <intrin.h> -#endif - static really_inline double our_pow(double x, double y) { #if defined(HAVE_AVX) diff --git a/src/util/simd_types.h b/src/util/simd_types.h index ec86d4a9..a6c87db7 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -31,37 +31,9 @@ #include "config.h" #include "util/arch.h" +#include "util/intrinsics.h" #include "ue2common.h" -// more recent headers are bestest, but only if we can use them -#ifdef __cplusplus -# if defined(HAVE_CXX_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#else // C -# if defined(HAVE_C_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#endif - -#ifdef __cplusplus -# if defined(HAVE_CXX_INTRIN_H) -# define USE_INTRIN_H -# endif -#else // C -# if defined(HAVE_C_INTRIN_H) -# define USE_INTRIN_H -# endif -#endif - -#if defined(USE_X86INTRIN_H) -#include <x86intrin.h> -#elif defined(USE_INTRIN_H) -#include <intrin.h> -#else -#error no intrinsics!
-#endif - #if defined(HAVE_SSE2) typedef __m128i m128; #else diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index ccbcabbb..bc49a046 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -38,42 +38,13 @@ #endif #include "config.h" -#include "util/arch.h" - -#include <string.h> // for memcpy - -// more recent headers are bestest, but only if we can use them -#ifdef __cplusplus -# if defined(HAVE_CXX_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#else // C -# if defined(HAVE_C_X86INTRIN_H) -# define USE_X86INTRIN_H -# endif -#endif - -#ifdef __cplusplus -# if defined(HAVE_CXX_INTRIN_H) -# define USE_INTRIN_H -# endif -#else // C -# if defined(HAVE_C_INTRIN_H) -# define USE_INTRIN_H -# endif -#endif - -#if defined(USE_X86INTRIN_H) -#include <x86intrin.h> -#elif defined(USE_INTRIN_H) -#include <intrin.h> -#else -#error no intrins! -#endif - #include "ue2common.h" #include "simd_types.h" #include "unaligned.h" +#include "util/arch.h" +#include "util/intrinsics.h" + +#include <string.h> // for memcpy // Define a common assume_aligned using an appropriate compiler built-in, if // it's available. Note that we need to handle C or C++ compilation. From a61b92f8d14c5418cb42d207b6ad8f495df66f53 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 30 Mar 2017 13:55:36 +1100 Subject: [PATCH 192/326] operators: add simple ue2::totally_ordered class --- CMakeLists.txt | 1 + src/util/operators.h | 60 +++++++++++++++++++++++++++++++++++++++ src/util/ue2_containers.h | 10 +++---- src/util/ue2_graph.h | 8 +++--- 4 files changed, 70 insertions(+), 9 deletions(-) create mode 100644 src/util/operators.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 732a73b4..c7929b34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1003,6 +1003,7 @@ SET (hs_SRCS src/util/math.h src/util/multibit_build.cpp src/util/multibit_build.h + src/util/operators.h src/util/order_check.h src/util/partial_store.h src/util/partitioned_set.h diff --git a/src/util/operators.h b/src/util/operators.h new file mode 100644 index 00000000..b0a1c1cc --- /dev/null +++ b/src/util/operators.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \brief Ordered operators: provides all the other compare operators for types + * that provide equal and less-than. + * + * This is similar to Boost's totally_ordered class, but much simpler and + * without injecting the boost namespace into ADL lookup. + */ + +#ifndef UTIL_OPERATORS_H +#define UTIL_OPERATORS_H + +namespace ue2 { + +/** + * \brief Ordered operators: provides all the other compare operators for types + * that provide equal and less-than. + * + * Simply inherit from this class with your class name as its template + * parameter. + */ +template +class totally_ordered { +public: + friend bool operator!=(const T &a, const T &b) { return !(a == b); } + friend bool operator<=(const T &a, const T &b) { return !(b < a); } + friend bool operator>(const T &a, const T &b) { return b < a; } + friend bool operator>=(const T &a, const T &b) { return !(a < b); } +}; + +} // namespace + +#endif // UTIL_OPERATORS_H diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index ef93b2d9..29919c7e 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -30,6 +30,7 @@ #define UTIL_UE2_CONTAINERS_H_ #include "ue2common.h" +#include "util/operators.h" #include "util/small_vector.h" #include @@ -39,7 +40,6 @@ #include #include -#include #include #include @@ -159,7 +159,7 @@ template , class Allocator = std::allocator> class flat_set : public flat_detail::flat_base, - public boost::totally_ordered> { + public totally_ordered> { using base_type = flat_detail::flat_base; using storage_type = typename base_type::storage_type; using base_type::data; @@ -324,7 +324,7 @@ public: return comp(); } - // Operators. All others provided by boost::totally_ordered. + // Operators. All others provided by ue2::totally_ordered. bool operator==(const flat_set &a) const { return data() == a.data(); @@ -362,7 +362,7 @@ template , class Allocator = std::allocator>> class flat_map : public flat_detail::flat_base, Compare, Allocator>, - public boost::totally_ordered> { + public totally_ordered> { public: // Member types. using key_type = Key; @@ -590,7 +590,7 @@ public: return value_compare(comp()); } - // Operators. All others provided by boost::totally_ordered. + // Operators. All others provided by ue2::totally_ordered. bool operator==(const flat_map &a) const { return data() == a.data(); diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h index 9634b032..7526cad5 100644 --- a/src/util/ue2_graph.h +++ b/src/util/ue2_graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,8 +31,8 @@ #include "ue2common.h" #include "util/graph_range.h" +#include "util/operators.h" -#include #include #include /* vertex_index_t, ... 
*/ #include /* no_property */ @@ -292,7 +292,7 @@ public: using vertex_bundled = VertexPropertyType; using edge_bundled = EdgePropertyType; - class vertex_descriptor : boost::totally_ordered { + class vertex_descriptor : totally_ordered { public: vertex_descriptor() : p(nullptr), serial(0) { } explicit vertex_descriptor(vertex_node *pp) @@ -324,7 +324,7 @@ public: friend ue2_graph; }; - class edge_descriptor : boost::totally_ordered { + class edge_descriptor : totally_ordered { public: edge_descriptor() : p(nullptr), serial(0) { } explicit edge_descriptor(edge_node *pp) : p(pp), serial(pp->serial) { } From a9844fbf74068cb78d48733c8fc7ef001afc4fb4 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 30 Mar 2017 14:30:36 +1100 Subject: [PATCH 193/326] noncopyable: add simple ue2::noncopyable class --- src/util/noncopyable.h | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 src/util/noncopyable.h diff --git a/src/util/noncopyable.h b/src/util/noncopyable.h new file mode 100644 index 00000000..da3851a3 --- /dev/null +++ b/src/util/noncopyable.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Class that makes derived classes non-copyable. + */ + +#ifndef UTIL_NONCOPYABLE_H +#define UTIL_NONCOPYABLE_H + +namespace ue2 { + +/** \brief Class that makes derived classes non-copyable. */ +struct noncopyable { + noncopyable() = default; + // Copy constructor. + noncopyable(const noncopyable &) = delete; + // Copy-assignment operator. 
+ noncopyable &operator=(const noncopyable &) = delete; +}; + +} // namespace ue2 + +#endif // UTIL_NONCOPYABLE_H From 1ef87c43ee208a781acc7cab6f248ed00d0d2d8a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 30 Mar 2017 14:37:35 +1100 Subject: [PATCH 194/326] noncopyable: switch over from boost --- CMakeLists.txt | 1 + src/compiler/compiler.h | 4 ++-- src/fdr/fdr_compile.cpp | 4 ++-- src/fdr/teddy_compile.cpp | 5 ++--- src/nfa/dfa_min.cpp | 8 ++++---- src/nfa/goughcompile_internal.h | 6 +++--- src/nfagraph/ng.h | 5 ++--- src/nfagraph/ng_builder.h | 4 ++-- src/nfagraph/ng_is_equal.h | 3 +-- src/nfagraph/ng_violet.cpp | 1 - src/parser/buildstate.h | 6 +++--- src/rose/rose_build.h | 5 ++--- src/rose/rose_build_add.cpp | 5 ++--- src/rose/rose_build_bytecode.cpp | 5 +++-- src/rose/rose_build_engine_blob.h | 5 ++--- src/smallwrite/smallwrite_build.h | 5 ++--- src/som/slot_manager.h | 4 ++-- src/util/boundary_reports.h | 6 +++--- src/util/partitioned_set.h | 6 +++--- src/util/queue_index_factory.h | 7 +++---- src/util/report_manager.h | 4 ++-- src/util/ue2_graph.h | 3 ++- 22 files changed, 48 insertions(+), 54 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7929b34..fbcb5399 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1003,6 +1003,7 @@ SET (hs_SRCS src/util/math.h src/util/multibit_build.cpp src/util/multibit_build.h + src/util/noncopyable.h src/util/operators.h src/util/order_check.h src/util/partial_store.h diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h index 8f5f9b65..60d7ca33 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -37,9 +37,9 @@ #include "database.h" #include "compiler/expression_info.h" #include "parser/Component.h" +#include "util/noncopyable.h" #include -#include struct hs_database; struct hs_expr_ext; @@ -54,7 +54,7 @@ class NGHolder; class ReportManager; /** \brief Class gathering together the pieces of a parsed expression. 
*/ -class ParsedExpression : boost::noncopyable { +class ParsedExpression : noncopyable { public: ParsedExpression(unsigned index, const char *expression, unsigned flags, ReportID report, const hs_expr_ext *ext = nullptr); diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 015fa51e..dc0cc0f3 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -44,6 +44,7 @@ #include "util/compare.h" #include "util/dump_mask.h" #include "util/math.h" +#include "util/noncopyable.h" #include "util/target_info.h" #include "util/ue2string.h" #include "util/verify_types.h" @@ -62,7 +63,6 @@ #include #include -#include #include using namespace std; @@ -71,7 +71,7 @@ namespace ue2 { namespace { -class FDRCompiler : boost::noncopyable { +class FDRCompiler : noncopyable { private: const FDREngineDescription &eng; const Grey &grey; diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 09155280..607024d1 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -35,6 +35,7 @@ #include "ue2common.h" #include "util/alloc.h" #include "util/compare.h" +#include "util/noncopyable.h" #include "util/popcount.h" #include "util/target_info.h" #include "util/verify_types.h" @@ -55,8 +56,6 @@ #include #include -#include - using namespace std; namespace ue2 { @@ -65,7 +64,7 @@ namespace { //#define TEDDY_DEBUG -class TeddyCompiler : boost::noncopyable { +class TeddyCompiler : noncopyable { const TeddyEngineDescription &eng; const Grey &grey; const vector &lits; diff --git a/src/nfa/dfa_min.cpp b/src/nfa/dfa_min.cpp index 0d3bca11..f83d1420 100644 --- a/src/nfa/dfa_min.cpp +++ b/src/nfa/dfa_min.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -60,8 +60,9 @@ #include "nfa/rdfa.h" #include "nfagraph/ng_mcclellan.h" #include "ue2common.h" -#include "util/partitioned_set.h" #include "util/container.h" +#include "util/noncopyable.h" +#include "util/partitioned_set.h" #include "util/ue2_containers.h" #include @@ -71,7 +72,6 @@ #include #include -#include #include using namespace std; @@ -84,7 +84,7 @@ struct hopcroft_state_info { vector > prev; }; -struct DFA_components : boost::noncopyable { +struct DFA_components : noncopyable { dstate_id_t nstates; size_t inp_size; set work_queue; diff --git a/src/nfa/goughcompile_internal.h b/src/nfa/goughcompile_internal.h index 52e65f15..a6ba0d1b 100644 --- a/src/nfa/goughcompile_internal.h +++ b/src/nfa/goughcompile_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ #include "mcclellancompile.h" #include "ue2common.h" #include "util/charreach.h" +#include "util/noncopyable.h" #include "util/order_check.h" #include "util/ue2_containers.h" @@ -41,7 +42,6 @@ #include #include -#include #include namespace ue2 { @@ -103,7 +103,7 @@ struct GoughSSAVarWithInputs; struct GoughSSAVarMin; struct GoughSSAVarJoin; -struct GoughSSAVar : boost::noncopyable { +struct GoughSSAVar : noncopyable { GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {} virtual ~GoughSSAVar(); const ue2::flat_set &get_inputs() const { diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index dc797acf..a5a5c235 100644 ---
a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -42,6 +42,7 @@ #include "util/compile_context.h" #include "util/depth.h" #include "util/graph.h" +#include "util/noncopyable.h" #include "util/report_manager.h" #include "util/ue2_containers.h" @@ -51,8 +52,6 @@ #include #include -#include - namespace ue2 { struct CompileContext; @@ -62,7 +61,7 @@ class ExpressionInfo; class RoseBuild; class SmallWriteBuild; -class NG : boost::noncopyable { +class NG : noncopyable { public: NG(const CompileContext &in_cc, size_t num_patterns, unsigned in_somPrecision); diff --git a/src/nfagraph/ng_builder.h b/src/nfagraph/ng_builder.h index df2e0dd8..9f71b622 100644 --- a/src/nfagraph/ng_builder.h +++ b/src/nfagraph/ng_builder.h @@ -37,9 +37,9 @@ #include "ue2common.h" #include "parser/position.h" +#include "util/noncopyable.h" #include -#include namespace ue2 { @@ -52,7 +52,7 @@ class ParsedExpression; /** \brief Abstract builder interface. Use \ref makeNFABuilder to construct * one. Used by GlushkovBuildState. */ -class NFABuilder : boost::noncopyable { +class NFABuilder : noncopyable { public: virtual ~NFABuilder(); diff --git a/src/nfagraph/ng_is_equal.h b/src/nfagraph/ng_is_equal.h index 8eba2af5..52b29882 100644 --- a/src/nfagraph/ng_is_equal.h +++ b/src/nfagraph/ng_is_equal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,7 +39,6 @@ #include "ue2common.h" #include -#include namespace ue2 { diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 1cf3b716..e2825643 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -68,7 +68,6 @@ #include #include #include -#include #include #include diff --git a/src/parser/buildstate.h b/src/parser/buildstate.h index 8a69f44f..5ddaf9b2 100644 --- a/src/parser/buildstate.h +++ b/src/parser/buildstate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,10 +35,10 @@ #include "ue2common.h" #include "position.h" +#include "util/noncopyable.h" #include #include -#include namespace ue2 { @@ -49,7 +49,7 @@ class PositionInfo; * * Abstract base class; use \ref makeGlushkovBuildState to get one of these you * can use. */ -class GlushkovBuildState : boost::noncopyable { +class GlushkovBuildState : noncopyable { public: /** \brief Represents an uninitialized state. */ static const Position POS_UNINITIALIZED; diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index 0af8ba57..2949fcc9 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -42,6 +42,7 @@ #include "rose_in_graph.h" #include "util/alloc.h" #include "util/charreach.h" +#include "util/noncopyable.h" #include "util/ue2_containers.h" #include "util/ue2string.h" @@ -50,8 +51,6 @@ #include #include -#include - struct NFA; struct SmallWriteEngine; struct RoseEngine; @@ -80,7 +79,7 @@ public: /** \brief Abstract interface intended for callers from elsewhere in the tree, * real underlying implementation is RoseBuildImpl in rose_build_impl.h. 
*/ -class RoseBuild : boost::noncopyable { +class RoseBuild : noncopyable { public: virtual ~RoseBuild(); diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 01d7d827..b53f7c8a 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -56,6 +56,7 @@ #include "util/dump_charclass.h" #include "util/graph_range.h" #include "util/make_unique.h" +#include "util/noncopyable.h" #include "util/order_check.h" #include "util/report_manager.h" #include "util/ue2string.h" @@ -68,8 +69,6 @@ #include #include -#include - using namespace std; namespace ue2 { @@ -77,7 +76,7 @@ namespace ue2 { /** * \brief Data used by most of the construction code in this file. */ -struct RoseBuildData : boost::noncopyable { +struct RoseBuildData : noncopyable { RoseBuildData(const RoseInGraph &ig_in, bool som_in) : ig(ig_in), som(som_in) {} diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index ea898408..6ee08952 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -86,6 +86,7 @@ #include "util/graph_range.h" #include "util/make_unique.h" #include "util/multibit_build.h" +#include "util/noncopyable.h" #include "util/order_check.h" #include "util/popcount.h" #include "util/queue_index_factory.h" @@ -177,7 +178,7 @@ struct RoseResources { bool has_eod = false; }; -struct build_context : boost::noncopyable { +struct build_context : noncopyable { /** \brief information about engines to the left of a vertex */ map leftfix_info; @@ -237,7 +238,7 @@ struct build_context : boost::noncopyable { /** \brief Data only used during construction of various programs (literal, * anchored, delay, etc). */ -struct ProgramBuild : boost::noncopyable { +struct ProgramBuild : noncopyable { /** \brief Mapping from vertex to key, for vertices with a * CHECK_NOT_HANDLED instruction. */ ue2::unordered_map handledKeys; diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h index 9298c37f..61b6b440 100644 --- a/src/rose/rose_build_engine_blob.h +++ b/src/rose/rose_build_engine_blob.h @@ -35,17 +35,16 @@ #include "util/alloc.h" #include "util/container.h" #include "util/multibit_build.h" +#include "util/noncopyable.h" #include "util/ue2_containers.h" #include "util/verify_types.h" #include #include -#include - namespace ue2 { -class RoseEngineBlob : boost::noncopyable { +class RoseEngineBlob : noncopyable { public: /** \brief Base offset of engine_blob in the Rose engine bytecode. */ static constexpr u32 base_offset = ROUNDUP_CL(sizeof(RoseEngine)); diff --git a/src/smallwrite/smallwrite_build.h b/src/smallwrite/smallwrite_build.h index 3d7f3cb6..906a83c2 100644 --- a/src/smallwrite/smallwrite_build.h +++ b/src/smallwrite/smallwrite_build.h @@ -37,11 +37,10 @@ #include "ue2common.h" #include "util/alloc.h" +#include "util/noncopyable.h" #include -#include - struct SmallWriteEngine; namespace ue2 { @@ -54,7 +53,7 @@ class ReportManager; // Abstract interface intended for callers from elsewhere in the tree, real // underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h. 
-class SmallWriteBuild : boost::noncopyable { +class SmallWriteBuild : noncopyable { public: // Destructor virtual ~SmallWriteBuild(); diff --git a/src/som/slot_manager.h b/src/som/slot_manager.h index 971ea362..adccf99a 100644 --- a/src/som/slot_manager.h +++ b/src/som/slot_manager.h @@ -36,11 +36,11 @@ #include "ue2common.h" #include "nfagraph/ng_holder.h" #include "util/alloc.h" +#include "util/noncopyable.h" #include "util/ue2_containers.h" #include #include -#include struct NFA; @@ -54,7 +54,7 @@ struct SlotCache; /** \brief SOM slot manager. Used to hand out SOM slots and track their * relationships during SOM construction. Also stores reverse NFAs used for * SOM. */ -class SomSlotManager : boost::noncopyable { +class SomSlotManager : noncopyable { public: explicit SomSlotManager(u8 precision); ~SomSlotManager(); diff --git a/src/util/boundary_reports.h b/src/util/boundary_reports.h index 7ad93ba1..b2bb1c9b 100644 --- a/src/util/boundary_reports.h +++ b/src/util/boundary_reports.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,13 +30,13 @@ #define BOUNDARY_REPORTS_H #include "ue2common.h" +#include "util/noncopyable.h" #include -#include namespace ue2 { -struct BoundaryReports : boost::noncopyable { +struct BoundaryReports : noncopyable { std::set report_at_0; /* set of internal reports to fire * unconditionally at offset 0 */ std::set report_at_0_eod; /* set of internal reports to fire diff --git a/src/util/partitioned_set.h b/src/util/partitioned_set.h index 8f92a8b7..a9e4644d 100644 --- a/src/util/partitioned_set.h +++ b/src/util/partitioned_set.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,13 +30,13 @@ #define PARTITIONED_SET_H #include "container.h" +#include "noncopyable.h" #include "ue2_containers.h" #include "ue2common.h" #include #include -#include #include namespace ue2 { @@ -53,7 +53,7 @@ static constexpr size_t INVALID_SUBSET = ~(size_t)0; */ template -class partitioned_set : boost::noncopyable { +class partitioned_set : noncopyable { public: class subset { public: diff --git a/src/util/queue_index_factory.h b/src/util/queue_index_factory.h index 1360beef..e8f7028e 100644 --- a/src/util/queue_index_factory.h +++ b/src/util/queue_index_factory.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,12 +33,11 @@ #define UTIL_QUEUE_INDEX_FACTORY_H #include "ue2common.h" - -#include +#include "util/noncopyable.h" namespace ue2 { -class QueueIndexFactory : boost::noncopyable { +class QueueIndexFactory : noncopyable { public: QueueIndexFactory() : val(0) {} u32 get_queue() { return val++; } diff --git a/src/util/report_manager.h b/src/util/report_manager.h index 4b62e4b5..f76aff22 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -36,12 +36,12 @@ #include "ue2common.h" #include "util/compile_error.h" +#include "util/noncopyable.h" #include "util/report.h" #include #include #include -#include namespace ue2 { @@ -57,7 
+57,7 @@ struct external_report_info { }; /** \brief Tracks Report structures, exhaustion and dedupe keys. */ -class ReportManager : boost::noncopyable { +class ReportManager : noncopyable { public: explicit ReportManager(const Grey &g); diff --git a/src/util/ue2_graph.h b/src/util/ue2_graph.h index 7526cad5..138d7467 100644 --- a/src/util/ue2_graph.h +++ b/src/util/ue2_graph.h @@ -31,6 +31,7 @@ #include "ue2common.h" #include "util/graph_range.h" +#include "util/noncopyable.h" #include "util/operators.h" #include @@ -156,7 +157,7 @@ namespace ue2 { namespace graph_detail { -class graph_base : boost::noncopyable { +class graph_base : noncopyable { }; struct default_edge_property { From 7533e3341e20aa4746b058207909a84cba294f8a Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 23 Mar 2017 12:19:35 +1100 Subject: [PATCH 195/326] Don't fail if sqlite is not present. --- cmake/sqlite3.cmake | 2 +- tools/hsbench/CMakeLists.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/sqlite3.cmake b/cmake/sqlite3.cmake index c07f1161..cbe17c6d 100644 --- a/cmake/sqlite3.cmake +++ b/cmake/sqlite3.cmake @@ -22,7 +22,7 @@ if (NOT SQLITE3_FOUND) set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3") set(SQLITE3_LDFLAGS sqlite3_static) else() - message(FATAL_ERROR " no sqlite3 in source tree") + message(STATUS " no sqlite3 in source tree") endif() endif() diff --git a/tools/hsbench/CMakeLists.txt b/tools/hsbench/CMakeLists.txt index 25a833d0..3b9a73f7 100644 --- a/tools/hsbench/CMakeLists.txt +++ b/tools/hsbench/CMakeLists.txt @@ -1,4 +1,8 @@ include (${CMAKE_MODULE_PATH}/sqlite3.cmake) +if (NOT SQLITE3_FOUND) + message(STATUS "sqlite3 not found, not building hsbench") + return() +endif() if (NOT XCODE) include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS}) From ae3cb7de6fd48c20decda1e71e9aadcb28d20b84 Mon Sep 17 00:00:00 2001 From: "Xu, Chi" Date: Fri, 31 Mar 2017 04:37:33 +0800 Subject: [PATCH 196/326] rose: add multi-path shufti 16x8, 32x8, 32x16, 64x8 and multi-path lookaround instructions. 
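In scalar terms, the new check works as follows: each of up to eight alternative byte paths through a literal's neighbourhood is one bit in a path mask, each lookaround location stores a 256-entry table mapping a byte value to the set of paths that value keeps alive, and the mask is ANDed down location by location (the `path &= reach[c]` loop in the code below). The following is a minimal standalone sketch of that idea; every identifier in it is invented for illustration and none of it is the engine's actual instruction layout:

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: prune a set of up to 8 candidate paths (one bit each in
     * 'paths') against per-location reach tables. reach[i][c] is the set
     * of paths that byte value c keeps alive at location i. */
    static int multipath_look_sketch(const uint8_t *buf, size_t len,
                                     size_t anchor, const int8_t *offsets,
                                     const uint8_t (*reach)[256],
                                     size_t count, uint8_t start_paths) {
        uint8_t paths = start_paths;
        for (size_t i = 0; i < count; i++) {
            ptrdiff_t pos = (ptrdiff_t)anchor + offsets[i];
            if (pos < 0 || (size_t)pos >= len) {
                continue; /* the real code consults history/future instead */
            }
            paths &= reach[i][buf[pos]]; /* drop paths this byte kills */
            if (!paths) {
                return 0; /* every path has failed */
            }
        }
        return 1; /* some path survived all of its locations */
    }

The CHECK_MULTIPATH_SHUFTI_* instructions below are, broadly speaking, the vectorised form of the same pruning: a byte shuffle gathers the data each path inspects, nibble-indexed mask tables perform the reach lookup shufti-style, and all locations are tested in one pass.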
--- src/rose/program_runtime.h | 435 ++++++++++++++++++++- src/rose/rose_build_bytecode.cpp | 603 +++++++++++++++++++++++------ src/rose/rose_build_dump.cpp | 339 +++++++++++++++- src/rose/rose_build_lookaround.cpp | 335 ++++++++++------ src/rose/rose_build_lookaround.h | 10 +- src/rose/rose_build_program.cpp | 90 ++++- src/rose/rose_build_program.h | 339 +++++++++++++++- src/rose/rose_common.h | 11 +- src/rose/rose_internal.h | 3 +- src/rose/rose_program.h | 107 ++++- src/rose/validate_shufti.h | 123 +++++- src/util/simd_utils.h | 17 + 12 files changed, 2133 insertions(+), 279 deletions(-) diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 30ff8527..88c312d2 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -857,13 +857,13 @@ u32 getBufferDataComplex(const struct core_info *ci, const s64a loc, } static rose_inline -m128 getData128(const struct core_info *ci, s64a offset, u16 *valid_data_mask) { +m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) { if (offset > 0 && offset + sizeof(m128) <= ci->len) { *valid_data_mask = 0xffff; return loadu128(ci->buf + offset); } ALIGN_DIRECTIVE u8 data[sizeof(m128)]; - *valid_data_mask = (u16)getBufferDataComplex(ci, offset, data, 16); + *valid_data_mask = getBufferDataComplex(ci, offset, data, 16); return *(m128 *)data; } @@ -892,7 +892,7 @@ int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask, return 0; } - u16 valid_data_mask = 0; + u32 valid_data_mask = 0; m128 data = getData128(ci, offset, &valid_data_mask); if (unlikely(!valid_data_mask)) { return 1; @@ -924,7 +924,7 @@ int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask, return 0; } - u16 valid_data_mask = 0; + u32 valid_data_mask = 0; m128 data = getData128(ci, offset, &valid_data_mask); if (unlikely(!valid_data_mask)) { return 1; @@ -1020,8 +1020,9 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask, static rose_inline int roseCheckSingleLookaround(const struct RoseEngine *t, const struct hs_scratch *scratch, - s8 checkOffset, u32 lookaroundIndex, u64a end) { - assert(lookaroundIndex != MO_INVALID_IDX); + s8 checkOffset, u32 lookaroundReachIndex, + u64a end) { + assert(lookaroundReachIndex != MO_INVALID_IDX); const struct core_info *ci = &scratch->core_info; DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, ci->buf_offset, ci->buf_offset + ci->len); @@ -1037,7 +1038,7 @@ int roseCheckSingleLookaround(const struct RoseEngine *t, } const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset; - const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; + const u8 *reach = reach_base + lookaroundReachIndex; u8 c; if (offset >= 0 && offset < (s64a)ci->len) { @@ -1063,9 +1064,11 @@ int roseCheckSingleLookaround(const struct RoseEngine *t, */ static rose_inline int roseCheckLookaround(const struct RoseEngine *t, - const struct hs_scratch *scratch, u32 lookaroundIndex, + const struct hs_scratch *scratch, + u32 lookaroundLookIndex, u32 lookaroundReachIndex, u32 lookaroundCount, u64a end) { - assert(lookaroundIndex != MO_INVALID_IDX); + assert(lookaroundLookIndex != MO_INVALID_IDX); + assert(lookaroundReachIndex != MO_INVALID_IDX); assert(lookaroundCount > 0); const struct core_info *ci = &scratch->core_info; @@ -1074,12 +1077,12 @@ int roseCheckLookaround(const struct RoseEngine *t, const u8 *base = (const u8 *)t; const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const s8 *look = look_base + lookaroundIndex; + const s8 
*look = look_base + lookaroundLookIndex; const s8 *look_end = look + lookaroundCount; assert(look < look_end); const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; + const u8 *reach = reach_base + lookaroundReachIndex; // The following code assumes that the lookaround structures are ordered by // increasing offset. @@ -1151,6 +1154,359 @@ int roseCheckLookaround(const struct RoseEngine *t, return 1; } +/** + * \brief Trying to find a matching path by the corresponding path mask of + * every lookaround location. + */ +static rose_inline +int roseMultipathLookaround(const struct RoseEngine *t, + const struct hs_scratch *scratch, + u32 multipathLookaroundLookIndex, + u32 multipathLookaroundReachIndex, + u32 multipathLookaroundCount, + s32 last_start, const u8 *start_mask, + u64a end) { + assert(multipathLookaroundCount > 0); + + const struct core_info *ci = &scratch->core_info; + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + + const s8 *look_base = getByOffset(t, t->lookaroundTableOffset); + const s8 *look = look_base + multipathLookaroundLookIndex; + const s8 *look_end = look + multipathLookaroundCount; + assert(look < look_end); + + const u8 *reach_base = getByOffset(t, t->lookaroundReachOffset); + const u8 *reach = reach_base + multipathLookaroundReachIndex; + + const s64a base_offset = (s64a)end - ci->buf_offset; + DEBUG_PRINTF("base_offset=%lld\n", base_offset); + + u8 path = 0xff; + + assert(last_start < 0); + + if (unlikely((u64a)(0 - last_start) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + u32 start_offset = 0; + do { + s64a offset = base_offset + *look; + DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset, + start_mask[start_offset]); + path = start_mask[start_offset]; + if (offset >= -(s64a)ci->hlen) { + break; + } + DEBUG_PRINTF("look=%d before history\n", *look); + start_offset++; + look++; + reach += MULTI_REACH_BITVECTOR_LEN; + } while (look < look_end); + + DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); + for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) { + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= 0) { + DEBUG_PRINTF("in buffer\n"); + break; + } + + assert(offset >= -(s64a)ci->hlen && offset < 0); + u8 c = ci->hbuf[ci->hlen + offset]; + path &= reach[c]; + DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path); + if (!path) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + + DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); + for(; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) { + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("in the future\n"); + break; + } + + assert(offset >= 0 && offset < (s64a)ci->len); + u8 c = ci->buf[offset]; + path &= reach[c]; + DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path); + if (!path) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +static never_inline +int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + s32 checkOffset = ri->base_offset; + const s64a base_offset = (s64a)end - 
ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_init = getData128(ci, offset, &valid_data_mask); + m128 data_select_mask = loadu128(ri->data_select_mask); + + u32 valid_path_mask = 0; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + m128 expand_valid; + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x2(valid_hi, valid_lo); + valid_path_mask = ~movemask128(pshufb(expand_valid, + data_select_mask)); + } + + m128 data = pshufb(data_init, data_select_mask); + m256 nib_mask = loadu256(ri->nib_mask); + m128 bucket_select_mask = loadu128(ri->bucket_select_mask); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask16x8(data, nib_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-16x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + s32 checkOffset = ri->base_offset; + const s64a base_offset = (s64a)end - ci->buf_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(vpshufb(expand_valid, + data_select_mask)); + } + + m256 data = vpshufb(data_double, data_select_mask); + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + m256 bucket_select_mask = loadu256(ri->bucket_select_mask); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x8(data, hi_mask, lo_mask, + bucket_select_mask, + hi_bits_mask, lo_bits_mask, + 
neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x8 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + assert(ri->last_start <= 0); + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_double = set2x128(data_m128); + m256 data_select_mask = loadu256(ri->data_select_mask); + + u32 valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + valid_path_mask = ~movemask256(vpshufb(expand_valid, + data_select_mask)); + } + + m256 data = vpshufb(data_double, data_select_mask); + + m256 hi_mask_1 = loadu2x128(ri->hi_mask); + m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16); + m256 lo_mask_1 = loadu2x128(ri->lo_mask); + m256 lo_mask_2 = loadu2x128(ri->lo_mask + 16); + + m256 bucket_select_mask_hi = loadu256(ri->bucket_select_mask_hi); + m256 bucket_select_mask_lo = loadu256(ri->bucket_select_mask_lo); + + u32 hi_bits_mask = ri->hi_bits_mask; + u32 lo_bits_mask = ri->lo_bits_mask; + u32 neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask32x16(data, hi_mask_1, hi_mask_2, + lo_mask_1, lo_mask_2, + bucket_select_mask_hi, + bucket_select_mask_lo, + hi_bits_mask, lo_bits_mask, + neg_mask, valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-32x16 successfully\n"); + return 1; + } else { + return 0; + } +} + +static never_inline +int roseCheckMultipathShufti64(const struct hs_scratch *scratch, + const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *ri, + u64a end) { + const struct core_info *ci = &scratch->core_info; + const s64a base_offset = (s64a)end - ci->buf_offset; + s32 checkOffset = ri->base_offset; + s64a offset = base_offset + checkOffset; + DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset); + DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset); + + if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) { + if ((u64a)(0 - ri->last_start) > end) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + } + + u32 valid_data_mask; + m128 data_m128 = getData128(ci, offset, &valid_data_mask); + m256 data_m256 = set2x128(data_m128); + m256 data_select_mask_1 = loadu256(ri->data_select_mask); + m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32); + + u64a valid_path_mask = 0; + m256 expand_valid; + if (unlikely(!(valid_data_mask & 1))) { + DEBUG_PRINTF("lose part of backward data\n"); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); + + u64a expand_mask = 
0x8080808080808080ULL; + u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask); + u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask); + DEBUG_PRINTF("expand_hi %llx\n", valid_hi); + DEBUG_PRINTF("expand_lo %llx\n", valid_lo); + expand_valid = set64x4(valid_hi, valid_lo, valid_hi, + valid_lo); + u32 valid_path_1 = movemask256(vpshufb(expand_valid, + data_select_mask_1)); + u32 valid_path_2 = movemask256(vpshufb(expand_valid, + data_select_mask_2)); + valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32); + } + + m256 data_1 = vpshufb(data_m256, data_select_mask_1); + m256 data_2 = vpshufb(data_m256, data_select_mask_2); + + m256 hi_mask = loadu2x128(ri->hi_mask); + m256 lo_mask = loadu2x128(ri->lo_mask); + + m256 bucket_select_mask_1 = loadu256(ri->bucket_select_mask); + m256 bucket_select_mask_2 = loadu256(ri->bucket_select_mask + 32); + + u64a hi_bits_mask = ri->hi_bits_mask; + u64a lo_bits_mask = ri->lo_bits_mask; + u64a neg_mask = ri->neg_mask; + + if (validateMultipathShuftiMask64(data_1, data_2, hi_mask, lo_mask, + bucket_select_mask_1, + bucket_select_mask_2, hi_bits_mask, + lo_bits_mask, neg_mask, + valid_path_mask)) { + DEBUG_PRINTF("check multi-path shufti-64 successfully\n"); + return 1; + } else { + return 0; + } +} + int roseNfaEarliestSom(u64a start, u64a end, ReportID id, void *context); static rose_inline @@ -1614,8 +1970,8 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LOOKAROUND) { - if (!roseCheckLookaround(t, scratch, ri->index, ri->count, - end)) { + if (!roseCheckLookaround(t, scratch, ri->look_index, + ri->reach_index, ri->count, end)) { DEBUG_PRINTF("failed lookaround check\n"); assert(ri->fail_jump); // must progress pc += ri->fail_jump; @@ -2172,6 +2528,59 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, work_done = 0; } PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(MULTIPATH_LOOKAROUND) { + if (!roseMultipathLookaround(t, scratch, ri->look_index, + ri->reach_index, ri->count, + ri->last_start, ri->start_mask, + end)) { + DEBUG_PRINTF("failed multi-path lookaround check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { + if (!roseCheckMultipathShufti16x8(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 16x8 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { + if (!roseCheckMultipathShufti32x8(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 32x8 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { + if (!roseCheckMultipathShufti32x16(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 32x16 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { + if (!roseCheckMultipathShufti64(scratch, ri, end)) { + DEBUG_PRINTF("failed multi-path shufti 64 check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION } } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 6ee08952..a0edc711 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -82,6 +82,7 @@ 
#include "util/compile_context.h" #include "util/compile_error.h" #include "util/container.h" +#include "util/dump_charclass.h" #include "util/fatbit_build.h" #include "util/graph_range.h" #include "util/make_unique.h" @@ -99,6 +100,7 @@ #include #include #include +#include #include #include #include @@ -141,8 +143,8 @@ struct left_build_info { countingMiracleReach(cm_cr) {} // Constructor for a lookaround implementation. - explicit left_build_info(const vector &look) - : has_lookaround(true), lookaround(look) {} + explicit left_build_info(const vector> &looks) + : has_lookaround(true), lookaround(looks) {} u32 queue = 0; /* uniquely idents the left_build_info */ u32 lag = 0; @@ -154,7 +156,7 @@ struct left_build_info { CharReach countingMiracleReach; u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ bool has_lookaround = false; - vector lookaround; // alternative implementation to the NFA + vector> lookaround; // alternative implementation to the NFA }; /** @@ -197,12 +199,22 @@ struct build_context : noncopyable { ue2::unordered_map program_cache; - /** \brief LookEntry list cache, so that we don't have to go scanning - * through the full list to find cases we've used already. */ - ue2::unordered_map, size_t> lookaround_cache; + /** \brief LookEntry list cache, so that we can reuse the look index and + * reach index for the same lookaround. */ + ue2::unordered_map>, + pair> lookaround_cache; /** \brief Lookaround table for Rose roles. */ - vector lookaround; + vector>> lookaround; + + /** \brief Lookaround look table size. */ + size_t lookTableSize = 0; + + /** \brief Lookaround reach table size. + * since single path lookaround and multi-path lookaround have different + * bitvectors range (32 and 256), we need to maintain both look table size + * and reach table size. */ + size_t reachTableSize = 0; /** \brief State indices, for those roles that have them. */ ue2::unordered_map roleStateIndices; @@ -1582,7 +1594,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, // TODO: Handle SOM-tracking cases as well. 
         if (cc.grey.roseLookaroundMasks && is_transient && !g[v].left.tracksSom()) {
-            vector<LookEntry> lookaround;
+            vector<vector<LookEntry>> lookaround;
             if (makeLeftfixLookaround(tbi, v, lookaround)) {
                 DEBUG_PRINTF("implementing as lookaround!\n");
                 bc.leftfix_info.emplace(v, left_build_info(lookaround));
@@ -2651,15 +2663,7 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
 }

 static
-void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
-    const auto &look_vec = bc.lookaround;
-    DEBUG_PRINTF("%zu lookaround table entries\n", look_vec.size());
-
-    vector<s8> look_table(look_vec.size(), 0);
-    vector<u8> reach_table(REACH_BITVECTOR_LEN * look_vec.size(), 0);
-
-    s8 *look = look_table.data();
-    u8 *reach = reach_table.data();
+void writeLookaround(const vector<LookEntry> &look_vec, s8 *&look, u8 *&reach) {
     for (const auto &le : look_vec) {
         *look = verify_s8(le.offset);
         const CharReach &cr = le.reach;
@@ -2670,6 +2674,52 @@ void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
         ++look;
         reach += REACH_BITVECTOR_LEN;
     }
+}
+
+static
+void writeMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
+                              s8 *&look, u8 *&reach) {
+    for (const auto &m : multi_look) {
+        u8 u = 0;
+        assert(m.size() == MAX_LOOKAROUND_PATHS);
+        for (size_t i = 0; i < m.size(); i++) {
+            if (m[i].reach.none()) {
+                u |= (u8)1U << i;
+            }
+        }
+        std::fill_n(reach, MULTI_REACH_BITVECTOR_LEN, u);
+
+        for (size_t i = 0; i < m.size(); i++) {
+            const CharReach &cr = m[i].reach;
+            if (cr.none()) {
+                continue;
+            }
+            *look = m[i].offset;
+
+            for (size_t c = cr.find_first(); c != cr.npos;
+                 c = cr.find_next(c)) {
+                reach[c] |= (u8)1U << i;
+            }
+        }
+
+        ++look;
+        reach += MULTI_REACH_BITVECTOR_LEN;
+    }
+}
+
+static
+void writeLookaroundTables(build_context &bc, RoseEngine &proto) {
+    vector<s8> look_table(bc.lookTableSize, 0);
+    vector<u8> reach_table(bc.reachTableSize, 0);
+    s8 *look = look_table.data();
+    u8 *reach = reach_table.data();
+    for (const auto &l : bc.lookaround) {
+        if (l.size() == 1) {
+            writeLookaround(l.front(), look, reach);
+        } else {
+            writeMultipathLookaround(l, look, reach);
+        }
+    }

     proto.lookaroundTableOffset = bc.engine_blob.add_range(look_table);
     proto.lookaroundReachOffset = bc.engine_blob.add_range(reach_table);
@@ -2804,30 +2854,37 @@ bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) {
 }

 static
-u32 addLookaround(build_context &bc, const vector<LookEntry> &look) {
+void addLookaround(build_context &bc,
+                   const vector<vector<LookEntry>> &look,
+                   u32 &look_index, u32 &reach_index) {
     // Check the cache.
     auto it = bc.lookaround_cache.find(look);
     if (it != bc.lookaround_cache.end()) {
-        DEBUG_PRINTF("reusing look at idx %zu\n", it->second);
-        return verify_u32(it->second);
+        look_index = verify_u32(it->second.first);
+        reach_index = verify_u32(it->second.second);
+        DEBUG_PRINTF("reusing look at idx %u\n", look_index);
+        DEBUG_PRINTF("reusing reach at idx %u\n", reach_index);
+        return;
     }

-    // Linear scan for sequence.
-    auto seq_it = search(begin(bc.lookaround), end(bc.lookaround), begin(look),
-                         end(look));
-    if (seq_it != end(bc.lookaround)) {
-        size_t idx = distance(begin(bc.lookaround), seq_it);
-        DEBUG_PRINTF("linear scan found look at idx %zu\n", idx);
-        bc.lookaround_cache.emplace(look, idx);
-        return verify_u32(idx);
+    size_t look_idx = bc.lookTableSize;
+    size_t reach_idx = bc.reachTableSize;
+
+    if (look.size() == 1) {
+        bc.lookTableSize += look.front().size();
+        bc.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN;
+    } else {
+        bc.lookTableSize += look.size();
+        bc.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN;
     }

-    // New sequence.
-    size_t idx = bc.lookaround.size();
-    bc.lookaround_cache.emplace(look, idx);
-    insert(&bc.lookaround, bc.lookaround.end(), look);
-    DEBUG_PRINTF("adding look at idx %zu\n", idx);
-    return verify_u32(idx);
+    bc.lookaround_cache.emplace(look, make_pair(look_idx, reach_idx));
+    bc.lookaround.emplace_back(look);
+
+    DEBUG_PRINTF("adding look at idx %zu\n", look_idx);
+    DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx);
+    look_index = verify_u32(look_idx);
+    reach_index = verify_u32(reach_idx);
 }

 static
@@ -2977,7 +3034,7 @@ struct cmpNibble {
 // Insert all pairs of bucket and offset into buckets.
 static really_inline
 void getAllBuckets(const vector<LookEntry> &look,
-                   map<u32, vector<s32>, cmpNibble> &buckets, u32 &neg_mask) {
+                   map<u32, vector<s32>, cmpNibble> &buckets, u64a &neg_mask) {
     s32 base_offset = verify_s32(look.front().offset);
     for (const auto &entry : look) {
         CharReach cr = entry.reach;
@@ -2985,7 +3042,7 @@ void getAllBuckets(const vector<LookEntry> &look,
         if (cr.count() > 128) {
             cr.flip();
         } else {
-            neg_mask ^= 1 << (entry.offset - base_offset);
+            neg_mask ^= 1ULL << (entry.offset - base_offset);
         }

         map<u16, u16> lo2hi; // We treat the ASCII table as a 16x16 grid.
@@ -3037,23 +3094,16 @@ void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
 }

 static
-bool makeRoleShufti(const vector<LookEntry> &look,
-                    RoseProgram &program) {
-
-    s32 base_offset = verify_s32(look.front().offset);
-    if (look.back().offset >= base_offset + 32) {
-        return false;
-    }
-    array<u8, 32> hi_mask, lo_mask;
-    hi_mask.fill(0);
-    lo_mask.fill(0);
-    array<u8, 32> bucket_select_hi, bucket_select_lo;
-    bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
-    bucket_select_lo.fill(0);
-    u8 bit_index = 0; // number of buckets
+bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask,
+                    array<u8, 32> &lo_mask, u8 *bucket_select_hi,
+                    u8 *bucket_select_lo, u64a &neg_mask,
+                    u8 &bit_idx, size_t len) {
     map<u32, u32> nib; // map every bucket to its bucket number.
     map<u32, vector<s32>, cmpNibble> bucket2offsets;
-    u32 neg_mask = ~0u;
+    s32 base_offset = look.front().offset;
+
+    bit_idx = 0;
+    neg_mask = ~0ULL;

     getAllBuckets(look, bucket2offsets, neg_mask);

@@ -3061,15 +3111,15 @@ bool makeRoleShufti(const vector<LookEntry> &look,
         u32 hi_lo = it.first;
         // New bucket.
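+        // hi_lo packs one shufti bucket: the high 16 bits are the bitmap of
+        // high nibbles it matches, the low 16 bits the bitmap of low nibbles.
+        // For example, a reach of just 'A' (0x41) encodes as
+        // ((1 << 4) << 16) | (1 << 1).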
         if (!nib[hi_lo]) {
-            if (bit_index >= 16) {
+            if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) {
                 return false;
             }
-            nib[hi_lo] = 1 << bit_index;
+            nib[hi_lo] = 1 << bit_idx;
             nibUpdate(nib, hi_lo);
-            nibMaskUpdate(hi_mask, hi_lo >> 16, bit_index);
-            nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_index);
-            bit_index++;
+            nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx);
+            nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx);
+            bit_idx++;
         }

         DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);
@@ -3082,6 +3132,113 @@ bool makeRoleShufti(const vector<LookEntry> &look,
             bucket_select_lo[offset - base_offset] |= nib_lo;
         }
     }
+    return true;
+}
+
+static
+unique_ptr<RoseInstrCheckShufti16x8>
+makeCheckShufti16x8(u32 offset_range, u8 bucket_idx,
+                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                    const array<u8, 32> &bucket_select_mask,
+                    u32 neg_mask, s32 base_offset,
+                    const RoseInstruction *end_inst) {
+    if (offset_range > 16 || bucket_idx > 8) {
+        return nullptr;
+    }
+    array<u8, 32> nib_mask;
+    array<u8, 16> bucket_select_mask_16;
+    copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
+    copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
+    copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16,
+         bucket_select_mask_16.begin());
+    return make_unique<RoseInstrCheckShufti16x8>
+           (nib_mask, bucket_select_mask_16,
+            neg_mask & 0xffff, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstrCheckShufti32x8>
+makeCheckShufti32x8(u32 offset_range, u8 bucket_idx,
+                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                    const array<u8, 32> &bucket_select_mask,
+                    u32 neg_mask, s32 base_offset,
+                    const RoseInstruction *end_inst) {
+    if (offset_range > 32 || bucket_idx > 8) {
+        return nullptr;
+    }
+
+    array<u8, 16> hi_mask_16;
+    array<u8, 16> lo_mask_16;
+    copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
+    copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
+    return make_unique<RoseInstrCheckShufti32x8>
+           (hi_mask_16, lo_mask_16, bucket_select_mask,
+            neg_mask, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstrCheckShufti16x16>
+makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
+                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                     const array<u8, 32> &bucket_select_mask_lo,
+                     const array<u8, 32> &bucket_select_mask_hi,
+                     u32 neg_mask, s32 base_offset,
+                     const RoseInstruction *end_inst) {
+    if (offset_range > 16 || bucket_idx > 16) {
+        return nullptr;
+    }
+
+    array<u8, 32> bucket_select_mask_32;
+    copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16,
+         bucket_select_mask_32.begin());
+    copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16,
+         bucket_select_mask_32.begin() + 16);
+    return make_unique<RoseInstrCheckShufti16x16>
+           (hi_mask, lo_mask, bucket_select_mask_32,
+            neg_mask & 0xffff, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstrCheckShufti32x16>
+makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
+                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                     const array<u8, 32> &bucket_select_mask_lo,
+                     const array<u8, 32> &bucket_select_mask_hi,
+                     u32 neg_mask, s32 base_offset,
+                     const RoseInstruction *end_inst) {
+    if (offset_range > 32 || bucket_idx > 16) {
+        return nullptr;
+    }
+
+    return make_unique<RoseInstrCheckShufti32x16>
+           (hi_mask, lo_mask, bucket_select_mask_hi,
+            bucket_select_mask_lo, neg_mask, base_offset, end_inst);
+}
+
+static
+bool makeRoleShufti(const vector<LookEntry> &look,
+                    RoseProgram &program) {
+
+    s32 base_offset = verify_s32(look.front().offset);
+    if (look.back().offset >= base_offset + 32) {
+        return false;
+    }
+
+    u8 bucket_idx = 0; // number of buckets
+    u64a neg_mask_64;
+    array<u8, 32> hi_mask;
+    array<u8, 32> lo_mask;
+    array<u8, 32> bucket_select_hi;
+    array<u8, 32> bucket_select_lo;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
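+    // Masks are assembled at the full 32-byte width here; the narrower 16x8
+    // and 16x16 variants copy out just the low 16 bytes they need.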
+    bucket_select_lo.fill(0);
+
+    if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
+                        bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
+        return false;
+    }
+    u32 neg_mask = (u32)neg_mask_64;

     DEBUG_PRINTF("hi_mask %s\n",
                  convertMaskstoString(hi_mask.data(), 32).c_str());
@@ -3093,48 +3250,29 @@ bool makeRoleShufti(const vector<LookEntry> &look,
                  convertMaskstoString(bucket_select_lo.data(), 32).c_str());

     const auto *end_inst = program.end_instruction();
-    if (bit_index < 8) {
-        if (look.back().offset < base_offset + 16) {
-            neg_mask &= 0xffff;
-            array<u8, 32> nib_mask;
-            array<u8, 16> bucket_select_mask_16;
-            copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
-            copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
-            copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
-                 bucket_select_mask_16.begin());
-            auto ri = make_unique<RoseInstrCheckShufti16x8>
-                      (nib_mask, bucket_select_mask_16,
-                       neg_mask, base_offset, end_inst);
-            program.add_before_end(move(ri));
-        } else {
-            array<u8, 16> hi_mask_16;
-            array<u8, 16> lo_mask_16;
-            copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
-            copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
-            auto ri = make_unique<RoseInstrCheckShufti32x8>
-                      (hi_mask_16, lo_mask_16, bucket_select_lo,
-                       neg_mask, base_offset, end_inst);
-            program.add_before_end(move(ri));
-        }
-    } else {
-        if (look.back().offset < base_offset + 16) {
-            neg_mask &= 0xffff;
-            array<u8, 32> bucket_select_mask_32;
-            copy(bucket_select_lo.begin(), bucket_select_lo.begin() + 16,
-                 bucket_select_mask_32.begin());
-            copy(bucket_select_hi.begin(), bucket_select_hi.begin() + 16,
-                 bucket_select_mask_32.begin() + 16);
-            auto ri = make_unique<RoseInstrCheckShufti16x16>
-                      (hi_mask, lo_mask, bucket_select_mask_32,
-                       neg_mask, base_offset, end_inst);
-            program.add_before_end(move(ri));
-        } else {
-            auto ri = make_unique<RoseInstrCheckShufti32x16>
-                      (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo,
-                       neg_mask, base_offset, end_inst);
-            program.add_before_end(move(ri));
-        }
+    s32 offset_range = look.back().offset - base_offset + 1;
+
+    auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, neg_mask, base_offset,
+                                  end_inst);
+    if (!ri) {
+        ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask,
+                                 bucket_select_lo, neg_mask, base_offset,
+                                 end_inst);
     }
+    if (!ri) {
+        ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, bucket_select_hi,
+                                  neg_mask, base_offset, end_inst);
+    }
+    if (!ri) {
+        ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, bucket_select_hi,
+                                  neg_mask, base_offset, end_inst);
+    }
+    assert(ri);
+    program.add_before_end(move(ri));
+
     return true;
 }

@@ -3153,9 +3291,13 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,

     if (look.size() == 1) {
         s8 offset = look.begin()->offset;
-        u32 look_idx = addLookaround(bc, look);
-        auto ri = make_unique(offset, look_idx,
-                              program.end_instruction());
+        u32 look_idx, reach_idx;
+        vector<vector<LookEntry>> lookaround;
+        lookaround.emplace_back(look);
+        addLookaround(bc, lookaround, look_idx, reach_idx);
+        // We don't need look_idx here.
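+        // (The check's offset is passed inline, so at runtime only the reach
+        // bitvector at reach_idx is needed.)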
+        auto ri = make_unique(offset, reach_idx,
+                              program.end_instruction());
         program.add_before_end(move(ri));
         return;
     }

@@ -3172,10 +3314,242 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,
         return;
     }

-    u32 look_idx = addLookaround(bc, look);
+    u32 look_idx, reach_idx;
+    vector<vector<LookEntry>> lookaround;
+    lookaround.emplace_back(look);
+    addLookaround(bc, lookaround, look_idx, reach_idx);
     u32 look_count = verify_u32(look.size());

-    auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, look_count,
+    auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, reach_idx,
+                                                    look_count,
                                                     program.end_instruction());
     program.add_before_end(move(ri));
 }

+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+static UNUSED
+string dumpMultiLook(const vector<LookEntry> &looks) {
+    ostringstream oss;
+    for (auto it = looks.begin(); it != looks.end(); ++it) {
+        if (it != looks.begin()) {
+            oss << ", ";
+        }
+        oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}";
+    }
+    return oss.str();
+}
+#endif
+
+static
+bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
+                             RoseProgram &program) {
+    if (multi_look.empty()) {
+        return false;
+    }
+
+    // Find the base offset.
+    assert(!multi_look[0].empty());
+    s32 base_offset = multi_look[0].front().offset;
+    s32 last_start = base_offset;
+    s32 end_offset = multi_look[0].back().offset;
+    size_t multi_len = 0;
+
+    for (const auto &look : multi_look) {
+        assert(look.size() > 0);
+        multi_len += look.size();
+
+        LIMIT_TO_AT_MOST(&base_offset, look.front().offset);
+        ENSURE_AT_LEAST(&last_start, look.front().offset);
+        ENSURE_AT_LEAST(&end_offset, look.back().offset);
+    }
+
+    assert(last_start < 0);
+
+    if (end_offset - base_offset >= MULTIPATH_MAX_LEN) {
+        return false;
+    }
+
+    if (multi_len <= 16) {
+        multi_len = 16;
+    } else if (multi_len <= 32) {
+        multi_len = 32;
+    } else if (multi_len <= 64) {
+        multi_len = 64;
+    } else {
+        DEBUG_PRINTF("too long for multi-path\n");
+        return false;
+    }
+
+    vector<LookEntry> linear_look;
+    array<u8, 64> data_select_mask;
+    data_select_mask.fill(0);
+    u64a hi_bits_mask = 0;
+    u64a lo_bits_mask = 0;
+
+    for (const auto &look : multi_look) {
+        assert(linear_look.size() < 64);
+        lo_bits_mask |= 1LLU << linear_look.size();
+        for (const auto &entry : look) {
+            assert(entry.offset - base_offset < MULTIPATH_MAX_LEN);
+            data_select_mask[linear_look.size()] =
+                verify_u8(entry.offset - base_offset);
+            linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach);
+        }
+        hi_bits_mask |= 1LLU << (linear_look.size() - 1);
+    }
+
+    u8 bit_index = 0; // number of buckets
+    u64a neg_mask;
+    array<u8, 32> hi_mask;
+    array<u8, 32> lo_mask;
+    array<u8, 64> bucket_select_hi;
+    array<u8, 64> bucket_select_lo;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    bucket_select_hi.fill(0);
+    bucket_select_lo.fill(0);
+
+    if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(),
+                        bucket_select_lo.data(), neg_mask, bit_index,
+                        multi_len)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("hi_mask %s\n",
+                 convertMaskstoString(hi_mask.data(), 16).c_str());
+    DEBUG_PRINTF("lo_mask %s\n",
+                 convertMaskstoString(lo_mask.data(), 16).c_str());
+    DEBUG_PRINTF("bucket_select_hi %s\n",
+                 convertMaskstoString(bucket_select_hi.data(), 64).c_str());
+    DEBUG_PRINTF("bucket_select_lo %s\n",
+                 convertMaskstoString(bucket_select_lo.data(), 64).c_str());
+    DEBUG_PRINTF("data_select_mask %s\n",
+                 convertMaskstoString(data_select_mask.data(), 64).c_str());
+    DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask);
+    DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask);
+    DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
+    DEBUG_PRINTF("base_offset %d\n", base_offset);
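+    // last_start is the latest (largest) start offset over all paths: the
+    // check may only run once at least -last_start bytes of history exist.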
DEBUG_PRINTF("last_start %d\n", last_start); + + // Since we don't have 16x16 now, just call 32x16 instead. + if (bit_index > 8) { + assert(multi_len <= 32); + multi_len = 32; + } + + const auto *end_inst = program.end_instruction(); + assert(multi_len == 16 || multi_len == 32 || multi_len == 64); + if (multi_len == 16) { + neg_mask &= 0xffff; + assert(!(hi_bits_mask & ~0xffffULL)); + assert(!(lo_bits_mask & ~0xffffULL)); + assert(bit_index <=8); + array nib_mask; + copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin()); + copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16); + + auto ri = make_unique + (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } else if (multi_len == 32) { + neg_mask &= 0xffffffff; + assert(!(hi_bits_mask & ~0xffffffffULL)); + assert(!(lo_bits_mask & ~0xffffffffULL)); + if (bit_index <= 8) { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } else { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } + } else { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } + return true; +} + +static +void makeRoleMultipathLookaround(build_context &bc, + const vector> &multi_look, + RoseProgram &program) { + assert(!multi_look.empty()); + assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); + vector> ordered_look; + set look_offset; + + assert(!multi_look[0].empty()); + s32 last_start = multi_look[0][0].offset; + + // build offset table. 
+    for (const auto &look : multi_look) {
+        assert(look.size() > 0);
+        last_start = max(last_start, (s32)look.begin()->offset);
+
+        for (const auto &t : look) {
+            look_offset.insert(t.offset);
+        }
+    }
+
+    array<u8, MULTIPATH_MAX_LEN> start_mask;
+    if (multi_look.size() < MAX_LOOKAROUND_PATHS) {
+        start_mask.fill((1 << multi_look.size()) - 1);
+    } else {
+        start_mask.fill(0xff);
+    }
+
+    u32 path_idx = 0;
+    for (const auto &look : multi_look) {
+        for (const auto &t : look) {
+            assert(t.offset >= (int)*look_offset.begin());
+            size_t update_offset = t.offset - *look_offset.begin() + 1;
+            if (update_offset < start_mask.size()) {
+                start_mask[update_offset] &= ~(1 << path_idx);
+            }
+        }
+        path_idx++;
+    }
+
+    for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) {
+        start_mask[i] &= start_mask[i - 1];
+        DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]);
+    }
+
+    assert(look_offset.size() <= MULTIPATH_MAX_LEN);
+
+    assert(last_start < 0);
+
+    for (const auto &offset : look_offset) {
+        vector<LookEntry> multi_entry;
+        multi_entry.resize(MAX_LOOKAROUND_PATHS);
+
+        for (size_t i = 0; i < multi_look.size(); i++) {
+            for (const auto &t : multi_look[i]) {
+                if (t.offset == offset) {
+                    multi_entry[i] = t;
+                }
+            }
+        }
+        ordered_look.emplace_back(multi_entry);
+    }
+
+    u32 look_idx, reach_idx;
+    addLookaround(bc, ordered_look, look_idx, reach_idx);
+    u32 look_count = verify_u32(ordered_look.size());
+
+    auto ri = make_unique<RoseInstrMultipathLookaround>(look_idx, reach_idx,
+                                                        look_count, last_start,
+                                                        start_mask,
+                                                        program.end_instruction());
     program.add_before_end(move(ri));
 }

@@ -3187,25 +3561,34 @@ void makeRoleLookaround(const RoseBuildImpl &build, build_context &bc,
         return;
     }

-    vector<LookEntry> look;
+    vector<vector<LookEntry>> looks;

     // Lookaround from leftfix (mandatory).
     if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) {
         DEBUG_PRINTF("using leftfix lookaround\n");
-        look = bc.leftfix_info.at(v).lookaround;
+        looks = bc.leftfix_info.at(v).lookaround;
     }

     // We may be able to find more lookaround info (advisory) and merge it
     // in.
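+    // (Advisory masks from the graph are only merged into the single-path
+    // case below; multi-path lookarounds come solely from the leftfix.)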
- vector look_more; - findLookaroundMasks(build, v, look_more); - mergeLookaround(look, look_more); - - if (look.empty()) { + if (looks.size() <= 1) { + vector look; + vector look_more; + if (!looks.empty()) { + look = move(looks.front()); + } + findLookaroundMasks(build, v, look_more); + mergeLookaround(look, look_more); + if (!look.empty()) { + makeLookaroundInstruction(bc, look, program); + } return; } - makeLookaroundInstruction(bc, look, program); + if (!makeRoleMultipathShufti(looks, program)) { + assert(looks.size() <= 8); + makeRoleMultipathLookaround(bc, looks, program); + } } static diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 0e53d59d..30dccb1a 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -569,10 +569,20 @@ static CharReach bitvectorToReach(const u8 *reach) { CharReach cr; - for (size_t i = 0; i < 256; i++) { + for (size_t i = 0; i < N_CHARS; i++) { if (reach[i / 8] & (1U << (i % 8))) { cr.set(i); + } + } + return cr; +} +static +CharReach multiBitvectorToReach(const u8 *reach, u8 path_mask) { + CharReach cr; + for (size_t i = 0; i < N_CHARS; i++) { + if (reach[i] & path_mask) { + cr.set(i); } } return cr; @@ -587,9 +597,9 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); const u8 *reach_base = base + t->lookaroundReachOffset; - const s8 *look = look_base + ri->index; + const s8 *look = look_base + ri->look_index; const s8 *look_end = look + ri->count; - const u8 *reach = reach_base + ri->index * REACH_BITVECTOR_LEN; + const u8 *reach = reach_base + ri->reach_index; os << " contents:" << endl; @@ -601,6 +611,41 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, } } +static +void dumpMultipathLookaround(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_MULTIPATH_LOOKAROUND *ri) { + assert(ri); + + const u8 *base = (const u8 *)t; + const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); + const u8 *reach_base = base + t->lookaroundReachOffset; + + const s8 *look_begin = look_base + ri->look_index; + const s8 *look_end = look_begin + ri->count; + const u8 *reach_begin = reach_base + ri->reach_index; + + os << " contents:" << endl; + + u32 path_mask = ri->start_mask[0]; + while (path_mask) { + u32 path = findAndClearLSB_32(&path_mask); + os << " Path #" << path << ":" << endl; + os << " "; + + const s8 *look = look_begin; + const u8 *reach = reach_begin; + for (; look < look_end; look++, reach += MULTI_REACH_BITVECTOR_LEN) { + CharReach cr = multiBitvectorToReach(reach, 1U << path); + if (cr.any() && !cr.all()) { + os << "<" << int(*look) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << "> "; + } + } + os << endl; + } +} + static vector sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) { vector keys; @@ -666,7 +711,126 @@ string dumpStrMask(const u8 *mask, size_t len) { return oss.str(); } -#define PROGRAM_CASE(name) \ +static +CharReach shufti2cr(const u8 *lo, const u8 *hi, u8 bucket_mask) { + CharReach cr; + for (u32 i = 0; i < N_CHARS; i++) { + if(lo[i & 0xf] & hi[i >> 4] & bucket_mask) { + cr.set(i); + } + } + return cr; +} + +static +void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *bucket_mask, u32 neg_mask, s32 offset) { + assert(len == 16 || len == 32); + os << " contents:" << endl; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + + if (neg_mask & (1U << idx)) { + cr.flip(); + } + + if (cr.any() && 
!cr.all()) { + os << " " << std::setw(4) << std::setfill(' ') + << int(offset + idx) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << endl; + } + } +} + +static +void dumpLookaroundShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask, + const u8 *bucket_mask_2, u32 neg_mask, s32 offset) { + assert(len == 16 || len == 32); + os << " contents:" << endl; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]); + + if (neg_mask & (1U << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + os << " " << std::setw(4) << std::setfill(' ') + << int(offset + idx) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << endl; + } + } +} + +static +void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *bucket_mask, const u8 *data_offset, + u64a neg_mask, s32 base_offset) { + assert(len == 16 || len == 32 || len == 64); + os << " contents:" << endl; + u32 path = 0; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + + if (neg_mask & (1ULL << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) { + path++; + if (idx) { + os << endl; + } + os << " Path #" << path << ":" << endl; + os << " "; + } + + os << "<" << int(base_offset + data_offset[idx]) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << "> "; + } + } + os << endl; +} + +static +void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, + const u8 *lo_2, const u8 *hi_2, const u8 *bucket_mask, + const u8 *bucket_mask_2, const u8 *data_offset, + u32 neg_mask, s32 base_offset) { + assert(len == 16 || len == 32 || len == 64); + os << " contents:"; + u32 path = 0; + for (u32 idx = 0; idx < len; idx++) { + CharReach cr = shufti2cr(lo, hi, bucket_mask[idx]); + cr |= shufti2cr(lo_2, hi_2, bucket_mask_2[idx]); + + if (neg_mask & (1ULL << idx)) { + cr.flip(); + } + + if (cr.any() && !cr.all()) { + if (idx == 0 || data_offset[idx - 1] > data_offset[idx]) { + path++; + os << endl; + os << " Path #" << path << ":" << endl; + os << " "; + } + + os << "<" << int(base_offset + data_offset[idx]) << ": "; + describeClass(os, cr, 1000, CC_OUT_TEXT); + os << "> "; + } + } + os << endl; +} + + #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ << ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \ @@ -741,7 +905,8 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_LOOKAROUND) { - os << " index " << ri->index << endl; + os << " look_index " << ri->look_index << endl; + os << " reach_index " << ri->reach_index << endl; os << " count " << ri->count << endl; os << " fail_jump " << offset + ri->fail_jump << endl; dumpLookaround(os, t, ri); @@ -795,8 +960,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 16, ri->nib_mask, ri->nib_mask + 16, + ri->bucket_select_mask, ri->neg_mask, + ri->offset); } PROGRAM_NEXT_INSTRUCTION @@ -811,8 +981,13 @@ void 
dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->bucket_select_mask, ri->neg_mask, + ri->offset); } PROGRAM_NEXT_INSTRUCTION @@ -827,8 +1002,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { << dumpStrMask(ri->bucket_select_mask, sizeof(ri->bucket_select_mask)) << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 16, ri->lo_mask, ri->hi_mask, + ri->lo_mask + 16, ri->hi_mask + 16, + ri->bucket_select_mask, + ri->bucket_select_mask + 16, + ri->neg_mask, ri->offset); } PROGRAM_NEXT_INSTRUCTION @@ -847,8 +1029,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { << dumpStrMask(ri->bucket_select_mask_lo, sizeof(ri->bucket_select_mask_lo)) << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; os << " offset " << ri->offset << endl; os << " fail_jump " << offset + ri->fail_jump << endl; + dumpLookaroundShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->lo_mask + 16, ri->hi_mask + 16, + ri->bucket_select_mask_lo, + ri->bucket_select_mask_hi, + ri->neg_mask, ri->offset); } PROGRAM_NEXT_INSTRUCTION @@ -1103,6 +1292,146 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(CLEAR_WORK_DONE) {} PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(MULTIPATH_LOOKAROUND) { + os << " look_index " << ri->look_index << endl; + os << " reach_index " << ri->reach_index << endl; + os << " count " << ri->count << endl; + os << " last_start " << ri->last_start << endl; + os << " start_mask " + << dumpStrMask(ri->start_mask, sizeof(ri->start_mask)) + << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathLookaround(os, t, ri); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) { + os << " nib_mask " + << dumpStrMask(ri->nib_mask, sizeof(ri->nib_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(4) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(4) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(4) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 16, ri->nib_mask, ri->nib_mask + 16, + ri->bucket_select_mask, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " 
bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->bucket_select_mask, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask_hi " + << dumpStrMask(ri->bucket_select_mask_hi, + sizeof(ri->bucket_select_mask_hi)) + << endl; + os << " bucket_select_mask_lo " + << dumpStrMask(ri->bucket_select_mask_lo, + sizeof(ri->bucket_select_mask_lo)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(8) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 32, ri->lo_mask, ri->hi_mask, + ri->lo_mask + 16, ri->hi_mask + 16, + ri->bucket_select_mask_lo, + ri->bucket_select_mask_hi, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) { + os << " hi_mask " + << dumpStrMask(ri->hi_mask, sizeof(ri->hi_mask)) + << endl; + os << " lo_mask " + << dumpStrMask(ri->lo_mask, sizeof(ri->lo_mask)) + << endl; + os << " bucket_select_mask " + << dumpStrMask(ri->bucket_select_mask, + sizeof(ri->bucket_select_mask)) + << endl; + os << " data_select_mask " + << dumpStrMask(ri->data_select_mask, + sizeof(ri->data_select_mask)) + << endl; + os << " hi_bits_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->hi_bits_mask << std::dec << endl; + os << " lo_bits_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->lo_bits_mask << std::dec << endl; + os << " neg_mask 0x" << std::hex << std::setw(16) + << std::setfill('0') << ri->neg_mask << std::dec << endl; + os << " base_offset " << ri->base_offset << endl; + os << " last_start " << ri->last_start << endl; + os << " fail_jump " << offset + ri->fail_jump << endl; + dumpMultipathShufti(os, 64, ri->lo_mask, ri->hi_mask, + ri->bucket_select_mask, + ri->data_select_mask, + ri->neg_mask, ri->base_offset); + } + PROGRAM_NEXT_INSTRUCTION + default: os << " UNKNOWN (code " << int{code} << ")" << endl; os << " " << endl; diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index ae990f7f..07ab7c59 100644 --- 
a/src/rose/rose_build_lookaround.cpp
+++ b/src/rose/rose_build_lookaround.cpp
@@ -45,6 +45,7 @@

 #include
 #include
+#include

 using namespace std;

@@ -62,6 +63,20 @@ static const u32 MAX_LOOKAROUND_ENTRIES = 16;
 /** \brief We would rather have lookarounds with smaller reach than this. */
 static const u32 LOOKAROUND_WIDE_REACH = 200;

+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+static UNUSED
+string dump(const map<s32, CharReach> &look) {
+    ostringstream oss;
+    for (auto it = look.begin(), ite = look.end(); it != ite; ++it) {
+        if (it != look.begin()) {
+            oss << ", ";
+        }
+        oss << "{" << it->first << ": " << describeClass(it->second) << "}";
+    }
+    return oss.str();
+}
+#endif
+
 static
 void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
     ue2::flat_set<NFAVertex> curr, next;
@@ -298,21 +313,6 @@ void findBackwardReach(const RoseGraph &g, const RoseVertex v,
     // TODO: implement DFA variants if necessary.
 }

-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-#include
-static UNUSED
-string dump(const map<s32, CharReach> &look) {
-    ostringstream oss;
-    for (auto it = look.begin(), ite = look.end(); it != ite; ++it) {
-        if (it != look.begin()) {
-            oss << ", ";
-        }
-        oss << "{" << it->first << ": " << describeClass(it->second) << "}";
-    }
-    return oss.str();
-}
-#endif
-
 static
 void normalise(map<s32, CharReach> &look) {
     // We can erase entries where the reach is "all characters".
@@ -554,6 +554,76 @@ void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
     DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
 }

+static
+void normaliseLeftfix(map<s32, CharReach> &look) {
+    // We can erase entries where the reach is "all characters", except for the
+    // very first one -- this might be required to establish a minimum bound on
+    // the literal's match offset.
+
+    // TODO: It would be cleaner to use a literal program instruction to check
+    // the minimum bound explicitly.
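+    // e.g. {-5: any, -3: 'a', -2: any} keeps the full-reach entry at -5 (it
+    // enforces that at least five bytes precede the match) but drops the one
+    // at -2.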
+
+    if (look.empty()) {
+        return;
+    }
+
+    const auto earliest = begin(look)->first;
+
+    vector<s32> dead;
+    for (const auto &m : look) {
+        if (m.second.all() && m.first != earliest) {
+            dead.push_back(m.first);
+        }
+    }
+    erase_all(&look, dead);
+}
+
+static
+bool trimMultipathLeftfix(const RoseBuildImpl &build, const RoseVertex v,
+                          vector<map<s32, CharReach>> &looks) {
+    size_t path_count = 0;
+    for (auto &look : looks) {
+        ++path_count;
+        DEBUG_PRINTF("Path #%zu\n", path_count);
+
+        assert(!look.empty());
+        trimLiterals(build, v, look);
+
+        if (look.empty()) {
+            return false;
+        }
+
+        // This could be optimized: only the longest path needs to keep its
+        // leading full-reach byte.
+        normaliseLeftfix(look);
+
+        if (look.size() > MAX_LOOKAROUND_ENTRIES) {
+            DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size());
+            return false;
+        }
+    }
+    return true;
+}
+
+static
+void transToLookaround(const vector<map<s32, CharReach>> &looks,
+                       vector<vector<LookEntry>> &lookarounds) {
+    for (const auto &look : looks) {
+        vector<LookEntry> lookaround;
+        DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
+        lookaround.reserve(look.size());
+        for (const auto &m : look) {
+            if (m.first < -128 || m.first > 127) {
+                DEBUG_PRINTF("range too big\n");
+                lookarounds.clear();
+                return;
+            }
+            s8 offset = verify_s8(m.first);
+            lookaround.emplace_back(offset, m.second);
+        }
+        lookarounds.push_back(lookaround);
+    }
+}
+
 void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
                          vector<LookEntry> &lookaround) {
     lookaround.clear();
@@ -592,115 +662,155 @@ void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
 }

 static
-bool hasSingleFloatingStart(const NGHolder &g) {
-    NFAVertex initial = NGHolder::null_vertex();
-    for (auto v : adjacent_vertices_range(g.startDs, g)) {
-        if (v == g.startDs) {
-            continue;
-        }
-        if (initial != NGHolder::null_vertex()) {
-            DEBUG_PRINTF("more than one start\n");
-            return false;
-        }
-        initial = v;
-    }
+bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
+                        u32 bucket_size) {
+    set<u32> bucket;
+    for (const auto &look : looks) {
+        for (const auto &l : look) {
+            CharReach cr = l.second;
+            if (cr.count() > 128) {
+                cr.flip();
+            }
+            map<u16, u16> lo2hi;

-    if (initial == NGHolder::null_vertex()) {
-        DEBUG_PRINTF("no floating starts\n");
-        return false;
-    }
+            for (size_t i = cr.find_first(); i != CharReach::npos;) {
+                u8 it_hi = i >> 4;
+                u16 low_encode = 0;
+                while (i != CharReach::npos && (i >> 4) == it_hi) {
+                    low_encode |= 1 << (i & 0xf);
+                    i = cr.find_next(i);
+                }
+                lo2hi[low_encode] |= 1 << it_hi;
+            }

-    // Anchored start must have no successors other than startDs and initial.
-    for (auto v : adjacent_vertices_range(g.start, g)) {
-        if (v != initial && v != g.startDs) {
-            DEBUG_PRINTF("anchored start\n");
-            return false;
+            for (const auto &it : lo2hi) {
+                u32 hi_lo = (it.second << 16) | it.first;
+                bucket.insert(hi_lo);
+            }
         }
     }
-
-    return true;
+    DEBUG_PRINTF("shufti has %zu bucket(s)\n", bucket.size());
+    return bucket.size() <= bucket_size;
 }

 static
-bool getTransientPrefixReach(const NGHolder &g, u32 lag,
-                             map<s32, CharReach> &look) {
-    if (in_degree(g.accept, g) != 1) {
-        DEBUG_PRINTF("more than one accept\n");
+bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
+                             vector<map<s32, CharReach>> &looks) {
+    if (!isAcyclic(g)) {
+        DEBUG_PRINTF("contains back-edge\n");
         return false;
     }

-    // Must be a floating chain wired to startDs.
-    if (!hasSingleFloatingStart(g)) {
-        DEBUG_PRINTF("not a single floating start\n");
+    // Must be floating chains wired to startDs.
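+    // (The graph must be acyclic and purely floating so that every path back
+    // from accept covers a bounded, fixed window of offsets.)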
+    if (!isFloating(g)) {
+        DEBUG_PRINTF("not a floating start\n");
         return false;
     }

-    NFAVertex v = *(inv_adjacent_vertices(g.accept, g).first);
-    u32 i = lag + 1;
-    while (v != g.startDs) {
-        DEBUG_PRINTF("i=%u, v=%zu\n", i, g[v].index);
-        if (is_special(v, g)) {
-            DEBUG_PRINTF("special\n");
+    vector<NFAVertex> curr;
+    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+        if (v == g.start || v == g.startDs) {
+            DEBUG_PRINTF("empty graph\n");
+            return true;
+        }
+        if (contains(g[v].reports, report)) {
+            curr.push_back(v);
+        }
+    }
+
+    assert(!curr.empty());
+
+    u32 total_len = curr.size();
+
+    for (const auto &v : curr) {
+        looks.emplace_back(map<s32, CharReach>());
+        looks.back()[0 - (lag + 1)] = g[v].char_reach;
+    }
+
+    bool curr_active = false;
+
+    /* For each offset -i, we trace the paths backwards through the vertices
+     * in curr. Once there are more than 8 paths and total_len exceeds 64,
+     * neither MULTIPATH_LOOKAROUND nor MULTIPATH_SHUFTI can be built, so we
+     * give up on the path finding. Otherwise, the loop halts when all
+     * vertices in curr are startDs.
+     */
+    for (u32 i = lag + 2; i < (lag + 2) + MAX_BACK_LEN; i++) {
+        curr_active = false;
+        size_t curr_size = curr.size();
+        if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) {
+            DEBUG_PRINTF("range is larger than 16 in multi-path\n");
             return false;
         }
-        look[0 - i] = g[v].char_reach;
-
-        NFAVertex next = NGHolder::null_vertex();
-        for (auto u : inv_adjacent_vertices_range(v, g)) {
-            if (u == g.start) {
-                continue; // Benign, checked by hasSingleFloatingStart
-            }
-            if (next == NGHolder::null_vertex()) {
-                next = u;
+        for (size_t idx = 0; idx < curr_size; idx++) {
+            NFAVertex v = curr[idx];
+            if (v == g.startDs) {
                 continue;
             }
-            DEBUG_PRINTF("branch\n");
-            return false;
-        }
+            assert(!is_special(v, g));

-        if (next == NGHolder::null_vertex() || next == v) {
-            DEBUG_PRINTF("no predecessor or only self-loop\n");
-            // This graph is malformed -- all vertices in a graph that makes it
-            // to this analysis should have predecessors.
-            assert(0);
-            return false;
-        }
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
+                if (u == g.start || u == g.startDs) {
+                    curr[idx] = g.startDs;
+                    break;
+                }
+            }

-        v = next;
-        i++;
+            if (is_special(curr[idx], g)) {
+                continue;
+            }
+
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
+                curr_active = true;
+                if (curr[idx] == v) {
+                    curr[idx] = u;
+                    looks[idx][0 - i] = g[u].char_reach;
+                    total_len++;
+                } else {
+                    curr.push_back(u);
+                    looks.push_back(looks[idx]);
+                    (looks.back())[0 - i] = g[u].char_reach;
+                    total_len += looks.back().size();
+                }
+
+                if (curr.size() > MAX_LOOKAROUND_PATHS && total_len > 64) {
+                    DEBUG_PRINTF("too many branches\n");
+                    return false;
+                }
+            }
+        }
+        if (!curr_active) {
+            break;
+        }
     }

+    if (curr_active) {
+        DEBUG_PRINTF("single path too long\n");
+        return false;
+    }
+
+    // More than 8 paths, check multi-path shufti.
+    if (curr.size() > MAX_LOOKAROUND_PATHS) {
+        u32 bucket_size = total_len > 32 ? 8 : 16;
+        if (!checkShuftiBuckets(looks, bucket_size)) {
+            DEBUG_PRINTF("shufti has too many buckets\n");
+            return false;
+        }
+    }
+
+    assert(!looks.empty());
+    if (looks.size() == 1) {
+        DEBUG_PRINTF("single lookaround\n");
+    } else {
+        DEBUG_PRINTF("multi-path lookaround\n");
+    }
     DEBUG_PRINTF("done\n");
     return true;
 }

-static
-void normaliseLeftfix(map<s32, CharReach> &look) {
-    // We can erase entries where the reach is "all characters", except for the
-    // very first one -- this might be required to establish a minimum bound on
-    // the literal's match offset.
-
-    // TODO: It would be cleaner to use a literal program instruction to check
-    // the minimum bound explicitly.
-
-    if (look.empty()) {
-        return;
-    }
-
-    const auto earliest = begin(look)->first;
-
-    vector<s32> dead;
-    for (const auto &m : look) {
-        if (m.second.all() && m.first != earliest) {
-            dead.push_back(m.first);
-        }
-    }
-    erase_all(&look, dead);
-}
-
 bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
-                           vector<LookEntry> &lookaround) {
+                           vector<vector<LookEntry>> &lookaround) {
     lookaround.clear();

     const RoseGraph &g = build.g;
@@ -716,36 +826,19 @@ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
         return false;
     }

-    map<s32, CharReach> look;
-    if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.lag, look)) {
-        DEBUG_PRINTF("not a chain\n");
+    vector<map<s32, CharReach>> looks;
+    if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report,
+                                 g[v].left.lag, looks)) {
+        DEBUG_PRINTF("graph has a loop or is too large\n");
         return false;
     }

-    trimLiterals(build, v, look);
-    normaliseLeftfix(look);
-
-    if (look.size() > MAX_LOOKAROUND_ENTRIES) {
-        DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size());
+    if (!trimMultipathLeftfix(build, v, looks)) {
         return false;
     }
+    transToLookaround(looks, lookaround);

-    if (look.empty()) {
-        DEBUG_PRINTF("lookaround empty; this is weird\n");
-        return false;
-    }
-
-    lookaround.reserve(look.size());
-    for (const auto &m : look) {
-        if (m.first < -128 || m.first > 127) {
-            DEBUG_PRINTF("range too big\n");
-            return false;
-        }
-        s8 offset = verify_s8(m.first);
-        lookaround.emplace_back(offset, m.second);
-    }
-
-    return true;
+    return !lookaround.empty();
 }

 void mergeLookaround(vector<LookEntry> &lookaround,
diff --git a/src/rose/rose_build_lookaround.h b/src/rose/rose_build_lookaround.h
index 993bd229..aea87ccf 100644
--- a/src/rose/rose_build_lookaround.h
+++ b/src/rose/rose_build_lookaround.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -36,6 +36,9 @@

 #include

+/** \brief Maximum number of paths for multi-path lookaround. */
+#define MAX_LOOKAROUND_PATHS 8
+
 namespace ue2 {

 class CharReach;
@@ -44,6 +47,7 @@ class RoseBuildImpl;
 /** \brief Lookaround entry prototype, describing the reachability at a given
 * distance from the end of a role match. */
 struct LookEntry {
+    LookEntry() : offset(0) {}
     LookEntry(s8 offset_in, const CharReach &reach_in)
         : offset(offset_in), reach(reach_in) {}
     s8 offset; //!< offset from role match location.
@@ -63,7 +67,7 @@ size_t hash_value(const LookEntry &l) {
 }

 void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
-                         std::vector<LookEntry> &lookaround);
+                         std::vector<LookEntry> &look_more);

 /**
 * \brief If possible, render the prefix of the given vertex as a lookaround.
 *
@@ -72,7 +76,7 @@
 * it can be satisfied with a lookaround alone.
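+ * (The result may now contain multiple lookaround paths rather than a single
+ * entry sequence; see MAX_LOOKAROUND_PATHS.)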
*/ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, - std::vector &lookaround); + std::vector> &lookaround); void mergeLookaround(std::vector &lookaround, const std::vector &more_lookaround); diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 1c0fd2ab..9e030e8e 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -127,7 +127,8 @@ void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { RoseInstrBase::write(dest, blob, offset_map); auto *inst = static_cast(dest); - inst->index = index; + inst->look_index = look_index; + inst->reach_index = reach_index; inst->count = count; inst->fail_jump = calc_jump(offset_map, this, target); } @@ -537,6 +538,93 @@ void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob, inst->fail_jump = calc_jump(offset_map, this, target); } +void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->look_index = look_index; + inst->reach_index = reach_index; + inst->count = count; + inst->last_start = last_start; + copy(begin(start_mask), end(start_mask), inst->start_mask); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti16x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) + const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(nib_mask), end(nib_mask), inst->nib_mask); + copy(begin(bucket_select_mask), begin(bucket_select_mask) + 16, + inst->bucket_select_mask); + copy(begin(data_select_mask), begin(data_select_mask) + 16, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti32x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) + const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); + copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); + copy(begin(bucket_select_mask), begin(bucket_select_mask) + 32, + inst->bucket_select_mask); + copy(begin(data_select_mask), begin(data_select_mask) + 32, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti32x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask_hi), begin(bucket_select_mask_hi) + 32, + inst->bucket_select_mask_hi); + copy(begin(bucket_select_mask_lo), begin(bucket_select_mask_lo) + 32, + inst->bucket_select_mask_lo); + copy(begin(data_select_mask), begin(data_select_mask) + 32, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + 
inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); + copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + copy(begin(data_select_mask), end(data_select_mask), + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + static OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { OffsetMap offset_map; diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index a63f03c8..3931f095 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -420,20 +420,24 @@ class RoseInstrCheckLookaround ROSE_STRUCT_CHECK_LOOKAROUND, RoseInstrCheckLookaround> { public: - u32 index; + u32 look_index; + u32 reach_index; u32 count; const RoseInstruction *target; - RoseInstrCheckLookaround(u32 index_in, u32 count_in, - const RoseInstruction *target_in) - : index(index_in), count(count_in), target(target_in) {} + RoseInstrCheckLookaround(u32 look_index_in, u32 reach_index_in, + u32 count_in, const RoseInstruction *target_in) + : look_index(look_index_in), reach_index(reach_index_in), + count(count_in), target(target_in) {} bool operator==(const RoseInstrCheckLookaround &ri) const { - return index == ri.index && count == ri.count && target == ri.target; + return look_index == ri.look_index && reach_index == ri.reach_index && + count == ri.count && target == ri.target; } size_t hash() const override { - return hash_all(static_cast(opcode), index, count); + return hash_all(static_cast(opcode), look_index, reach_index, + count); } void write(void *dest, RoseEngineBlob &blob, @@ -441,7 +445,8 @@ public: bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, const OffsetMap &other_offsets) const { - return index == ri.index && count == ri.count && + return look_index == ri.look_index && reach_index == ri.reach_index && + count == ri.count && offsets.at(target) == other_offsets.at(ri.target); } }; @@ -498,7 +503,7 @@ public: RoseInstrCheckMask32(std::array and_mask_in, std::array cmp_mask_in, u32 neg_mask_in, s32 offset_in, const RoseInstruction *target_in) - : and_mask(move(and_mask_in)), cmp_mask(move(cmp_mask_in)), + : and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)), neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} bool operator==(const RoseInstrCheckMask32 &ri) const { @@ -576,8 +581,8 @@ public: std::array bucket_select_mask_in, u32 neg_mask_in, s32 offset_in, const RoseInstruction *target_in) - : nib_mask(move(nib_mask_in)), - bucket_select_mask(move(bucket_select_mask_in)), + : nib_mask(std::move(nib_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} bool operator==(const RoseInstrCheckShufti16x8 &ri) const { @@ -621,8 +626,8 @@ public: std::array bucket_select_mask_in, u32 neg_mask_in, s32 offset_in, const RoseInstruction *target_in) - : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)), - 
bucket_select_mask(move(bucket_select_mask_in)), + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} bool operator==(const RoseInstrCheckShufti32x8 &ri) const { @@ -666,8 +671,8 @@ public: std::array bucket_select_mask_in, u32 neg_mask_in, s32 offset_in, const RoseInstruction *target_in) - : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)), - bucket_select_mask(move(bucket_select_mask_in)), + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} bool operator==(const RoseInstrCheckShufti16x16 &ri) const { @@ -713,9 +718,9 @@ public: std::array bucket_select_mask_lo_in, u32 neg_mask_in, s32 offset_in, const RoseInstruction *target_in) - : hi_mask(move(hi_mask_in)), lo_mask(move(lo_mask_in)), - bucket_select_mask_hi(move(bucket_select_mask_hi_in)), - bucket_select_mask_lo(move(bucket_select_mask_lo_in)), + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), + bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} bool operator==(const RoseInstrCheckShufti32x16 &ri) const { @@ -1859,6 +1864,306 @@ public: ~RoseInstrClearWorkDone() override; }; +class RoseInstrMultipathLookaround + : public RoseInstrBaseOneTarget { +public: + u32 look_index; + u32 reach_index; + u32 count; + s32 last_start; + std::array start_mask; + const RoseInstruction *target; + + RoseInstrMultipathLookaround(u32 look_index_in, u32 reach_index_in, + u32 count_in, s32 last_start_in, + std::array start_mask_in, + const RoseInstruction *target_in) + : look_index(look_index_in), reach_index(reach_index_in), + count(count_in), last_start(last_start_in), + start_mask(std::move(start_mask_in)), target(target_in) {} + + bool operator==(const RoseInstrMultipathLookaround &ri) const { + return look_index == ri.look_index && reach_index == ri.reach_index && + count == ri.count && last_start == ri.last_start && + start_mask == ri.start_mask && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), look_index, reach_index, + count, last_start, start_mask); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrMultipathLookaround &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return look_index == ri.look_index && reach_index == ri.reach_index && + count == ri.count && last_start == ri.last_start && + start_mask == ri.start_mask && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti16x8 + : public RoseInstrBaseOneTarget { +public: + std::array nib_mask; + std::array bucket_select_mask; + std::array data_select_mask; + u16 hi_bits_mask; + u16 lo_bits_mask; + u16 neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti16x8(std::array nib_mask_in, + std::array bucket_select_mask_in, + std::array data_select_mask_in, + u16 hi_bits_mask_in, u16 lo_bits_mask_in, + u16 neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : nib_mask(std::move(nib_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + 
data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti16x8 &ri) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), nib_mask, + bucket_select_mask, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti16x8 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti32x8 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask; + std::array data_select_mask; + u32 hi_bits_mask; + u32 lo_bits_mask; + u32 neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti32x8(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_in, + std::array data_select_mask_in, + u32 hi_bits_mask_in, u32 lo_bits_mask_in, + u32 neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti32x8 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti32x8 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == 
other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti32x16 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask_hi; + std::array bucket_select_mask_lo; + std::array data_select_mask; + u32 hi_bits_mask; + u32 lo_bits_mask; + u32 neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti32x16(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_hi_in, + std::array bucket_select_mask_lo_in, + std::array data_select_mask_in, + u32 hi_bits_mask_in, u32 lo_bits_mask_in, + u32 neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), + bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti32x16 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask_hi, bucket_select_mask_lo, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti32x16 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti64 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask; + std::array data_select_mask; + u64a hi_bits_mask; + u64a lo_bits_mask; + u64a neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti64(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_in, + std::array data_select_mask_in, + u64a hi_bits_mask_in, u64a lo_bits_mask_in, + u64a neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const 
RoseInstrCheckMultipathShufti64 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti64 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + class RoseInstrEnd : public RoseInstrBaseTrivial { diff --git a/src/rose/rose_common.h b/src/rose/rose_common.h index c0250aa5..34678b8f 100644 --- a/src/rose/rose_common.h +++ b/src/rose/rose_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,6 +41,15 @@ /** \brief Length in bytes of a reach bitvector, used by the lookaround code. */ #define REACH_BITVECTOR_LEN 32 +/** \brief Length in bytes of a reach bitvector for multi-path lookaround. */ +#define MULTI_REACH_BITVECTOR_LEN 256 + +/** + * \brief The max offset from the leftmost byte to the rightmost byte in + * multi-path lookaround. + */ +#define MULTIPATH_MAX_LEN 16 + /** \brief Value used to represent an invalid Rose program offset. */ #define ROSE_INVALID_PROG_OFFSET 0 diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 48f15ff0..06a9b069 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -386,7 +386,8 @@ struct RoseEngine { u32 roseCount; u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values) u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32 - * bytes each) */ + * bytes for single-path lookaround and 256 bytes + * for multi-path lookaround) */ u32 eodProgramOffset; //!< EOD program, otherwise 0. diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index cf1a9eb6..ebda679a 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -36,6 +36,7 @@ #include "som/som_operation.h" #include "rose_internal.h" #include "ue2common.h" +#include "util/simd_types.h" /** \brief Minimum alignment for each instruction in memory. */ #define ROSE_INSTR_MIN_ALIGN 8U @@ -146,7 +147,38 @@ enum RoseInstructionCode { */ ROSE_INSTR_CLEAR_WORK_DONE, - LAST_ROSE_INSTRUCTION = ROSE_INSTR_CLEAR_WORK_DONE //!< Sentinel. + /** \brief Check lookaround if it has multiple paths. */ + ROSE_INSTR_MULTIPATH_LOOKAROUND, + + /** + * \brief Use shufti to check lookaround with multiple paths. The total + * length of the paths is 16 bytes at most and shufti has 8 buckets. + * All paths can be at most 16 bytes long. 
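+ * (Editorial note: up to 8 paths are packed side by side into the vector
+ * and validated simultaneously.)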
+ */
+ ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8,
+
+ /**
+ * \brief Use shufti to check lookaround with multiple paths. The total
+ * length of the paths is 32 bytes at most and shufti has 8 buckets.
+ * All paths can be at most 16 bytes long.
+ */
+ ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8,
+
+ /**
+ * \brief Use shufti to check lookaround with multiple paths. The total
+ * length of the paths is 32 bytes at most and shufti has 16 buckets.
+ * All paths can be at most 16 bytes long.
+ */
+ ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16,
+
+ /**
+ * \brief Use shufti to check lookaround with multiple paths. The total
+ * length of the paths is 64 bytes at most and shufti has 8 buckets.
+ * All paths can be at most 16 bytes long.
+ */
+ ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64,
+
+ LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64 //!< Sentinel.
};

struct ROSE_STRUCT_END {
@@ -192,14 +224,15 @@ struct ROSE_STRUCT_CHECK_NOT_HANDLED {
struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND {
 u8 code; //!< From enum RoseInstructionCode.
 s8 offset; //!< The offset of the byte to examine.
- u32 reach_index; //!< The index of the reach table entry to use.
+ u32 reach_index; //!< Index for lookaround reach bitvectors.
 u32 fail_jump; //!< Jump forward this many bytes on failure.
};

struct ROSE_STRUCT_CHECK_LOOKAROUND {
 u8 code; //!< From enum RoseInstructionCode.
- u32 index;
- u32 count;
+ u32 look_index; //!< Index for lookaround offset list.
+ u32 reach_index; //!< Index for lookaround reach bitvectors.
+ u32 count; //!< Number of lookaround entries in this instruction.
 u32 fail_jump; //!< Jump forward this many bytes on failure.
};

@@ -526,4 +559,70 @@ struct ROSE_STRUCT_CLEAR_WORK_DONE {
 u8 code; //!< From enum RoseInstructionCode.
};

+struct ROSE_STRUCT_MULTIPATH_LOOKAROUND {
+ u8 code; //!< From enum RoseInstructionCode.
+ u32 look_index; //!< Index for lookaround offset list.
+ u32 reach_index; //!< Index for lookaround reach bitvectors.
+ u32 count; //!< Number of lookaround entries.
+ s32 last_start; //!< The latest start offset among 8 paths.
+ u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize paths whose
+ * left-most data is unavailable. */
+ u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 {
+ u8 code; //!< From enum RoseInstructionCode.
+ u8 nib_mask[2 * sizeof(m128)]; //!< High and low nibble mask in shufti.
+ u8 bucket_select_mask[sizeof(m128)]; //!< Mask for bucket assigning.
+ u8 data_select_mask[sizeof(m128)]; //!< Shuffle mask for data ordering.
+ u32 hi_bits_mask; //!< High-bits used in multi-path validation.
+ u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
+ u32 neg_mask; //!< 32-bit negation mask.
+ s32 base_offset; //!< Relative offset of the first byte.
+ s32 last_start; //!< The latest start offset among 8 paths.
+ u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 {
+ u8 code; //!< From enum RoseInstructionCode.
+ u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti.
+ u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti.
+ u8 bucket_select_mask[sizeof(m256)]; //!< Mask for bucket assigning.
+ u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering.
+ u32 hi_bits_mask; //!< High-bits used in multi-path validation.
+ u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
+ u32 neg_mask; //!< 32-bit negation mask.
+ s32 base_offset; //!< Relative offset of the first byte.
+ s32 last_start; //!< The latest start offset among 8 paths.
+ u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 {
+ u8 code; //!< From enum RoseInstructionCode.
+ u8 hi_mask[sizeof(m256)]; //!< High nibble mask in shufti.
+ u8 lo_mask[sizeof(m256)]; //!< Low nibble mask in shufti.
+ u8 bucket_select_mask_hi[sizeof(m256)]; //!< Mask for bucket assigning.
+ u8 bucket_select_mask_lo[sizeof(m256)]; //!< Mask for bucket assigning.
+ u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering.
+ u32 hi_bits_mask; //!< High-bits used in multi-path validation.
+ u32 lo_bits_mask; //!< Low-bits used in multi-path validation.
+ u32 neg_mask; //!< 32-bit negation mask.
+ s32 base_offset; //!< Relative offset of the first byte.
+ s32 last_start; //!< The latest start offset among 8 paths.
+ u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
+struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 {
+ u8 code; //!< From enum RoseInstructionCode.
+ u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti.
+ u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti.
+ u8 bucket_select_mask[2 * sizeof(m256)]; //!< Mask for bucket assigning.
+ u8 data_select_mask[2 * sizeof(m256)]; //!< Shuffle mask for data ordering.
+ u64a hi_bits_mask; //!< High-bits used in multi-path validation.
+ u64a lo_bits_mask; //!< Low-bits used in multi-path validation.
+ u64a neg_mask; //!< 64-bit negation mask.
+ s32 base_offset; //!< Relative offset of the first byte.
+ s32 last_start; //!< The latest start offset among 8 paths.
+ u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
#endif // ROSE_ROSE_PROGRAM_H
diff --git a/src/rose/validate_shufti.h b/src/rose/validate_shufti.h
index 49d2c2fe..e26d6c2b 100644
--- a/src/rose/validate_shufti.h
+++ b/src/rose/validate_shufti.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, Intel Corporation
+ * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -46,7 +46,7 @@ void dumpMask(const void *mask, int len) {
static really_inline
int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
 const m256 lo_mask, const m256 and_mask,
- const u32 neg_mask, const u16 valid_data_mask) {
+ const u32 neg_mask, const u32 valid_data_mask) {
 m256 low4bits = set32x8(0xf);
 m256 c_lo = vpshufb(lo_mask, and256(data, low4bits));
 m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4));
@@ -75,7 +75,7 @@ int validateShuftiMask16x16(const m256 data, const m256 hi_mask,
static really_inline
int validateShuftiMask16x8(const m128 data, const m256 nib_mask,
 const m128 and_mask, const u32 neg_mask,
- const u16 valid_data_mask) {
+ const u32 valid_data_mask) {
 m256 data_m256 = combine2x128(rshift64_m128(data, 4), data);
 m256 low4bits = set32x8(0xf);
 m256 c_nib = vpshufb(nib_mask, and256(data_m256, low4bits));
@@ -172,4 +172,121 @@ int validateShuftiMask32x16(const m256 data,
 u32 cmp_result = (nresult ^ neg_mask) & valid_data_mask;
 return !cmp_result;
}
+
+static really_inline
+int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) {
+ u32 t = ~(data | hi_bits);
+ t += lo_bits;
+ t &= (~data) & hi_bits;
+ DEBUG_PRINTF("t %x\n", t);
+ return !!t;
+}
+
+static really_inline
+int checkMultipath64(u64a data, u64a hi_bits, u64a lo_bits) {
+ u64a t = ~(data | hi_bits);
+ t += lo_bits;
+ t &= (~data) & hi_bits;
+ DEBUG_PRINTF("t %llx\n", t);
+ return !!t;
+}
+
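An editorial aside, not part of the patch: the carry trick in checkMultipath32/checkMultipath64 rewards a worked example. lo_bits sets each path's lowest bit and hi_bits its highest; a set bit in data means that byte position mismatched. Adding lo_bits to ~(data | hi_bits) sends a carry up through each path exactly when all of the path's lower bits are clear, and the final AND checks that the carry arrived at a high bit whose own position also matched. The self-contained sketch below uses invented names and standard C++ only:

#include <assert.h>
#include <stdint.h>

// Standalone copy of the checkMultipath32 logic, for illustration only.
static int check_multipath32(uint32_t data, uint32_t hi_bits,
                             uint32_t lo_bits) {
    uint32_t t = ~(data | hi_bits); // 1s wherever a carry may propagate
    t += lo_bits;                   // inject a carry at each path's low bit
    t &= (~data) & hi_bits;         // did a carry reach a matching high bit?
    return !!t;
}

int main() {
    // Two 4-byte paths occupying bits 0..3 and 8..11; all other bits are
    // padding, forced to 1 so carries cannot leak between paths.
    const uint32_t lo = (1u << 0) | (1u << 8);
    const uint32_t hi = (1u << 3) | (1u << 11);
    const uint32_t pad = ~((0xfu << 0) | (0xfu << 8));

    // Path one matches fully (bits 0..3 all clear); path two fails at bit 9.
    assert(check_multipath32(pad | (1u << 9), hi, lo));

    // Both paths contain a mismatch, so no carry survives.
    assert(!check_multipath32(pad | (1u << 1) | (1u << 9), hi, lo));

    return 0;
}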
+static really_inline +int validateMultipathShuftiMask16x8(const m128 data, + const m256 nib_mask, + const m128 bucket_select_mask, + const u32 hi_bits, const u32 lo_bits, + const u32 neg_mask, + const u32 valid_path_mask) { + m256 data_256 = combine2x128(rshift64_m128(data, 4), data); + m256 low4bits = set32x8(0xf); + m256 c_nib = vpshufb(nib_mask, and256(data_256, low4bits)); + m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib)); + m128 result = and128(t, bucket_select_mask); + u32 nresult = movemask128(eq128(result, zeroes128())); + u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask; + + DEBUG_PRINTF("cmp_result %x\n", cmp_result); + + return checkMultipath32(cmp_result, hi_bits, lo_bits); +} + +static really_inline +int validateMultipathShuftiMask32x8(const m256 data, + const m256 hi_mask, const m256 lo_mask, + const m256 bucket_select_mask, + const u32 hi_bits, const u32 lo_bits, + const u32 neg_mask, + const u32 valid_path_mask) { + m256 low4bits = set32x8(0xf); + m256 data_lo = and256(data, low4bits); + m256 data_hi = and256(rshift64_m256(data, 4), low4bits); + m256 c_lo = vpshufb(lo_mask, data_lo); + m256 c_hi = vpshufb(hi_mask, data_hi); + m256 c = and256(c_lo, c_hi); + m256 result = and256(c, bucket_select_mask); + u32 nresult = movemask256(eq256(result, zeroes256())); + u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask; + + DEBUG_PRINTF("cmp_result %x\n", cmp_result); + + return checkMultipath32(cmp_result, hi_bits, lo_bits); +} + +static really_inline +int validateMultipathShuftiMask32x16(const m256 data, + const m256 hi_mask_1, const m256 hi_mask_2, + const m256 lo_mask_1, const m256 lo_mask_2, + const m256 bucket_select_mask_hi, + const m256 bucket_select_mask_lo, + const u32 hi_bits, const u32 lo_bits, + const u32 neg_mask, + const u32 valid_path_mask) { + m256 low4bits = set32x8(0xf); + m256 data_lo = and256(data, low4bits); + m256 data_hi = and256(rshift64_m256(data, 4), low4bits); + m256 c_lo_1 = vpshufb(lo_mask_1, data_lo); + m256 c_lo_2 = vpshufb(lo_mask_2, data_lo); + m256 c_hi_1 = vpshufb(hi_mask_1, data_hi); + m256 c_hi_2 = vpshufb(hi_mask_2, data_hi); + m256 t1 = and256(c_lo_1, c_hi_1); + m256 t2 = and256(c_lo_2, c_hi_2); + m256 result = or256(and256(t1, bucket_select_mask_lo), + and256(t2, bucket_select_mask_hi)); + u32 nresult = movemask256(eq256(result, zeroes256())); + u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask; + + DEBUG_PRINTF("cmp_result %x\n", cmp_result); + + return checkMultipath32(cmp_result, hi_bits, lo_bits); +} + +static really_inline +int validateMultipathShuftiMask64(const m256 data_1, const m256 data_2, + const m256 hi_mask, const m256 lo_mask, + const m256 bucket_select_mask_1, + const m256 bucket_select_mask_2, + const u64a hi_bits, const u64a lo_bits, + const u64a neg_mask, + const u64a valid_path_mask) { + m256 low4bits = set32x8(0xf); + m256 c_lo_1 = vpshufb(lo_mask, and256(data_1, low4bits)); + m256 c_lo_2 = vpshufb(lo_mask, and256(data_2, low4bits)); + m256 c_hi_1 = vpshufb(hi_mask, + rshift64_m256(andnot256(low4bits, data_1), 4)); + m256 c_hi_2 = vpshufb(hi_mask, + rshift64_m256(andnot256(low4bits, data_2), 4)); + m256 t1 = and256(c_lo_1, c_hi_1); + m256 t2 = and256(c_lo_2, c_hi_2); + m256 nresult_1 = eq256(and256(t1, bucket_select_mask_1), zeroes256()); + m256 nresult_2 = eq256(and256(t2, bucket_select_mask_2), zeroes256()); + u64a nresult = (u64a)movemask256(nresult_1) | + (u64a)movemask256(nresult_2) << 32; + u64a cmp_result = (nresult ^ neg_mask) | valid_path_mask; + + DEBUG_PRINTF("cmp_result %llx\n", cmp_result); 
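+ /* Editorial note: after the OR with valid_path_mask, a set bit in
+ * cmp_result marks a byte treated as a mismatch; checkMultipath64 then
+ * succeeds only if some path's entire bit range is clear. */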
+ + return checkMultipath64(cmp_result, hi_bits, lo_bits); +} + #endif diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index bc49a046..1f884843 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -317,6 +317,11 @@ m128 sub_u8_m128(m128 a, m128 b) { return _mm_sub_epi8(a, b); } +static really_inline +m128 set64x2(u64a hi, u64a lo) { + return _mm_set_epi64x(hi, lo); +} + /**** **** 256-bit Primitives ****/ @@ -592,6 +597,18 @@ m256 mask1bit256(unsigned int n) { return loadu256(&simd_onebit_masks[mask_idx]); } +static really_inline +m256 set64x4(u64a hi_1, u64a hi_0, u64a lo_1, u64a lo_0) { +#if defined(HAVE_AVX2) + return _mm256_set_epi64x(hi_1, hi_0, lo_1, lo_0); +#else + m256 rv; + rv.hi = set64x2(hi_1, hi_0); + rv.lo = set64x2(lo_1, lo_0); + return rv; +#endif +} + #if !defined(HAVE_AVX2) // switches on bit N in the given vector. static really_inline From a2b2940f857f7bca77ed32f2e4dd9c3926569b17 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 30 Mar 2017 11:18:24 +1100 Subject: [PATCH 197/326] limex: move load from input closer to use --- src/nfa/limex_runtime_impl.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index 016d1f92..ca761924 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -202,7 +202,8 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, = LOAD_FROM_ENG(&limex->accel_and_friends); const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif - const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset); + const u8 *accelTable = + (const u8 *)((const char *)limex + limex->accelTableOffset); const union AccelAux *accelAux = (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); @@ -229,7 +230,6 @@ without_accel: return MO_CONTINUE_MATCHING; } - u8 c = input[i]; STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); @@ -238,6 +238,7 @@ without_accel: return MO_HALT_MATCHING; } + u8 c = input[i]; s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } @@ -279,7 +280,6 @@ with_accel: goto without_accel; } - u8 c = input[i]; STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); @@ -288,6 +288,7 @@ with_accel: return MO_HALT_MATCHING; } + u8 c = input[i]; s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } @@ -333,14 +334,13 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, u64a *final_loc = NULL; for (size_t i = length; i != 0; i--) { - DUMP_INPUT(i-1); + DUMP_INPUT(i - 1); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); ctx->s = s; return MO_CONTINUE_MATCHING; } - u8 c = input[i-1]; STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); @@ -349,6 +349,7 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, return MO_HALT_MATCHING; } + u8 c = input[i - 1]; s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } From 37cb93e60f5d423aecb0571cb2e1305f8f2c7599 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 30 Mar 2017 14:34:33 +1100 Subject: [PATCH 198/326] rose_build: reduce size/scope of context objects --- src/nfa/castlecompile.cpp | 4 +- src/rose/rose_build_anchored.cpp | 57 ++-- src/rose/rose_build_anchored.h | 9 +- src/rose/rose_build_bytecode.cpp | 501 +++++++++++++++---------------- src/rose/rose_build_compile.cpp | 4 +- src/rose/rose_build_dump.cpp | 91 ++++-- 
src/rose/rose_build_dump.h | 23 +- src/rose/rose_build_impl.h | 18 -- src/rose/rose_build_matchers.cpp | 194 ++++++------ src/rose/rose_build_matchers.h | 20 ++ src/rose/rose_build_program.cpp | 6 +- src/util/multibit_build.cpp | 7 +- src/util/multibit_build.h | 6 +- unit/internal/multi_bit.cpp | 38 +-- 14 files changed, 520 insertions(+), 458 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 3b40ab9a..a7fe1e90 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -560,7 +560,7 @@ buildCastle(const CastleProto &proto, DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size()); vector stale_iter; if (!may_stale.empty()) { - mmbBuildSparseIterator(stale_iter, may_stale, numRepeats); + stale_iter = mmbBuildSparseIterator(may_stale, numRepeats); } diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index d4e08bb3..6c7bb1c1 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -30,6 +30,7 @@ #include "grey.h" #include "rose_build_impl.h" +#include "rose_build_matchers.h" #include "rose_internal.h" #include "ue2common.h" #include "nfa/dfa_min.h" @@ -71,6 +72,8 @@ namespace ue2 { #define INIT_STATE (DEAD_STATE + 1) +#define NO_FRAG_ID (~0U) + // Adds a vertex with the given reach. static NFAVertex add_vertex(NGHolder &h, const CharReach &cr) { @@ -173,7 +176,7 @@ void mergeAnchoredDfas(vector> &dfas, } static -void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) { +void remapAnchoredReports(raw_dfa &rdfa, const vector &frag_map) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. if (ds.reports.empty()) { @@ -182,8 +185,8 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) { flat_set new_reports; for (auto id : ds.reports) { - assert(id < build.literal_info.size()); - new_reports.insert(build.literal_info.at(id).fragment_id); + assert(id < frag_map.size()); + new_reports.insert(frag_map[id]); } ds.reports = std::move(new_reports); } @@ -194,21 +197,37 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) { * ids) with the fragment id for each literal. */ static -void remapAnchoredReports(RoseBuildImpl &build) { +void remapAnchoredReports(RoseBuildImpl &build, const vector &frag_map) { for (auto &m : build.anchored_nfas) { for (auto &rdfa : m.second) { assert(rdfa); - remapAnchoredReports(*rdfa, build); + remapAnchoredReports(*rdfa, frag_map); } } } +/** + * Returns mapping from literal ids to fragment ids. + */ +static +vector reverseFragMap(const RoseBuildImpl &build, + const vector &fragments) { + vector rev(build.literal_info.size(), NO_FRAG_ID); + for (const auto &f : fragments) { + for (u32 lit_id : f.lit_ids) { + assert(lit_id < rev.size()); + rev[lit_id] = f.fragment_id; + } + } + return rev; +} + /** * \brief Replace the reports (which are literal final_ids) in the given * raw_dfa with program offsets. */ static -void remapIdsToPrograms(const RoseBuildImpl &build, raw_dfa &rdfa) { +void remapIdsToPrograms(const vector &fragments, raw_dfa &rdfa) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. 
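// Editorial note: at this point ds.reports still holds literal ids; the
// code below rewrites each one to its owning fragment id via frag_map.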
if (ds.reports.empty()) { @@ -217,7 +236,7 @@ void remapIdsToPrograms(const RoseBuildImpl &build, raw_dfa &rdfa) { flat_set new_reports; for (auto fragment_id : ds.reports) { - auto &frag = build.fragments.at(fragment_id); + const auto &frag = fragments.at(fragment_id); new_reports.insert(frag.lit_program_offset); } ds.reports = std::move(new_reports); @@ -731,7 +750,7 @@ int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, } static -void buildSimpleDfas(const RoseBuildImpl &build, +void buildSimpleDfas(const RoseBuildImpl &build, const vector &frag_map, vector> *anchored_dfas) { /* we should have determinised all of these before so there should be no * chance of failure. */ @@ -739,7 +758,8 @@ void buildSimpleDfas(const RoseBuildImpl &build, for (const auto &simple : build.anchored_simple) { exit_ids.clear(); for (auto lit_id : simple.second) { - exit_ids.insert(build.literal_info[lit_id].fragment_id); + assert(lit_id < frag_map.size()); + exit_ids.insert(frag_map[lit_id]); } auto h = populate_holder(simple.first, exit_ids); Automaton_Holder autom(*h); @@ -760,7 +780,8 @@ void buildSimpleDfas(const RoseBuildImpl &build, * from RoseBuildImpl. */ static -vector> getAnchoredDfas(RoseBuildImpl &build) { +vector> getAnchoredDfas(RoseBuildImpl &build, + const vector &frag_map) { vector> dfas; // DFAs that already exist as raw_dfas. @@ -773,7 +794,7 @@ vector> getAnchoredDfas(RoseBuildImpl &build) { // DFAs we currently have as simple literals. if (!build.anchored_simple.empty()) { - buildSimpleDfas(build, &dfas); + buildSimpleDfas(build, frag_map, &dfas); build.anchored_simple.clear(); } @@ -825,7 +846,8 @@ size_t buildNfas(vector &anchored_dfas, return total_size; } -vector buildAnchoredDfas(RoseBuildImpl &build) { +vector buildAnchoredDfas(RoseBuildImpl &build, + const vector &fragments) { vector dfas; if (build.anchored_nfas.empty() && build.anchored_simple.empty()) { @@ -833,9 +855,10 @@ vector buildAnchoredDfas(RoseBuildImpl &build) { return dfas; } - remapAnchoredReports(build); + const auto frag_map = reverseFragMap(build, fragments); + remapAnchoredReports(build, frag_map); - auto anch_dfas = getAnchoredDfas(build); + auto anch_dfas = getAnchoredDfas(build, frag_map); mergeAnchoredDfas(anch_dfas, build); dfas.reserve(anch_dfas.size()); @@ -847,8 +870,8 @@ vector buildAnchoredDfas(RoseBuildImpl &build) { } aligned_unique_ptr -buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, - size_t *asize) { +buildAnchoredMatcher(RoseBuildImpl &build, const vector &fragments, + vector &dfas, size_t *asize) { const CompileContext &cc = build.cc; if (dfas.empty()) { @@ -858,7 +881,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, } for (auto &rdfa : dfas) { - remapIdsToPrograms(build, rdfa); + remapIdsToPrograms(fragments, rdfa); } vector> nfas; diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index ad89df65..dd59ca32 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -44,11 +44,13 @@ namespace ue2 { class RoseBuildImpl; struct raw_dfa; +struct LitFragment; /** * \brief Construct a set of anchored DFAs from our anchored literals/engines. */ -std::vector buildAnchoredDfas(RoseBuildImpl &build); +std::vector buildAnchoredDfas(RoseBuildImpl &build, + const std::vector &fragments); /** * \brief Construct an anchored_matcher_info runtime structure from the given @@ -58,8 +60,9 @@ std::vector buildAnchoredDfas(RoseBuildImpl &build); * given in litPrograms. 
*/ aligned_unique_ptr -buildAnchoredMatcher(RoseBuildImpl &build, std::vector &dfas, - size_t *asize); +buildAnchoredMatcher(RoseBuildImpl &build, + const std::vector &fragments, + std::vector &dfas, size_t *asize); u32 anchoredStateSize(const anchored_matcher_info &atable); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a0edc711..41113457 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -187,13 +187,6 @@ struct build_context : noncopyable { /** \brief mapping from suffix to queue index. */ map suffixes; - /** \brief Number of roles with a state bit. - * - * This is set by assignStateIndices() and should be constant throughout - * the rest of the compile. - */ - size_t numStates = 0; - /** \brief Simple cache of programs written to engine blob, used for * deduplication. */ ue2::unordered_map roleStateIndices; + /** \brief State indices, for those roles that have them. + * Each vertex present has a unique state index in the range + * [0, roleStateIndices.size()). */ + unordered_map roleStateIndices; /** \brief Mapping from queue index to bytecode offset for built engines * that have already been pushed into the engine_blob. */ @@ -227,9 +222,6 @@ struct build_context : noncopyable { * that need hash table support. */ vector longLiterals; - /** \brief Minimum offset of a match from the floating table. */ - u32 floatingMinLiteralMatchOffset = 0; - /** \brief Long literal length threshold, used in streaming mode. */ size_t longLitLengthThreshold = 0; @@ -251,6 +243,13 @@ struct build_context : noncopyable { /** \brief Data only used during construction of various programs (literal, * anchored, delay, etc). */ struct ProgramBuild : noncopyable { + explicit ProgramBuild(u32 fMinLitOffset) + : floatingMinLiteralMatchOffset(fMinLitOffset) { + } + + /** \brief Minimum offset of a match from the floating table. */ + const u32 floatingMinLiteralMatchOffset; + /** \brief Mapping from vertex to key, for vertices with a * CHECK_NOT_HANDLED instruction. 
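 * (Editorial note: each key indexes the handled-roles multibit in scratch,
 * so a role reachable from several literals is only processed once at a
 * given offset.)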
*/ ue2::unordered_map handledKeys; @@ -444,21 +443,21 @@ bool isSingleOutfix(const RoseBuildImpl &tbi) { } static -u8 pickRuntimeImpl(const RoseBuildImpl &build, const build_context &bc, +u8 pickRuntimeImpl(const RoseBuildImpl &build, const RoseResources &resources, UNUSED u32 outfixEndQueue) { - DEBUG_PRINTF("has_outfixes=%d\n", bc.resources.has_outfixes); - DEBUG_PRINTF("has_suffixes=%d\n", bc.resources.has_suffixes); - DEBUG_PRINTF("has_leftfixes=%d\n", bc.resources.has_leftfixes); - DEBUG_PRINTF("has_literals=%d\n", bc.resources.has_literals); - DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states); - DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups); - DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay); - DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check); - DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored); - DEBUG_PRINTF("has_floating=%d\n", bc.resources.has_floating); - DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod); + DEBUG_PRINTF("has_outfixes=%d\n", resources.has_outfixes); + DEBUG_PRINTF("has_suffixes=%d\n", resources.has_suffixes); + DEBUG_PRINTF("has_leftfixes=%d\n", resources.has_leftfixes); + DEBUG_PRINTF("has_literals=%d\n", resources.has_literals); + DEBUG_PRINTF("has_states=%d\n", resources.has_states); + DEBUG_PRINTF("checks_groups=%d\n", resources.checks_groups); + DEBUG_PRINTF("has_lit_delay=%d\n", resources.has_lit_delay); + DEBUG_PRINTF("has_lit_check=%d\n", resources.has_lit_check); + DEBUG_PRINTF("has_anchored=%d\n", resources.has_anchored); + DEBUG_PRINTF("has_floating=%d\n", resources.has_floating); + DEBUG_PRINTF("has_eod=%d\n", resources.has_eod); - if (isPureFloating(bc.resources, build.cc)) { + if (isPureFloating(resources, build.cc)) { return ROSE_RUNTIME_PURE_LITERAL; } @@ -1196,7 +1195,6 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, setLeftNfaProperties(*nfa, leftfix); - build.leftfix_queue_map.emplace(leftfix, qi); nfa->queueIndex = qi; if (!prefix && !leftfix.haig() && leftfix.graph() @@ -1374,9 +1372,9 @@ void buildSuffixContainer(RoseGraph &g, build_context &bc, static void updateExclusiveInfixProperties(const RoseBuildImpl &build, - build_context &bc, - const vector &exclusive_info, - set *no_retrigger_queues) { + const vector &exclusive_info, + map &leftfix_info, + set *no_retrigger_queues) { const RoseGraph &g = build.g; for (const auto &info : exclusive_info) { // Set leftfix optimisations, disabled for tamarama subengines @@ -1426,9 +1424,10 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build, const auto &verts = sub.vertices; for (const auto &v : verts) { u32 lag = g[v].left.lag; - bc.leftfix_info.emplace( - v, left_build_info(qi, lag, max_width, squash_mask, stop, - max_queuelen, cm_count, cm_cr)); + leftfix_info.emplace(v, left_build_info(qi, lag, max_width, + squash_mask, stop, + max_queuelen, cm_count, + cm_cr)); } } } @@ -1490,7 +1489,7 @@ void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc, info.queue = qif.get_queue(); exclusive_info.push_back(move(info)); } - updateExclusiveInfixProperties(build, bc, exclusive_info, + updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info, no_retrigger_queues); buildInfixContainer(g, bc, exclusive_info); } @@ -1843,7 +1842,7 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, } static -void assignSuffixQueues(RoseBuildImpl &build, build_context &bc) { +void assignSuffixQueues(RoseBuildImpl &build, map &suffixes) { const RoseGraph &g = build.g; for (auto v : 
vertices_range(g)) { @@ -1856,14 +1855,13 @@ void assignSuffixQueues(RoseBuildImpl &build, build_context &bc) { DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); // We may have already built this NFA. - if (contains(bc.suffixes, s)) { + if (contains(suffixes, s)) { continue; } u32 queue = build.qif.get_queue(); DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue); - bc.suffixes.emplace(s, queue); - build.suffix_queue_map.emplace(s, queue); + suffixes.emplace(s, queue); } } @@ -2096,7 +2094,7 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, no_retrigger_queues); } - assignSuffixQueues(tbi, bc); + assignSuffixQueues(tbi, bc.suffixes); if (!buildSuffixes(tbi, bc, no_retrigger_queues, suffixTriggers)) { return false; @@ -2268,8 +2266,7 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { return 0; /* invalid offset */ } - vector iter; - mmbBuildSparseIterator(iter, lb_roles, bc.numStates); + auto iter = mmbBuildSparseIterator(lb_roles, bc.roleStateIndices.size()); return bc.engine_blob.add_iterator(iter); } @@ -2382,8 +2379,7 @@ u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) { DEBUG_PRINTF("building iter for %zu nfas\n", keys.size()); - vector iter; - mmbBuildSparseIterator(iter, keys, activeQueueCount); + auto iter = mmbBuildSparseIterator(keys, activeQueueCount); return bc.engine_blob.add_iterator(iter); } @@ -2512,16 +2508,13 @@ void recordResources(RoseResources &resources, const RoseProgram &program) { static void recordResources(RoseResources &resources, - const RoseBuildImpl &build) { + const RoseBuildImpl &build, + const vector &fragments) { if (!build.outfixes.empty()) { resources.has_outfixes = true; } - resources.has_literals = - any_of(begin(build.literal_info), end(build.literal_info), - [](const rose_literal_info &info) { - return info.fragment_id != MO_INVALID_IDX; - }); + resources.has_literals = !fragments.empty(); const auto &g = build.g; for (const auto &v : vertices_range(g)) { @@ -2537,20 +2530,21 @@ void recordResources(RoseResources &resources, } static -void recordLongLiterals(build_context &bc, const RoseProgram &program) { +void recordLongLiterals(vector &longLiterals, + const RoseProgram &program) { for (const auto &ri : program) { if (const auto *ri_check = dynamic_cast(ri.get())) { DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n", escapeString(ri_check->literal).c_str()); - bc.longLiterals.emplace_back(ri_check->literal, false); + longLiterals.emplace_back(ri_check->literal, false); continue; } if (const auto *ri_check = dynamic_cast(ri.get())) { DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n", escapeString(ri_check->literal).c_str()); - bc.longLiterals.emplace_back(ri_check->literal, true); + longLiterals.emplace_back(ri_check->literal, true); } } } @@ -2569,7 +2563,7 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) { } recordResources(bc.resources, program); - recordLongLiterals(bc, program); + recordLongLiterals(bc.longLiterals, program); u32 len = 0; auto prog_bytecode = writeProgram(bc.engine_blob, program, &len); @@ -2581,7 +2575,7 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) { } static -u32 writeActiveLeftIter(build_context &bc, +u32 writeActiveLeftIter(RoseEngineBlob &engine_blob, const vector &leftInfoTable) { vector keys; for (size_t i = 0; i < leftInfoTable.size(); i++) { @@ -2597,9 +2591,8 @@ u32 writeActiveLeftIter(build_context &bc, return 0; } - vector iter; - mmbBuildSparseIterator(iter, keys, 
verify_u32(leftInfoTable.size())); - return bc.engine_blob.add_iterator(iter); + auto iter = mmbBuildSparseIterator(keys, verify_u32(leftInfoTable.size())); + return engine_blob.add_iterator(iter); } static @@ -2726,19 +2719,20 @@ void writeLookaroundTables(build_context &bc, RoseEngine &proto) { } static -void writeDkeyInfo(const ReportManager &rm, build_context &bc, +void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob, RoseEngine &proto) { const auto inv_dkeys = rm.getDkeyToReportTable(); - proto.invDkeyOffset = bc.engine_blob.add_range(inv_dkeys); + proto.invDkeyOffset = engine_blob.add_range(inv_dkeys); proto.dkeyCount = rm.numDkeys(); proto.dkeyLogSize = fatbit_size(proto.dkeyCount); } static -void writeLeftInfo(build_context &bc, RoseEngine &proto, +void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto, const vector &leftInfoTable) { - proto.leftOffset = bc.engine_blob.add_range(leftInfoTable); - proto.activeLeftIterOffset = writeActiveLeftIter(bc, leftInfoTable); + proto.leftOffset = engine_blob.add_range(leftInfoTable); + proto.activeLeftIterOffset + = writeActiveLeftIter(engine_blob, leftInfoTable); proto.roseCount = verify_u32(leftInfoTable.size()); proto.activeLeftCount = verify_u32(leftInfoTable.size()); proto.rosePrefixCount = countRosePrefixes(leftInfoTable); @@ -3592,10 +3586,11 @@ void makeRoleLookaround(const RoseBuildImpl &build, build_context &bc, } static -void makeRoleCheckLeftfix(const RoseBuildImpl &build, build_context &bc, +void makeRoleCheckLeftfix(const RoseBuildImpl &build, + const map &leftfix_info, RoseVertex v, RoseProgram &program) { - auto it = bc.leftfix_info.find(v); - if (it == end(bc.leftfix_info)) { + auto it = leftfix_info.find(v); + if (it == end(leftfix_info)) { return; } const left_build_info &lni = it->second; @@ -3623,7 +3618,8 @@ void makeRoleCheckLeftfix(const RoseBuildImpl &build, build_context &bc, } static -void makeRoleAnchoredDelay(const RoseBuildImpl &build, build_context &bc, +void makeRoleAnchoredDelay(const RoseBuildImpl &build, + u32 floatingMinLiteralMatchOffset, RoseVertex v, RoseProgram &program) { // Only relevant for roles that can be triggered by the anchored table. if (!build.isAnchored(v)) { @@ -3632,7 +3628,7 @@ void makeRoleAnchoredDelay(const RoseBuildImpl &build, build_context &bc, // If this match cannot occur after floatingMinLiteralMatchOffset, we do // not need this check. 
- if (build.g[v].max_offset <= bc.floatingMinLiteralMatchOffset) { + if (build.g[v].max_offset <= floatingMinLiteralMatchOffset) { return; } @@ -3662,9 +3658,9 @@ void makeDedupeSom(const RoseBuildImpl &build, const Report &report, } static -void makeCatchup(const RoseBuildImpl &build, const build_context &bc, +void makeCatchup(const RoseBuildImpl &build, bool needs_catchup, const flat_set &reports, RoseProgram &program) { - if (!bc.needs_catchup) { + if (!needs_catchup) { return; } @@ -3685,9 +3681,9 @@ void makeCatchup(const RoseBuildImpl &build, const build_context &bc, } static -void makeCatchupMpv(const RoseBuildImpl &build, const build_context &bc, +void makeCatchupMpv(const RoseBuildImpl &build, bool needs_mpv_catchup, ReportID id, RoseProgram &program) { - if (!bc.needs_mpv_catchup) { + if (!needs_mpv_catchup) { return; } @@ -3931,7 +3927,7 @@ void makeRoleReports(const RoseBuildImpl &build, const build_context &bc, } const auto &reports = g[v].reports; - makeCatchup(build, bc, reports, program); + makeCatchup(build, bc.needs_catchup, reports, program); RoseProgram report_block; for (ReportID id : reports) { @@ -4067,12 +4063,12 @@ void makeRoleInfixTriggers(const RoseBuildImpl &build, const build_context &bc, } static -void makeRoleSetState(const build_context &bc, RoseVertex v, - RoseProgram &program) { +void makeRoleSetState(const unordered_map &roleStateIndices, + RoseVertex v, RoseProgram &program) { // We only need this instruction if a state index has been assigned to this // vertex. - auto it = bc.roleStateIndices.find(v); - if (it == end(bc.roleStateIndices)) { + auto it = roleStateIndices.find(v); + if (it == end(roleStateIndices)) { return; } program.add_before_end(make_unique(it->second)); @@ -4181,7 +4177,8 @@ RoseProgram makeProgram(const RoseBuildImpl &build, build_context &bc, // First, add program instructions that enforce preconditions without // effects. - makeRoleAnchoredDelay(build, bc, v, program); + makeRoleAnchoredDelay(build, prog_build.floatingMinLiteralMatchOffset, v, + program); if (onlyAtEod(build, v)) { DEBUG_PRINTF("only at eod\n"); @@ -4201,7 +4198,7 @@ RoseProgram makeProgram(const RoseBuildImpl &build, build_context &bc, } makeRoleLookaround(build, bc, v, program); - makeRoleCheckLeftfix(build, bc, v, program); + makeRoleCheckLeftfix(build, bc.leftfix_info, v, program); // Next, we can add program instructions that have effects. This must be // done as a series of blocks, as some of them (like reports) are @@ -4228,7 +4225,7 @@ RoseProgram makeProgram(const RoseBuildImpl &build, build_context &bc, effects_block.add_block(move(suffix_block)); RoseProgram state_block; - makeRoleSetState(bc, v, state_block); + makeRoleSetState(bc.roleStateIndices, v, state_block); effects_block.add_block(move(state_block)); // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if @@ -4279,11 +4276,11 @@ void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc, } static -void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { +unordered_map assignStateIndices(const RoseBuildImpl &build) { const auto &g = build.g; u32 state = 0; - + unordered_map roleStateIndices; for (auto v : vertices_range(g)) { // Virtual vertices (starts, EOD accept vertices) never need state // indices. 
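An editorial aside on the shape of this hunk: assignStateIndices now returns its mapping instead of writing bc.numStates into the context, and callers recover the state count as roleStateIndices.size(). This is the refactor the commit message describes, applied throughout the patch. A minimal sketch of the pattern, with invented, simplified types:

#include <unordered_map>
#include <vector>

struct Vertex {
    bool needs_state_bit;
};

// Pure helper: returns the vertex -> dense state index mapping instead of
// mutating a long-lived context; the old numStates is simply size().
static std::unordered_map<const Vertex *, unsigned>
assign_state_indices(const std::vector<Vertex> &verts) {
    std::unordered_map<const Vertex *, unsigned> indices;
    unsigned next = 0;
    for (const Vertex &v : verts) {
        if (v.needs_state_bit) {
            indices.emplace(&v, next++); // dense index in [0, size())
        }
    }
    return indices;
}

int main() {
    std::vector<Vertex> verts{{true}, {false}, {true}};
    auto indices = assign_state_indices(verts);
    return indices.size() == 2 ? 0 : 1;
}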
@@ -4306,12 +4303,13 @@ void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { } /* TODO: also don't need a state index if all edges are nfa based */ - bc.roleStateIndices.emplace(v, state++); + roleStateIndices.emplace(v, state++); } DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state, num_vertices(g)); - bc.numStates = state; + + return roleStateIndices; } static @@ -4426,8 +4424,7 @@ void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, } static -void addPredBlocksAny(const build_context &bc, - map &pred_blocks, +void addPredBlocksAny(map &pred_blocks, u32 num_states, RoseProgram &program) { RoseProgram sparse_program; @@ -4437,7 +4434,7 @@ void addPredBlocksAny(const build_context &bc, } const RoseInstruction *end_inst = sparse_program.end_instruction(); - auto ri = make_unique(bc.numStates, keys, end_inst); + auto ri = make_unique(num_states, keys, end_inst); sparse_program.add_before_end(move(ri)); RoseProgram &block = pred_blocks.begin()->second; @@ -4446,9 +4443,8 @@ void addPredBlocksAny(const build_context &bc, } static -void addPredBlocksMulti(const build_context &bc, - map &pred_blocks, - RoseProgram &program) { +void addPredBlocksMulti(map &pred_blocks, + u32 num_states, RoseProgram &program) { assert(!pred_blocks.empty()); RoseProgram sparse_program; @@ -4456,8 +4452,7 @@ void addPredBlocksMulti(const build_context &bc, vector> jump_table; // BEGIN instruction. - auto ri_begin = - make_unique(bc.numStates, end_inst); + auto ri_begin = make_unique(num_states, end_inst); RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); sparse_program.add_before_end(move(ri_begin)); @@ -4498,7 +4493,7 @@ void addPredBlocksMulti(const build_context &bc, } static -void addPredBlocks(const build_context &bc, map &pred_blocks, +void addPredBlocks(map &pred_blocks, u32 num_states, RoseProgram &program) { // Trim empty blocks, if any exist. for (auto it = pred_blocks.begin(); it != pred_blocks.end();) { @@ -4527,11 +4522,11 @@ void addPredBlocks(const build_context &bc, map &pred_blocks, return RoseProgramEquivalence()(*begin(blocks), block); })) { DEBUG_PRINTF("all blocks equiv\n"); - addPredBlocksAny(bc, pred_blocks, program); + addPredBlocksAny(pred_blocks, num_states, program); return; } - addPredBlocksMulti(bc, pred_blocks, program); + addPredBlocksMulti(pred_blocks, num_states, program); } static @@ -4658,14 +4653,15 @@ u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { } static -void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, - u32 lit_id, const vector &lit_edges, +void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id, + const vector &lit_edges, + u32 floatingMinLiteralMatchOffset, RoseProgram &program) { if (lit_edges.empty()) { return; } - if (bc.floatingMinLiteralMatchOffset == 0) { + if (floatingMinLiteralMatchOffset == 0) { return; } @@ -4677,18 +4673,17 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, const auto &lit = build.literals.right.at(lit_id); size_t min_len = lit.elength(); u32 min_offset = findMinOffset(build, lit_id); - DEBUG_PRINTF("has min_len=%zu, min_offset=%u, " - "global min is %u\n", min_len, min_offset, - bc.floatingMinLiteralMatchOffset); + DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len, + min_offset, floatingMinLiteralMatchOffset); // If we can't match before the min offset, we don't need the check. 
- if (min_len >= bc.floatingMinLiteralMatchOffset) { + if (min_len >= floatingMinLiteralMatchOffset) { DEBUG_PRINTF("no need for check, min is %u\n", - bc.floatingMinLiteralMatchOffset); + floatingMinLiteralMatchOffset); return; } - assert(min_offset >= bc.floatingMinLiteralMatchOffset); + assert(min_offset >= floatingMinLiteralMatchOffset); assert(min_offset < UINT32_MAX); DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); @@ -4698,13 +4693,13 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, } static -void makeCheckLiteralInstruction(const RoseBuildImpl &build, - const build_context &bc, u32 lit_id, +void makeCheckLiteralInstruction(const RoseBuildImpl &build, u32 lit_id, + size_t longLitLengthThreshold, RoseProgram &program) { - assert(bc.longLitLengthThreshold > 0); + assert(longLitLengthThreshold > 0); DEBUG_PRINTF("lit_id=%u, long lit threshold %zu\n", lit_id, - bc.longLitLengthThreshold); + longLitLengthThreshold); if (build.isDelayed(lit_id)) { return; @@ -4722,7 +4717,7 @@ void makeCheckLiteralInstruction(const RoseBuildImpl &build, throw ResourceLimitError(); } - if (lit.s.length() <= bc.longLitLengthThreshold) { + if (lit.s.length() <= longLitLengthThreshold) { DEBUG_PRINTF("is a medium-length literal\n"); const auto *end_inst = program.end_instruction(); unique_ptr ri; @@ -4776,7 +4771,8 @@ RoseProgram buildLitInitialProgram(const RoseBuildImpl &build, RoseProgram program; // Check long literal info. - makeCheckLiteralInstruction(build, bc, lit_id, program); + makeCheckLiteralInstruction(build, lit_id, bc.longLitLengthThreshold, + program); // Check lit mask. makeCheckLitMaskInstruction(build, bc, lit_id, program); @@ -4792,7 +4788,9 @@ RoseProgram buildLitInitialProgram(const RoseBuildImpl &build, makePushDelayedInstructions(build, prog_build, lit_id, program); // Add pre-check for early literals in the floating table. - makeCheckLitEarlyInstruction(build, bc, lit_id, lit_edges, program); + makeCheckLitEarlyInstruction(build, lit_id, lit_edges, + prog_build.floatingMinLiteralMatchOffset, + program); return program; } @@ -4827,7 +4825,7 @@ RoseProgram buildLiteralProgram(const RoseBuildImpl &build, build_context &bc, // Add blocks to deal with non-root edges (triggered by sparse iterator or // mmbit_isset checks). - addPredBlocks(bc, pred_blocks, program); + addPredBlocks(pred_blocks, bc.roleStateIndices.size(), program); // Add blocks to handle root roles. 
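// (Editorial note: unlike the non-root blocks above, root roles are not
// gated on a predecessor state bit; their blocks run directly, subject
// only to their own checks.)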
for (const auto &e : lit_edges) { @@ -4896,7 +4894,7 @@ RoseProgram assembleProgramBlocks(vector &&blocks) { static u32 writeLiteralProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, const flat_set &lit_ids, + ProgramBuild &prog_build, const vector &lit_ids, const map> &lit_edge_map, bool is_anchored_program) { assert(!lit_ids.empty()); @@ -4947,7 +4945,7 @@ u32 writeLiteralProgram(const RoseBuildImpl &build, build_context &bc, static u32 writeDelayRebuildProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, - const flat_set &lit_ids) { + const vector &lit_ids) { assert(!lit_ids.empty()); if (!build.cc.streaming) { @@ -4964,7 +4962,8 @@ u32 writeDelayRebuildProgram(const RoseBuildImpl &build, build_context &bc, } RoseProgram prog; - makeCheckLiteralInstruction(build, bc, lit_id, prog); + makeCheckLiteralInstruction(build, lit_id, bc.longLitLengthThreshold, + prog); makeCheckLitMaskInstruction(build, bc, lit_id, prog); makePushDelayedInstructions(build, prog_build, lit_id, prog); blocks.push_back(move(prog)); @@ -5046,7 +5045,8 @@ rose_literal_id getFragment(const rose_literal_id &lit) { } static -void groupByFragment(RoseBuildImpl &build) { +vector groupByFragment(const RoseBuildImpl &build) { + vector fragments; u32 frag_id = 0; struct FragmentInfo { @@ -5055,9 +5055,6 @@ void groupByFragment(RoseBuildImpl &build) { }; map frag_info; - map lit_to_frag; - - auto &fragments = build.fragments; for (const auto &m : build.literals.right) { const u32 lit_id = m.first; @@ -5077,8 +5074,7 @@ void groupByFragment(RoseBuildImpl &build) { auto groups = info.group_mask; if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { - lit_to_frag.emplace(lit_id, frag_id); - fragments.emplace_back(frag_id, groups); + fragments.emplace_back(frag_id, groups, lit_id); frag_id++; continue; } @@ -5090,53 +5086,38 @@ void groupByFragment(RoseBuildImpl &build) { fi.groups |= groups; } - for (const auto &m : frag_info) { - const auto &fi = m.second; + for (auto &m : frag_info) { + auto &fi = m.second; DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), as_string_list(fi.lit_ids).c_str()); - fragments.emplace_back(frag_id, fi.groups); - for (const auto lit_id : fi.lit_ids) { - assert(!contains(lit_to_frag, lit_id)); - lit_to_frag.emplace(lit_id, frag_id); - } + sort(fi.lit_ids.begin(), fi.lit_ids.end()); /* to match old behaviour */ + fragments.emplace_back(frag_id, fi.groups, move(fi.lit_ids)); frag_id++; + assert(frag_id == fragments.size()); } - // Write the fragment IDs into the literal_info structures. - for (const auto &m : lit_to_frag) { - build.literal_info[m.first].fragment_id = m.second; - } + return fragments; } /** * \brief Build the interpreter programs for each literal. 
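 * (Editorial note: all literal ids grouped into a fragment share one
 * program, and the delay-rebuild program is only generated in streaming
 * mode.)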
*/ static -void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, +void buildLiteralPrograms(const RoseBuildImpl &build, + vector &fragments, build_context &bc, ProgramBuild &prog_build) { - // Build a reverse mapping from fragment -> {lit_id, lit_id,...} - map> frag_to_lit_map; - for (u32 lit_id = 0; lit_id < verify_u32(build.literal_info.size()); - lit_id++) { - const auto &info = build.literal_info[lit_id]; - if (info.fragment_id == MO_INVALID_IDX) { - continue; - } - frag_to_lit_map[info.fragment_id].insert(lit_id); - } - - DEBUG_PRINTF("%zu fragments\n", build.fragments.size()); + DEBUG_PRINTF("%zu fragments\n", fragments.size()); auto lit_edge_map = findEdgesByLiteral(build); - for (auto &frag : build.fragments) { - const auto &lit_ids = frag_to_lit_map[frag.fragment_id]; + for (auto &frag : fragments) { DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", frag.fragment_id, - as_string_list(lit_ids).c_str()); + as_string_list(frag.lit_ids).c_str()); - frag.lit_program_offset = writeLiteralProgram( - build, bc, prog_build, lit_ids, lit_edge_map, false); - frag.delay_program_offset = - writeDelayRebuildProgram(build, bc, prog_build, lit_ids); + frag.lit_program_offset + = writeLiteralProgram(build, bc, prog_build, frag.lit_ids, + lit_edge_map, false); + frag.delay_program_offset + = writeDelayRebuildProgram(build, bc, prog_build, frag.lit_ids); } } @@ -5147,39 +5128,40 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, * programs. */ static -pair writeDelayPrograms(const RoseBuildImpl &build, build_context &bc, +pair writeDelayPrograms(const RoseBuildImpl &build, + const vector &fragments, + build_context &bc, ProgramBuild &prog_build) { auto lit_edge_map = findEdgesByLiteral(build); vector programs; // program offsets indexed by (delayed) lit id unordered_map cache; // program offsets we have already seen - for (const auto &lit_id : build.literals.right | map_keys) { - const auto &info = build.literal_info.at(lit_id); + for (const auto &frag : fragments) { + for (const u32 lit_id : frag.lit_ids) { + const auto &info = build.literal_info.at(lit_id); - if (info.fragment_id == MO_INVALID_IDX) { - continue; // Unused literal. 
- } + for (const auto &delayed_lit_id : info.delayed_ids) { + DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); + u32 offset = writeLiteralProgram(build, bc, prog_build, + {delayed_lit_id}, lit_edge_map, + false); - for (const auto &delayed_lit_id : info.delayed_ids) { - DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); - u32 offset = writeLiteralProgram( - build, bc, prog_build, {delayed_lit_id}, lit_edge_map, false); - - u32 delay_id; - auto it = cache.find(offset); - if (it != end(cache)) { - delay_id = it->second; - DEBUG_PRINTF("reusing delay_id %u for offset %u\n", delay_id, - offset); - } else { - delay_id = verify_u32(programs.size()); - programs.push_back(offset); - cache.emplace(offset, delay_id); - DEBUG_PRINTF("assigned new delay_id %u for offset %u\n", - delay_id, offset); + u32 delay_id; + auto it = cache.find(offset); + if (it != end(cache)) { + delay_id = it->second; + DEBUG_PRINTF("reusing delay_id %u for offset %u\n", + delay_id, offset); + } else { + delay_id = verify_u32(programs.size()); + programs.push_back(offset); + cache.emplace(offset, delay_id); + DEBUG_PRINTF("assigned new delay_id %u for offset %u\n", + delay_id, offset); + } + prog_build.delay_programs.emplace(delayed_lit_id, delay_id); } - prog_build.delay_programs.emplace(delayed_lit_id, delay_id); } } @@ -5195,6 +5177,7 @@ pair writeDelayPrograms(const RoseBuildImpl &build, build_context &bc, */ static pair writeAnchoredPrograms(const RoseBuildImpl &build, + const vector &fragments, build_context &bc, ProgramBuild &prog_build) { auto lit_edge_map = findEdgesByLiteral(build); @@ -5202,44 +5185,42 @@ pair writeAnchoredPrograms(const RoseBuildImpl &build, vector programs; // program offsets indexed by anchored id unordered_map cache; // program offsets we have already seen - for (const auto &m : build.literals.right) { - u32 lit_id = m.first; - const auto &lit = m.second; + for (const auto &frag : fragments) { + for (const u32 lit_id : frag.lit_ids) { + const auto &lit = build.literals.right.at(lit_id); - if (lit.table != ROSE_ANCHORED) { - continue; + if (lit.table != ROSE_ANCHORED) { + continue; + } + + // If this anchored literal can never match past + // floatingMinLiteralMatchOffset, we will never have to record it. + if (findMaxOffset(build, lit_id) + <= prog_build.floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("can never match after " + "floatingMinLiteralMatchOffset=%u\n", + prog_build.floatingMinLiteralMatchOffset); + continue; + } + + u32 offset = writeLiteralProgram(build, bc, prog_build, {lit_id}, + lit_edge_map, true); + DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset); + + u32 anch_id; + auto it = cache.find(offset); + if (it != end(cache)) { + anch_id = it->second; + DEBUG_PRINTF("reusing anch_id %u for offset %u\n", anch_id, offset); + } else { + anch_id = verify_u32(programs.size()); + programs.push_back(offset); + cache.emplace(offset, anch_id); + DEBUG_PRINTF("assigned new anch_id %u for offset %u\n", anch_id, + offset); + } + prog_build.anchored_programs.emplace(lit_id, anch_id); } - - if (build.literal_info.at(lit_id).fragment_id == MO_INVALID_IDX) { - continue; // Unused literal. - } - - // If this anchored literal can never match past - // floatingMinLiteralMatchOffset, we will never have to record it. 
- if (findMaxOffset(build, lit_id) <= bc.floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("can never match after " - "floatingMinLiteralMatchOffset=%u\n", - bc.floatingMinLiteralMatchOffset); - continue; - } - - u32 offset = writeLiteralProgram(build, bc, prog_build, {lit_id}, - lit_edge_map, true); - DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset); - - u32 anch_id; - auto it = cache.find(offset); - if (it != end(cache)) { - anch_id = it->second; - DEBUG_PRINTF("reusing anch_id %u for offset %u\n", anch_id, offset); - } else { - anch_id = verify_u32(programs.size()); - programs.push_back(offset); - cache.emplace(offset, anch_id); - DEBUG_PRINTF("assigned new anch_id %u for offset %u\n", anch_id, - offset); - } - prog_build.anchored_programs.emplace(lit_id, anch_id); } DEBUG_PRINTF("%zu anchored programs\n", programs.size()); @@ -5283,7 +5264,7 @@ pair buildReportPrograms(const RoseBuildImpl &build, for (ReportID id : reports) { RoseProgram program; const bool has_som = false; - makeCatchupMpv(build, bc, id, program); + makeCatchupMpv(build, bc.needs_mpv_catchup, id, program); makeReport(build, id, has_som, program); applyFinalSpecialisation(program); u32 offset = writeProgram(bc, move(program)); @@ -5300,7 +5281,7 @@ pair buildReportPrograms(const RoseBuildImpl &build, static RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, - const build_context &bc, + bool needs_catchup, ProgramBuild &prog_build, const RoseEdge &e, const bool multiple_preds) { const RoseGraph &g = build.g; @@ -5318,7 +5299,7 @@ RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, } const auto &reports = g[v].reports; - makeCatchup(build, bc, reports, program); + makeCatchup(build, needs_catchup, reports, program); const bool has_som = false; RoseProgram report_block; @@ -5393,11 +5374,12 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc, assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); pred_blocks[pred_state].add_block( - makeEodAnchorProgram(build, bc, prog_build, e, multiple_preds)); + makeEodAnchorProgram(build, bc.needs_catchup, prog_build, e, + multiple_preds)); } } - addPredBlocks(bc, pred_blocks, program); + addPredBlocks(pred_blocks, bc.roleStateIndices.size(), program); } static @@ -5588,7 +5570,7 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { static u32 writeEagerQueueIter(const set &eager, u32 leftfixBeginQueue, - u32 queue_count, build_context &bc) { + u32 queue_count, RoseEngineBlob &engine_blob) { if (eager.empty()) { return 0; } @@ -5599,9 +5581,8 @@ u32 writeEagerQueueIter(const set &eager, u32 leftfixBeginQueue, vec.push_back(q - leftfixBeginQueue); } - vector iter; - mmbBuildSparseIterator(iter, vec, queue_count - leftfixBeginQueue); - return bc.engine_blob.add_iterator(iter); + auto iter = mmbBuildSparseIterator(vec, queue_count - leftfixBeginQueue); + return engine_blob.add_iterator(iter); } static @@ -5707,6 +5688,19 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build, return longLitLengthThreshold; } +static +map makeLeftQueueMap(const RoseGraph &g, + const map &leftfix_info) { + map lqm; + for (const auto &e : leftfix_info) { + left_id left(g[e.first].left); + assert(!contains(lqm, left) || lqm[left] == e.second.queue); + lqm[left] = e.second.queue; + } + + return lqm; +} + aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // We keep all our offsets, counts etc. 
in a prototype RoseEngine which we // will copy into the real one once it is allocated: we can't do this @@ -5730,16 +5724,16 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { historyRequired); DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); - groupByFragment(*this); + vector fragments = groupByFragment(*this); - auto anchored_dfas = buildAnchoredDfas(*this); + auto anchored_dfas = buildAnchoredDfas(*this, fragments); build_context bc; - bc.floatingMinLiteralMatchOffset = - findMinFloatingLiteralMatch(*this, anchored_dfas); + u32 floatingMinLiteralMatchOffset + = findMinFloatingLiteralMatch(*this, anchored_dfas); bc.longLitLengthThreshold = longLitLengthThreshold; bc.needs_catchup = needsCatchup(*this, anchored_dfas); - recordResources(bc.resources, *this); + recordResources(bc.resources, *this, fragments); if (!anchored_dfas.empty()) { bc.resources.has_anchored = true; } @@ -5777,7 +5771,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - assignStateIndices(*this, bc); + // Enforce role table resource limit. + if (num_vertices(g) > cc.grey.limitRoseRoleCount) { + throw ResourceLimitError(); + } + + bc.roleStateIndices = assignStateIndices(*this); u32 laggedRoseCount = 0; vector leftInfoTable; @@ -5786,17 +5785,17 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { &laggedRoseCount, &historyRequired); // Information only needed for program construction. - ProgramBuild prog_build; + ProgramBuild prog_build(floatingMinLiteralMatchOffset); prog_build.vertex_group_map = getVertexGroupMap(*this); prog_build.squashable_groups = getSquashableGroups(*this); tie(proto.anchoredProgramOffset, proto.anchored_count) = - writeAnchoredPrograms(*this, bc, prog_build); + writeAnchoredPrograms(*this, fragments, bc, prog_build); tie(proto.delayProgramOffset, proto.delay_count) = - writeDelayPrograms(*this, bc, prog_build); + writeDelayPrograms(*this, fragments, bc, prog_build); - buildLiteralPrograms(*this, bc, prog_build); + buildLiteralPrograms(*this, fragments, bc, prog_build); proto.eodProgramOffset = writeEodProgram(*this, bc, prog_build, eodNfaIterOffset); @@ -5808,22 +5807,17 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc); proto.eagerIterOffset = writeEagerQueueIter( - eager_queues, proto.leftfixBeginQueue, queue_count, bc); + eager_queues, proto.leftfixBeginQueue, queue_count, bc.engine_blob); addSomRevNfas(bc, proto, ssm); writeLookaroundTables(bc, proto); - writeDkeyInfo(rm, bc, proto); - writeLeftInfo(bc, proto, leftInfoTable); - - // Enforce role table resource limit. - if (num_vertices(g) > cc.grey.limitRoseRoleCount) { - throw ResourceLimitError(); - } + writeDkeyInfo(rm, bc.engine_blob, proto); + writeLeftInfo(bc.engine_blob, proto, leftInfoTable); // Build anchored matcher. size_t asize = 0; - auto atable = buildAnchoredMatcher(*this, anchored_dfas, &asize); + auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas, &asize); if (atable) { proto.amatcherOffset = bc.engine_blob.add(atable.get(), asize, 64); } @@ -5831,7 +5825,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build floating HWLM matcher. 
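    // (The floating table is the unanchored HWLM matcher; its literals and
    //  program offsets come from the fragments assembled above.)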
rose_group fgroups = 0; size_t fsize = 0; - auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold, + auto ftable = buildFloatingMatcher(*this, fragments, + bc.longLitLengthThreshold, &fgroups, &fsize, &historyRequired); if (ftable) { proto.fmatcherOffset = bc.engine_blob.add(ftable.get(), fsize, 64); @@ -5840,22 +5835,22 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build delay rebuild HWLM matcher. size_t drsize = 0; - auto drtable = - buildDelayRebuildMatcher(*this, bc.longLitLengthThreshold, &drsize); + auto drtable = buildDelayRebuildMatcher(*this, fragments, + bc.longLitLengthThreshold, &drsize); if (drtable) { proto.drmatcherOffset = bc.engine_blob.add(drtable.get(), drsize, 64); } // Build EOD-anchored HWLM matcher. size_t esize = 0; - auto etable = buildEodAnchoredMatcher(*this, &esize); + auto etable = buildEodAnchoredMatcher(*this, fragments, &esize); if (etable) { proto.ematcherOffset = bc.engine_blob.add(etable.get(), esize, 64); } // Build small-block HWLM matcher. size_t sbsize = 0; - auto sbtable = buildSmallBlockMatcher(*this, &sbsize); + auto sbtable = buildSmallBlockMatcher(*this, fragments, &sbsize); if (sbtable) { proto.sbmatcherOffset = bc.engine_blob.add(sbtable.get(), sbsize, 64); } @@ -5873,7 +5868,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { assert(!cc.streaming || historyRequired <= max(cc.grey.maxHistoryAvailable, cc.grey.somMaxRevNfaLength)); - fillStateOffsets(*this, bc.numStates, proto.anchorStateSize, + fillStateOffsets(*this, bc.roleStateIndices.size(), proto.anchorStateSize, proto.activeArrayCount, proto.activeLeftCount, laggedRoseCount, longLitStreamStateRequired, historyRequired, &proto.stateOffsets); @@ -5883,9 +5878,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { writeNfaInfo(*this, bc, proto, no_retrigger_queues); scatter_plan_raw state_scatter = buildStateScatterPlan( - sizeof(u8), bc.numStates, proto.activeLeftCount, proto.rosePrefixCount, - proto.stateOffsets, cc.streaming, proto.activeArrayCount, - proto.outfixBeginQueue, proto.outfixEndQueue); + sizeof(u8), bc.roleStateIndices.size(), proto.activeLeftCount, + proto.rosePrefixCount, proto.stateOffsets, cc.streaming, + proto.activeArrayCount, proto.outfixBeginQueue, proto.outfixEndQueue); u32 currOffset; /* relative to base of RoseEngine */ if (!bc.engine_blob.empty()) { @@ -5910,7 +5905,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.needsCatchup = bc.needs_catchup ? 1 : 0; - proto.runtimeImpl = pickRuntimeImpl(*this, bc, proto.outfixEndQueue); + proto.runtimeImpl = pickRuntimeImpl(*this, bc.resources, + proto.outfixEndQueue); proto.mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); proto.queueCount = queue_count; @@ -5918,10 +5914,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.handledKeyCount = prog_build.handledKeys.size(); proto.handledKeyFatbitSize = fatbit_size(proto.handledKeyCount); - proto.rolesWithStateCount = bc.numStates; + proto.rolesWithStateCount = bc.roleStateIndices.size(); proto.initMpvNfa = mpv_as_outfix ? 
0 : MO_INVALID_IDX; - proto.stateSize = mmbit_size(bc.numStates); + proto.stateSize = mmbit_size(bc.roleStateIndices.size()); proto.delay_fatbit_size = fatbit_size(proto.delay_count); proto.anchored_fatbit_size = fatbit_size(proto.anchored_count); @@ -5938,7 +5934,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.fmatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_FLOATING); proto.minWidth = hasBoundaryReports(boundary) ? 0 : minWidth; proto.minWidthExcludingBoundaries = minWidth; - proto.floatingMinLiteralMatchOffset = bc.floatingMinLiteralMatchOffset; + proto.floatingMinLiteralMatchOffset = floatingMinLiteralMatchOffset; proto.maxBiAnchoredWidth = findMaxBAWidth(*this); proto.noFloatingRoots = hasNoFloatingRoots(); @@ -5977,7 +5973,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { DEBUG_PRINTF("rose done %p\n", engine.get()); - dumpRose(*this, engine.get()); + dumpRose(*this, fragments, makeLeftQueueMap(g, bc.leftfix_info), + bc.suffixes, engine.get()); return engine; } diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 00586f65..791a68ab 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1669,7 +1669,7 @@ bool roleOffsetsAreValid(const RoseGraph &g) { #endif // NDEBUG aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { - dumpRoseGraph(*this, nullptr, "rose_early.dot"); + dumpRoseGraph(*this, "rose_early.dot"); // Early check for Rose implementability. assert(canImplementGraphs(*this)); @@ -1780,7 +1780,7 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { assert(roleOffsetsAreValid(g)); assert(historiesAreValid(g)); - dumpRoseGraph(*this, nullptr, "rose_pre_norm.dot"); + dumpRoseGraph(*this, "rose_pre_norm.dot"); return buildFinalEngine(minWidth); } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 30dccb1a..a52830b0 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -112,8 +112,11 @@ string rose_off::str(void) const { class RoseGraphWriter { public: - RoseGraphWriter(const RoseBuildImpl &b_in, const RoseEngine *t_in) : - build(b_in), t(t_in) { + RoseGraphWriter(const RoseBuildImpl &b_in, const map &frag_map_in, + const map &lqm_in, + const map &sqm_in, const RoseEngine *t_in) + : frag_map(frag_map_in), leftfix_queue_map(lqm_in), + suffix_queue_map(sqm_in), build(b_in), t(t_in) { for (const auto &m : build.ghost) { ghost.insert(m.second); } @@ -160,8 +163,8 @@ public: if (g[v].suffix) { suffix_id suff(g[v].suffix); os << "\\n" << render_kind(suff) << " (top " << g[v].suffix.top; - auto it = build.suffix_queue_map.find(suff); - if (it != end(build.suffix_queue_map)) { + auto it = suffix_queue_map.find(suff); + if (it != end(suffix_queue_map)) { os << ", queue " << it->second; } os << ")"; @@ -174,8 +177,8 @@ public: if (g[v].left) { left_id left(g[v].left); os << "\\n" << render_kind(left) << " (queue "; - auto it = build.leftfix_queue_map.find(left); - if (it != end(build.leftfix_queue_map)) { + auto it = leftfix_queue_map.find(left); + if (it != end(leftfix_queue_map)) { os << it->second; } else { os << "??"; @@ -248,8 +251,8 @@ private: // Render the literal associated with a vertex. 
void writeLiteral(ostream &os, u32 id) const { os << "lit=" << id; - if (id < build.literal_info.size()) { - os << "/" << build.literal_info[id].fragment_id << " "; + if (contains(frag_map, id)) { + os << "/" << frag_map.at(id) << " "; } else { os << "/nofrag "; } @@ -269,13 +272,32 @@ private: } set ghost; + const map &frag_map; + const map &leftfix_queue_map; + const map &suffix_queue_map; const RoseBuildImpl &build; const RoseEngine *t; }; } // namespace +static +map makeFragMap(const vector &fragments) { + map fm; + for (const auto &f : fragments) { + for (u32 id : f.lit_ids) { + fm[id] = f.fragment_id; + } + } + + return fm; +} + +static void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t, + const vector &fragments, + const map &leftfix_queue_map, + const map &suffix_queue_map, const char *filename) { const Grey &grey = build.cc.grey; @@ -293,10 +315,16 @@ void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t, DEBUG_PRINTF("dumping graph to %s\n", ss.str().c_str()); ofstream os(ss.str()); - RoseGraphWriter writer(build, t); + auto frag_map = makeFragMap(fragments); + RoseGraphWriter writer(build, frag_map, leftfix_queue_map, suffix_queue_map, + t); writeGraphviz(os, build.g, writer, get(boost::vertex_index, build.g)); } +void dumpRoseGraph(const RoseBuildImpl &build, const char *filename) { + dumpRoseGraph(build, nullptr, {}, {}, {}, filename); +} + namespace { struct CompareVertexRole { explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {} @@ -321,11 +349,14 @@ void lit_graph_info(const RoseBuildImpl &build, const rose_literal_info &li, } static -void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { +void dumpRoseLiterals(const RoseBuildImpl &build, + const vector &fragments, + const Grey &grey) { const RoseGraph &g = build.g; + map frag_map = makeFragMap(fragments); DEBUG_PRINTF("dumping literals\n"); - ofstream os(filename); + ofstream os(grey.dumpPath + "rose_literals.txt"); os << "ROSE LITERALS: a total of " << build.literals.right.size() << " literals and " << num_vertices(g) << " roles." << endl << endl; @@ -353,8 +384,11 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { break; } - os << " ID " << id << "/" << lit_info.fragment_id << ": \"" - << escapeString(s.get_string()) << "\"" + os << " ID " << id; + if (contains(frag_map, id)) { + os << "/" << frag_map.at(id); + } + os << ": \"" << escapeString(s.get_string()) << "\"" << " (len " << s.length() << ","; if (s.any_nocase()) { os << " nocase,"; @@ -833,7 +867,7 @@ void dumpMultipathShufti(ofstream &os, u32 len, const u8 *lo, const u8 *hi, #define PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ os << " " << std::setw(4) << std::setfill('0') << (pc - pc_base) \ - << ": " #name " (" << (int)ROSE_INSTR_##name << ")" << endl; \ + << ": " #name "\n"; \ const auto *ri = (const struct ROSE_STRUCT_##name *)pc; #define PROGRAM_NEXT_INSTRUCTION \ @@ -1444,13 +1478,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { #undef PROGRAM_NEXT_INSTRUCTION static -void dumpRoseLitPrograms(const RoseBuildImpl &build, const RoseEngine *t, - const string &filename) { +void dumpRoseLitPrograms(const vector &fragments, + const RoseEngine *t, const string &filename) { ofstream os(filename); // Collect all programs referenced by a literal fragment. 
vector programs; - for (const auto &frag : build.fragments) { + for (const auto &frag : fragments) { if (frag.lit_program_offset) { programs.push_back(frag.lit_program_offset); } @@ -2185,18 +2219,21 @@ void roseDumpComponents(const RoseEngine *t, bool dump_raw, } static -void roseDumpPrograms(const RoseBuildImpl &build, const RoseEngine *t, +void roseDumpPrograms(const vector &fragments, const RoseEngine *t, const string &base) { - dumpRoseLitPrograms(build, t, base + "/rose_lit_programs.txt"); + dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt"); dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); } -void dumpRose(const RoseBuildImpl &build, const RoseEngine *t) { +void dumpRose(const RoseBuildImpl &build, const vector &fragments, + const map &leftfix_queue_map, + const map &suffix_queue_map, + const RoseEngine *t) { const Grey &grey = build.cc.grey; - + if (!grey.dumpFlags) { return; } @@ -2218,16 +2255,14 @@ void dumpRose(const RoseBuildImpl &build, const RoseEngine *t) { fclose(f); roseDumpComponents(t, false, grey.dumpPath); - roseDumpPrograms(build, t, grey.dumpPath); + roseDumpPrograms(fragments, t, grey.dumpPath); // Graph. - dumpRoseGraph(build, t, "rose.dot"); + dumpRoseGraph(build, t, fragments, leftfix_queue_map, suffix_queue_map, + "rose.dot"); - // Literals. - ss.str(""); - ss.clear(); - ss << grey.dumpPath << "rose_literals.txt"; - dumpRoseLiterals(build, ss.str().c_str()); + // Literals + dumpRoseLiterals(build, fragments, grey); f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w"); roseDumpStructRaw(t, f); diff --git a/src/rose/rose_build_dump.h b/src/rose/rose_build_dump.h index 601f5914..d4c620a3 100644 --- a/src/rose/rose_build_dump.h +++ b/src/rose/rose_build_dump.h @@ -29,6 +29,9 @@ #ifndef ROSE_BUILD_DUMP_H #define ROSE_BUILD_DUMP_H +#include "ue2common.h" + +#include #include #include @@ -39,30 +42,40 @@ namespace ue2 { class RoseBuildImpl; struct Grey; struct hwlmLiteral; +struct LitFragment; +struct left_id; +struct suffix_id; #ifdef DUMP_SUPPORT // Dump the Rose graph in graphviz representation. 
-void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t, - const char *filename); +void dumpRoseGraph(const RoseBuildImpl &build, const char *filename); -void dumpRose(const RoseBuildImpl &build, const RoseEngine *t); +void dumpRose(const RoseBuildImpl &build, + const std::vector &fragments, + const std::map &leftfix_queue_map, + const std::map &suffix_queue_map, + const RoseEngine *t); void dumpMatcherLiterals(const std::vector &lits, const std::string &name, const Grey &grey); + #else static UNUSED -void dumpRoseGraph(const RoseBuildImpl &, const RoseEngine *, const char *) { +void dumpRoseGraph(const RoseBuildImpl &, const char *) { } static UNUSED -void dumpRose(const RoseBuildImpl &, const RoseEngine *) { +void dumpRose(const RoseBuildImpl &, const std::vector &, + const std::map &, const std::map &, + const RoseEngine *) { } static UNUSED void dumpMatcherLiterals(const std::vector &, const std::string &, const Grey &) { } + #endif } // namespace ue2 diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index cafd0505..b4821b2b 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -264,7 +264,6 @@ struct rose_literal_info { ue2::flat_set vertices; rose_group group_mask = 0; u32 undelayed_id = MO_INVALID_IDX; - u32 fragment_id = MO_INVALID_IDX; //!< ID corresponding to literal prog. bool squash_group = false; bool requires_benefits = false; }; @@ -437,15 +436,6 @@ private: std::set all_reports(const OutfixInfo &outfix); -struct LitFragment { - LitFragment(u32 fragment_id_in, rose_group groups_in) - : fragment_id(fragment_id_in), groups(groups_in) {} - u32 fragment_id; - rose_group groups; - u32 lit_program_offset = ROSE_INVALID_PROG_OFFSET; - u32 delay_program_offset = ROSE_INVALID_PROG_OFFSET; -}; - // Concrete impl class class RoseBuildImpl : public RoseBuild { public: @@ -576,19 +566,11 @@ public: u32 ematcher_region_size; /**< number of bytes the eod table runs over */ - /** \brief Mapping from leftfix to queue ID (used in dump code). */ - unordered_map leftfix_queue_map; - - /** \brief Mapping from suffix to queue ID (used in dump code). */ - unordered_map suffix_queue_map; - /** \brief Mapping from anchored literal ID to the original literal suffix * present when the literal was added to the literal matcher. Used for * overlap calculation in history assignment. */ std::map anchoredLitSuffix; - std::vector fragments; - unordered_set transient; unordered_map rose_squash_masks; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 5625437b..7f1467d7 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -350,9 +350,6 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { const u32 id = e.first; const auto &lit = e.second; - // This pass takes place before fragment IDs are assigned to literals. 
- assert(build.literal_info.at(id).fragment_id == MO_INVALID_IDX); - if (lit.delay || build.isDelayed(id)) { continue; } @@ -673,6 +670,7 @@ struct MatcherProto { */ static MatcherProto makeMatcherProto(const RoseBuildImpl &build, + const vector &fragments, rose_literal_table table, bool delay_rebuild, size_t max_len, u32 max_offset = ROSE_BOUND_INF) { MatcherProto mp; @@ -682,92 +680,91 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, assert(build.cc.streaming); } - for (const auto &e : build.literals.right) { - const u32 id = e.first; - if (build.literal_info.at(id).fragment_id == MO_INVALID_IDX) { - continue; - } + for (const auto &f : fragments) { + for (u32 id : f.lit_ids) { + const rose_literal_id &lit = build.literals.right.at(id); - if (e.second.delay) { - continue; /* delay id's are virtual-ish */ - } + if (lit.table != table) { + continue; /* wrong table */ + } - if (e.second.table != table) { - continue; /* wrong table */ - } + if (lit.delay) { + continue; /* delay id's are virtual-ish */ + } - assert(id < build.literal_info.size()); - const rose_literal_info &info = build.literal_info[id]; - /* Note: requires_benefits are handled in the literal entries */ - const ue2_literal &lit = e.second.s; + assert(id < build.literal_info.size()); + const auto &info = build.literal_info.at(id); - DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(lit).c_str(), - lit.length()); + /* Note: requires_benefits are handled in the literal entries */ + const ue2_literal &s = lit.s; - // When building the delay rebuild table, we only want to include - // literals that have delayed variants. - if (delay_rebuild && info.delayed_ids.empty()) { - DEBUG_PRINTF("not needed for delay rebuild\n"); - continue; - } + DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(s).c_str(), + s.length()); - if (max_offset != ROSE_BOUND_INF) { - u64a min_report = literalMinReportOffset(build, e.second, info); - if (min_report > max_offset) { - DEBUG_PRINTF("min report offset=%llu exceeds max_offset=%u\n", - min_report, max_offset); + // When building the delay rebuild table, we only want to include + // literals that have delayed variants. + if (delay_rebuild && info.delayed_ids.empty()) { + DEBUG_PRINTF("not needed for delay rebuild\n"); continue; } + + if (max_offset != ROSE_BOUND_INF) { + u64a min_report = literalMinReportOffset(build, lit, info); + if (min_report > max_offset) { + DEBUG_PRINTF("min report offset=%llu exceeds " + "max_offset=%u\n", min_report, max_offset); + continue; + } + } + + const vector &msk = lit.msk; + const vector &cmp = lit.cmp; + bool noruns = isNoRunsLiteral(build, id, info, max_len); + + size_t lit_hist_len = 0; + if (build.cc.streaming) { + lit_hist_len = max(msk.size(), min(s.length(), max_len)); + lit_hist_len = lit_hist_len ? lit_hist_len - 1 : 0; + } + DEBUG_PRINTF("lit requires %zu bytes of history\n", lit_hist_len); + assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); + + auto lit_final = s; // copy + + if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("truncating to tail of length %zu\n", + size_t{ROSE_SHORT_LITERAL_LEN_MAX}); + lit_final.erase(0, lit_final.length() + - ROSE_SHORT_LITERAL_LEN_MAX); + // We shouldn't have set a threshold below 8 chars. 
+ assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); + assert(!noruns); + } + + const auto &s_final = lit_final.get_string(); + bool nocase = lit_final.any_nocase(); + + DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " + "cmp=%s\n", f.fragment_id, + escapeString(s_final).c_str(), (int)nocase, noruns, + dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + + if (!maskIsConsistent(s_final, nocase, msk, cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + continue; + } + + mp.accel_lits.emplace_back(s.get_string(), s.any_nocase(), msk, cmp, + info.group_mask); + mp.history_required = max(mp.history_required, lit_hist_len); + + u32 prog_offset = delay_rebuild ? f.delay_program_offset + : f.lit_program_offset; + const auto &groups = f.groups; + + mp.lits.emplace_back(move(s_final), nocase, noruns, prog_offset, + groups, msk, cmp); } - - const vector &msk = e.second.msk; - const vector &cmp = e.second.cmp; - bool noruns = isNoRunsLiteral(build, id, info, max_len); - - size_t lit_hist_len = 0; - if (build.cc.streaming) { - lit_hist_len = max(msk.size(), min(lit.length(), max_len)); - lit_hist_len = lit_hist_len ? lit_hist_len - 1 : 0; - } - DEBUG_PRINTF("lit requires %zu bytes of history\n", lit_hist_len); - assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); - - auto lit_final = lit; // copy - - if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("truncating to tail of length %zu\n", - size_t{ROSE_SHORT_LITERAL_LEN_MAX}); - lit_final.erase(0, lit_final.length() - ROSE_SHORT_LITERAL_LEN_MAX); - // We shouldn't have set a threshold below 8 chars. - assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); - assert(!noruns); - } - - const auto &s = lit_final.get_string(); - bool nocase = lit_final.any_nocase(); - - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, " - "cmp=%s\n", - info.fragment_id, escapeString(s).c_str(), (int)nocase, - noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - - if (!maskIsConsistent(s, nocase, msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); - continue; - } - - mp.accel_lits.emplace_back(lit.get_string(), lit.any_nocase(), msk, cmp, - info.group_mask); - mp.history_required = max(mp.history_required, lit_hist_len); - - assert(info.fragment_id < build.fragments.size()); - const auto &frag = build.fragments.at(info.fragment_id); - u32 prog_offset = - delay_rebuild ? 
frag.delay_program_offset : frag.lit_program_offset; - const auto &groups = frag.groups; - - mp.lits.emplace_back(move(s), nocase, noruns, prog_offset, groups, msk, - cmp); } sort_and_unique(mp.lits); @@ -809,14 +806,15 @@ void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp, } aligned_unique_ptr -buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, - rose_group *fgroups, size_t *fsize, - size_t *historyRequired) { +buildFloatingMatcher(const RoseBuildImpl &build, + const vector &fragments, + size_t longLitLengthThreshold, rose_group *fgroups, + size_t *fsize, size_t *historyRequired) { *fsize = 0; *fgroups = 0; - auto mp = - makeMatcherProto(build, ROSE_FLOATING, false, longLitLengthThreshold); + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, + longLitLengthThreshold); if (mp.lits.empty()) { DEBUG_PRINTF("empty floating matcher\n"); return nullptr; @@ -847,6 +845,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, } aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, + const vector &fragments, size_t longLitLengthThreshold, size_t *drsize) { *drsize = 0; @@ -856,8 +855,8 @@ aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, return nullptr; } - auto mp = - makeMatcherProto(build, ROSE_FLOATING, true, longLitLengthThreshold); + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, true, + longLitLengthThreshold); if (mp.lits.empty()) { DEBUG_PRINTF("empty delay rebuild matcher\n"); return nullptr; @@ -877,8 +876,9 @@ aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, return hwlm; } -aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, - size_t *sbsize) { +aligned_unique_ptr +buildSmallBlockMatcher(const RoseBuildImpl &build, + const vector &fragments, size_t *sbsize) { *sbsize = 0; if (build.cc.streaming) { @@ -893,7 +893,7 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto mp = makeMatcherProto(build, ROSE_FLOATING, false, + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (mp.lits.empty()) { DEBUG_PRINTF("no floating table\n"); @@ -903,9 +903,10 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto mp_anchored = - makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, false, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); + auto mp_anchored = makeMatcherProto(build, fragments, + ROSE_ANCHORED_SMALL_BLOCK, false, + ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); if (mp_anchored.lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; @@ -937,11 +938,12 @@ aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, return hwlm; } -aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, - size_t *esize) { +aligned_unique_ptr +buildEodAnchoredMatcher(const RoseBuildImpl &build, + const vector &fragments, size_t *esize) { *esize = 0; - auto mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, false, + auto mp = makeMatcherProto(build, fragments, ROSE_EOD_ANCHORED, false, build.ematcher_region_size); if (mp.lits.empty()) { diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index cb56037d..494a3aeb 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -36,25 +36,45 @@ #include "rose_build_impl.h" +#include + struct Grey; struct HWLM; namespace ue2 { +struct LitFragment { 
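+    /* A fragment bundles one or more literal IDs that are matched as a unit
+     * and share the same literal program and delay rebuild program (see
+     * groupByFragment). */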
+ LitFragment(u32 fragment_id_in, rose_group groups_in, u32 lit_id) + : fragment_id(fragment_id_in), groups(groups_in), lit_ids({lit_id}) {} + LitFragment(u32 fragment_id_in, rose_group groups_in, + std::vector lit_ids_in) + : fragment_id(fragment_id_in), groups(groups_in), + lit_ids(std::move(lit_ids_in)) {} + u32 fragment_id; + rose_group groups; + std::vector lit_ids; + u32 lit_program_offset = ROSE_INVALID_PROG_OFFSET; + u32 delay_program_offset = ROSE_INVALID_PROG_OFFSET; +}; + aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, + const std::vector &fragments, size_t longLitLengthThreshold, rose_group *fgroups, size_t *fsize, size_t *historyRequired); aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, + const std::vector &fragments, size_t longLitLengthThreshold, size_t *drsize); aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, + const std::vector &fragments, size_t *sbsize); aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, + const std::vector &fragments, size_t *esize); void findMoreLiteralMasks(RoseBuildImpl &build); diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 9e030e8e..2fb76c77 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -450,8 +450,7 @@ void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob, jump_offsets.push_back(offset_map.at(jump.second)); } - vector iter; - mmbBuildSparseIterator(iter, keys, num_keys); + auto iter = mmbBuildSparseIterator(keys, num_keys); assert(!iter.empty()); inst->iter_offset = blob.add_iterator(iter); inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end()); @@ -485,8 +484,7 @@ void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob, inst->fail_jump = calc_jump(offset_map, this, target); // Write the multibit sparse iterator. - vector iter; - mmbBuildSparseIterator(iter, keys, num_keys); + auto iter = mmbBuildSparseIterator(keys, num_keys); assert(!iter.empty()); inst->iter_offset = blob.add_iterator(iter); } diff --git a/src/util/multibit_build.cpp b/src/util/multibit_build.cpp index c726bdf9..ad6a0d6a 100644 --- a/src/util/multibit_build.cpp +++ b/src/util/multibit_build.cpp @@ -155,9 +155,9 @@ void bfs(vector &out, const TreeNode &tree) { /** \brief Construct a sparse iterator over the values in \a bits for a * multibit of size \a total_bits. */ -void mmbBuildSparseIterator(vector &out, - const vector &bits, u32 total_bits) { - assert(out.empty()); +vector mmbBuildSparseIterator(const vector &bits, + u32 total_bits) { + vector out; assert(!bits.empty()); assert(total_bits > 0); assert(total_bits <= MMB_MAX_BITS); @@ -186,6 +186,7 @@ void mmbBuildSparseIterator(vector &out, #endif DEBUG_PRINTF("iter has %zu records\n", out.size()); + return out; } template diff --git a/src/util/multibit_build.h b/src/util/multibit_build.h index 951f1fb4..2d7b5fc2 100644 --- a/src/util/multibit_build.h +++ b/src/util/multibit_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,8 +61,8 @@ u32 mmbit_size(u32 total_bits); /** \brief Construct a sparse iterator over the values in \a bits for a * multibit of size \a total_bits. 
*/ -void mmbBuildSparseIterator(std::vector &out, - const std::vector &bits, u32 total_bits); +std::vector +mmbBuildSparseIterator(const std::vector &bits, u32 total_bits); struct scatter_plan_raw; diff --git a/unit/internal/multi_bit.cpp b/unit/internal/multi_bit.cpp index 30dce493..2b0c7c79 100644 --- a/unit/internal/multi_bit.cpp +++ b/unit/internal/multi_bit.cpp @@ -782,7 +782,6 @@ TEST_P(MultiBitTest, InitRangePlanChunked) { TEST(MultiBit, SparseIteratorBegin1) { const u32 test_size = 100; - vector it; vector bits; bits.push_back(1); @@ -791,7 +790,7 @@ TEST(MultiBit, SparseIteratorBegin1) { bits.push_back(35); bits.push_back(68); - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); //ASSERT_EQ(4U, it.size()); // Trivial initial test: all bits in 'bits' are on, all others are off @@ -820,7 +819,6 @@ TEST(MultiBit, SparseIteratorBegin1) { TEST(MultiBit, SparseIteratorBegin2) { const u32 test_size = 40000; - vector it; vector bits; bits.push_back(1); @@ -830,7 +828,7 @@ TEST(MultiBit, SparseIteratorBegin2) { bits.push_back(8920); bits.push_back(37000); - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); //ASSERT_EQ(12U, it.size()); // Trivial initial test: all bits in 'bits' are on, all others are off @@ -859,7 +857,6 @@ TEST(MultiBit, SparseIteratorBegin2) { TEST(MultiBit, SparseIteratorNext1) { const u32 test_size = 100; - vector it; vector bits; bits.push_back(1); @@ -868,7 +865,7 @@ TEST(MultiBit, SparseIteratorNext1) { bits.push_back(35); bits.push_back(68); - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Trivial initial test: all bits in 'bits' are on, all others are off mmbit_holder ba(test_size); @@ -924,7 +921,6 @@ TEST(MultiBit, SparseIteratorNext1) { TEST(MultiBit, SparseIteratorNext2) { const u32 test_size = 40000; - vector it; vector bits; bits.push_back(1); @@ -939,7 +935,7 @@ TEST(MultiBit, SparseIteratorNext2) { bits.push_back(37000); bits.push_back(39999); - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Trivial initial test: all bits in 'bits' are on, all others are off mmbit_holder ba(test_size); @@ -995,7 +991,6 @@ TEST(MultiBit, SparseIteratorNext2) { TEST(MultiBit, SparseIteratorNextSmall) { const u32 test_size = 15; - vector it; vector bits; bits.push_back(1); @@ -1005,7 +1000,7 @@ TEST(MultiBit, SparseIteratorNextSmall) { bits.push_back(12); bits.push_back(14); - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Trivial initial test: all bits in 'bits' are on, all others are off mmbit_holder ba(test_size); @@ -1064,13 +1059,12 @@ TEST_P(MultiBitTest, SparseIteratorBeginAll) { ASSERT_TRUE(ba != nullptr); // Put all our bits into the sparse iterator. - vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch all bits on in state. 
mmbit_clear(ba, test_size); @@ -1104,12 +1098,11 @@ TEST_P(MultiBitTest, SparseIteratorBeginThirds) { } // Put all our bits into the sparse iterator - vector it; vector bits(test_size); for (u32 i = 0; i != test_size; i++) { bits[i] = i; } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch every third bits on in state mmbit_clear(ba, test_size); @@ -1139,13 +1132,12 @@ TEST_P(MultiBitTest, SparseIteratorNextAll) { ASSERT_TRUE(ba != nullptr); // Put all our bits into the sparse iterator. - vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch all bits on in state mmbit_clear(ba, test_size); @@ -1182,14 +1174,13 @@ TEST_P(MultiBitTest, SparseIteratorNextExactStrided) { // Put all our bits into the sparse iterator and switch them on in the // state. mmbit_clear(ba, test_size); - vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); mmbit_set(ba, test_size, i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Iterate over all bits. vector state(mmbit_sparse_iter_state_size(test_size)); @@ -1214,13 +1205,12 @@ TEST_P(MultiBitTest, SparseIteratorNextNone) { ASSERT_TRUE(ba != nullptr); // Put all our bits into the sparse iterator. - vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch only the first bit on mmbit_clear(ba, test_size); @@ -1243,13 +1233,12 @@ TEST_P(MultiBitTest, SparseIteratorUnsetAll) { ASSERT_TRUE(ba != nullptr); // Put all our bits into the sparse iterator - vector it; vector bits; bits.reserve(test_size / stride); for (u64a i = 0; i < test_size; i += stride) { bits.push_back(i); } - mmbBuildSparseIterator(it, bits, test_size); + auto it = mmbBuildSparseIterator(bits, test_size); // Switch all bits on mmbit_clear(ba, test_size); @@ -1283,9 +1272,8 @@ TEST_P(MultiBitTest, SparseIteratorUnsetHalves) { odd.push_back(i); } - vector it_even, it_odd; - mmbBuildSparseIterator(it_even, even, test_size); - mmbBuildSparseIterator(it_odd, odd, test_size); + auto it_even = mmbBuildSparseIterator(even, test_size); + auto it_odd = mmbBuildSparseIterator(odd, test_size); // Switch all bits on mmbit_clear(ba, test_size); From cf82924a3978b1002176cfa40cc839833c99cd7c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 30 Mar 2017 16:33:11 +1100 Subject: [PATCH 199/326] depth: make constructor explicit --- src/nfa/castlecompile.cpp | 2 +- src/nfagraph/ng_anchored_dots.cpp | 16 +- src/nfagraph/ng_calc_components.cpp | 7 +- src/nfagraph/ng_expr_info.cpp | 2 +- src/nfagraph/ng_prefilter.cpp | 11 +- src/nfagraph/ng_repeat.cpp | 12 +- src/nfagraph/ng_som_util.cpp | 4 +- src/nfagraph/ng_width.cpp | 6 +- src/rose/rose_build_convert.cpp | 4 +- src/rose/rose_build_impl.h | 4 +- src/rose/rose_build_misc.cpp | 4 +- src/rose/rose_graph.h | 6 +- src/util/depth.h | 32 +++- unit/internal/depth.cpp | 11 +- unit/internal/nfagraph_repeat.cpp | 46 ++--- unit/internal/nfagraph_width.cpp | 40 ++--- unit/internal/repeat.cpp | 262 ++++++++++++++-------------- 17 files changed, 248 insertions(+), 221 deletions(-) diff --git a/src/nfa/castlecompile.cpp 
b/src/nfa/castlecompile.cpp index a7fe1e90..20239f56 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -501,7 +501,7 @@ buildCastle(const CastleProto &proto, // possibly means that we've got a repeat that we can't trigger. We do // need to cope with it though. if (contains(triggers, top)) { - min_period = minPeriod(triggers.at(top), cr, &is_reset); + min_period = depth(minPeriod(triggers.at(top), cr, &is_reset)); } if (min_period > pr.bounds.max) { diff --git a/src/nfagraph/ng_anchored_dots.cpp b/src/nfagraph/ng_anchored_dots.cpp index ed9c7f48..9a13376d 100644 --- a/src/nfagraph/ng_anchored_dots.cpp +++ b/src/nfagraph/ng_anchored_dots.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -208,7 +208,7 @@ void reformAnchoredRepeatsComponent(NGHolder &g, /* get bounds */ depth min; - depth max = 1; + depth max(1); if (selfLoop) { // A self-loop indicates that this is a '.+' or '.*' @@ -229,9 +229,9 @@ void reformAnchoredRepeatsComponent(NGHolder &g, } } - min = 0; + min = depth(0); } else { - min = 1; + min = depth(1); } *startBegin = min; @@ -326,8 +326,8 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, } /* get bounds */ - depth min = 1; - depth max = 1; + depth min(1); + depth max(1); if (selfLoop) { // A self-loop indicates that this is a '.+' or '.*' @@ -349,7 +349,7 @@ void reformUnanchoredRepeatsComponent(NGHolder &g, DEBUG_PRINTF("min greater than one, skipping\n"); return; } - min = 0; + min = depth(0); } *startBegin += min; @@ -502,7 +502,7 @@ void collapseVariableDotRepeat(NGHolder &g, NFAVertex start, startEnd->str().c_str()); if (start == g.start && startEnd->is_infinite()) { - *startEnd = dots.size(); + *startEnd = depth(dots.size()); } else if (startEnd->is_finite()) { *startEnd += dots.size(); } diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index e0689366..54221c7b 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -372,15 +372,16 @@ deque> calcComponents(unique_ptr g, } bool shell_comp = false; - splitIntoComponents(std::move(g), comps, MAX_HEAD_SHELL_DEPTH, - MAX_TAIL_SHELL_DEPTH, &shell_comp); + splitIntoComponents(std::move(g), comps, depth(MAX_HEAD_SHELL_DEPTH), + depth(MAX_TAIL_SHELL_DEPTH), &shell_comp); if (shell_comp) { DEBUG_PRINTF("re-running on shell comp\n"); assert(!comps.empty()); auto sc = std::move(comps.back()); comps.pop_back(); - splitIntoComponents(std::move(sc), comps, 0, 0, &shell_comp); + splitIntoComponents(std::move(sc), comps, depth(0), depth(0), + &shell_comp); } DEBUG_PRINTF("finished; split into %zu components\n", comps.size()); diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index 1f601c61..9417b674 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -84,7 +84,7 @@ void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v, return; } if (is_any_start(v, g)) { - info.min = 0; + info.min = depth(0); info.max = max(info.max, depth(0)); return; } diff --git a/src/nfagraph/ng_prefilter.cpp b/src/nfagraph/ng_prefilter.cpp index 012b4e8d..3cd9d06d 100644 --- a/src/nfagraph/ng_prefilter.cpp +++ b/src/nfagraph/ng_prefilter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and 
use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Prefilter Reductions. * * This file contains routines for reducing the size of an NFA graph that we @@ -92,13 +93,13 @@ struct RegionInfo { u32 id; //!< region id deque vertices; //!< vertices in the region CharReach reach; //!< union of region reach - depth minWidth = 0; //!< min width of region subgraph - depth maxWidth = depth::infinity(); //!< max width of region subgraph + depth minWidth{0}; //!< min width of region subgraph + depth maxWidth{depth::infinity()}; //!< max width of region subgraph bool atBoundary = false; //!< region is next to an accept // Bigger score is better. size_t score() const { - // FIXME: charreach should be a signal? + // TODO: charreach should be a signal? size_t numVertices = vertices.size(); if (atBoundary) { return numVertices - min(PENALTY_BOUNDARY, numVertices); diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index 96e3266f..c51618ea 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -105,8 +105,8 @@ typedef boost::filtered_graph> RepeatGraph; struct ReachSubgraph { vector vertices; - depth repeatMin = 0; - depth repeatMax = 0; + depth repeatMin{0}; + depth repeatMax{0}; u32 minPeriod = 1; bool is_reset = false; enum RepeatType historyType = REPEAT_RING; @@ -586,8 +586,8 @@ bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi, range.first, range.second); return false; } - rsi.repeatMin = range.first; - rsi.repeatMax = range.second; + rsi.repeatMin = depth(range.first); + rsi.repeatMax = depth(range.second); // If we've got a self-loop anywhere, we've got inf max. if (anySelfLoop(g, rsi.vertices.begin(), rsi.vertices.end())) { @@ -2106,7 +2106,7 @@ void populateFixedTopInfo(const map &fixed_depth_tops, td = depth::infinity(); break; } - depth td_t = fixed_depth_tops.at(top); + depth td_t(fixed_depth_tops.at(top)); if (td == td_t) { continue; } else if (td == depth::infinity()) { @@ -2479,7 +2479,7 @@ bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) { // have the same report set as the vertices in the repeat. if (repeat.bounds.min == depth(1) && g[g.start].reports == g[v].reports) { - repeat.bounds.min = 0; + repeat.bounds.min = depth(0); DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str()); } else { DEBUG_PRINTF("not a supported repeat\n"); diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index c4337341..78a39119 100644 --- a/src/nfagraph/ng_som_util.cpp +++ b/src/nfagraph/ng_som_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -94,7 +94,7 @@ vector getDistancesFromSOM(const NGHolder &g_orig) { if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) { // StartDs and virtual starts always have zero depth. 
- d = DepthMinMax(0, 0); + d = DepthMinMax(depth(0), depth(0)); } else { u32 new_idx = g[v_new].index; d = temp_depths.at(new_idx); diff --git a/src/nfagraph/ng_width.cpp b/src/nfagraph/ng_width.cpp index d596b7b5..c2e9eb1a 100644 --- a/src/nfagraph/ng_width.cpp +++ b/src/nfagraph/ng_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -157,12 +157,12 @@ depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter, if (colors.at(NODE_ACCEPT) == boost::white_color) { acceptDepth = depth::unreachable(); } else { - acceptDepth = -1 * distance.at(NODE_ACCEPT); + acceptDepth = depth(-1 * distance.at(NODE_ACCEPT)); } if (colors.at(NODE_ACCEPT_EOD) == boost::white_color) { acceptEodDepth = depth::unreachable(); } else { - acceptEodDepth = -1 * distance.at(NODE_ACCEPT_EOD); + acceptEodDepth = depth(-1 * distance.at(NODE_ACCEPT_EOD)); } depth d; diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index 14eec7c7..89eac225 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -551,7 +551,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, && is_subset_of(exits, base_succ) && is_subset_of(base_succ, exits_and_repeat_verts)) { /* we have a jump edge */ - ri.repeatMin = 0; + ri.repeatMin = depth(0); } else { return false; } @@ -802,7 +802,7 @@ void convertAnchPrefixToBounds(RoseBuildImpl &tbi) { DepthMinMax bounds(pr.bounds); // copy if (delay_adj > bounds.min) { - bounds.min = 0; + bounds.min = depth(0); } else { bounds.min -= delay_adj; } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index b4821b2b..93c0f18c 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -425,8 +425,8 @@ struct OutfixInfo { RevAccInfo rev_info; u32 maxBAWidth = 0; //!< max bi-anchored width - depth minWidth = depth::infinity(); - depth maxWidth = 0; + depth minWidth{depth::infinity()}; + depth maxWidth{0}; u64a maxOffset = 0; bool in_sbmatcher = false; //!< handled by small-block matcher. 
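The hunks above mechanically wrap integer values in depth(...) because, as the src/util/depth.h hunk below shows, the u32 constructor becomes explicit and the default constructor now yields the special "unreachable" value. A minimal sketch of the effect (illustrative only, not part of this patch; Depth is a simplified stand-in for ue2::depth):

#include <cstdint>

class Depth {
public:
    Depth() = default;                      // default value is the "unreachable" sentinel
    explicit Depth(uint32_t v) : val(v) {}  // explicit: no silent u32 -> Depth conversion
private:
    uint32_t val = 1u << 31;                // sentinel, mirroring depth's val_unreachable
};

int main() {
    Depth a(0);      // OK: direct initialization
    Depth b{200};    // OK: direct-list-initialization
    // Depth c = 0;  // no longer compiles: copy-initialization requires the
    //               // implicit u32 conversion that "explicit" forbids
    (void)a; (void)b;
    return 0;
}

This is what forces call-site changes such as min = depth(0) in ng_anchored_dots.cpp and depth(MAX_HEAD_SHELL_DEPTH) in ng_calc_components.cpp above.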
diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index c9403896..ef650714 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -970,7 +970,7 @@ void RoseSuffixInfo::reset(void) { rdfa.reset(); haig.reset(); tamarama.reset(); - dfa_min_width = 0; + dfa_min_width = depth(0); dfa_max_width = depth::infinity(); } @@ -1181,7 +1181,7 @@ void LeftEngInfo::reset(void) { tamarama.reset(); lag = 0; leftfix_report = MO_INVALID_IDX; - dfa_min_width = 0; + dfa_min_width = depth(0); dfa_max_width = depth::infinity(); } diff --git a/src/rose/rose_graph.h b/src/rose/rose_graph.h index c3af749f..b7e092bb 100644 --- a/src/rose/rose_graph.h +++ b/src/rose/rose_graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -85,7 +85,7 @@ struct LeftEngInfo { std::shared_ptr tamarama; u32 lag = 0U; ReportID leftfix_report = MO_INVALID_IDX; - depth dfa_min_width = 0; + depth dfa_min_width{0}; depth dfa_max_width = depth::infinity(); bool operator==(const LeftEngInfo &other) const { @@ -125,7 +125,7 @@ struct RoseSuffixInfo { std::shared_ptr haig; std::shared_ptr rdfa; std::shared_ptr tamarama; - depth dfa_min_width = 0; + depth dfa_min_width{0}; depth dfa_max_width = depth::infinity(); bool operator==(const RoseSuffixInfo &b) const; diff --git a/src/util/depth.h b/src/util/depth.h index 977fd0c3..b1fe2b1b 100644 --- a/src/util/depth.h +++ b/src/util/depth.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -54,9 +54,10 @@ struct DepthOverflowError {}; */ class depth { public: - depth() : val(val_unreachable) {} + /** \brief The default depth is special value "unreachable". */ + depth() = default; - depth(u32 v) : val(v) { + explicit depth(u32 v) : val(v) { if (v > max_value()) { DEBUG_PRINTF("depth %u too large to represent!\n", v); throw DepthOverflowError(); @@ -196,6 +197,29 @@ public: return *this; } + depth operator-(s32 d) const { + if (is_unreachable()) { + return unreachable(); + } + if (is_infinite()) { + return infinity(); + } + + s64a rv = val - d; + if (rv < 0 || (u64a)rv >= val_infinity) { + DEBUG_PRINTF("depth %lld too large to represent!\n", rv); + throw DepthOverflowError(); + } + + return depth((u32)rv); + } + + depth operator-=(s32 d) { + depth rv = *this - d; + *this = rv; + return *this; + } + #ifdef DUMP_SUPPORT /** \brief Render as a string, useful for debugging. 
*/ std::string str() const; @@ -209,7 +233,7 @@ private: static constexpr u32 val_infinity = (1u << 31) - 1; static constexpr u32 val_unreachable = 1u << 31; - u32 val; + u32 val = val_unreachable; }; /** diff --git a/unit/internal/depth.cpp b/unit/internal/depth.cpp index a004643b..ad9ffe38 100644 --- a/unit/internal/depth.cpp +++ b/unit/internal/depth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -112,9 +112,10 @@ TEST(depth, add_finite) { ASSERT_EQ(depth(900), depth(1000) + s32{-100}); // overflow must throw + depth max_depth(depth::max_value()); depth d; - ASSERT_THROW(d = depth::max_value() + depth(1), DepthOverflowError); - ASSERT_THROW(d = depth::max_value() + 1, DepthOverflowError); + ASSERT_THROW(d = max_depth + depth(1), DepthOverflowError); + ASSERT_THROW(d = max_depth + 1, DepthOverflowError); // underflow must throw ASSERT_THROW(d = depth(0) + s32{-1}, DepthOverflowError); @@ -267,11 +268,11 @@ TEST(depth, unordered_set) { ue2::unordered_set depths; for (const auto &val : finite_values) { - depths.insert(val); + depths.emplace(val); } for (const auto &val : finite_values) { - ASSERT_TRUE(depths.find(val) != depths.end()); + ASSERT_TRUE(depths.find(depth(val)) != depths.end()); } ASSERT_TRUE(depths.find(depth::infinity()) == depths.end()); diff --git a/unit/internal/nfagraph_repeat.cpp b/unit/internal/nfagraph_repeat.cpp index b34d1271..941873ec 100644 --- a/unit/internal/nfagraph_repeat.cpp +++ b/unit/internal/nfagraph_repeat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -66,28 +66,28 @@ struct PureRepeatTest { class NFAPureRepeatTest : public TestWithParam { }; static const PureRepeatTest pureRepeatTests[] = { - { "^.*", 0, depth::infinity() }, - { "^.+", 1, depth::infinity() }, - { "^.", 1, 1 }, - { "^..", 2, 2 }, - { "^.?.", 1, 2 }, - { "^.{1,2}", 1, 2 }, - { "^.{1,3}", 1, 3 }, - { "^.{1,10}", 1, 10 }, - { "^.{1,200}", 1, 200 }, - { "^.{200}", 200, 200 }, - { "^.{0,}", 0, depth::infinity() }, - { "^.{1,}", 1, depth::infinity() }, - { "^.{2,}", 2, depth::infinity() }, - { "^.{10,}", 10, depth::infinity() }, - { "^.{200,}", 200, depth::infinity() }, - { "^.{5000,}", 5000, depth::infinity() }, - { "^.{0,1}", 0, 1 }, - { "^.{0,2}", 0, 2 }, - { "^.{0,100}", 0, 100 }, - { "^.{0,5000}", 0, 5000 }, - { "^x{10}x{20,30}", 30, 40 }, - { "^..?..?..?..?..?", 5, 10 } + { "^.*", depth(0), depth::infinity() }, + { "^.+", depth(1), depth::infinity() }, + { "^.", depth(1), depth(1) }, + { "^..", depth(2), depth(2) }, + { "^.?.", depth(1), depth(2) }, + { "^.{1,2}", depth(1), depth(2) }, + { "^.{1,3}", depth(1), depth(3) }, + { "^.{1,10}", depth(1), depth(10) }, + { "^.{1,200}", depth(1), depth(200) }, + { "^.{200}", depth(200), depth(200) }, + { "^.{0,}", depth(0), depth::infinity() }, + { "^.{1,}", depth(1), depth::infinity() }, + { "^.{2,}", depth(2), depth::infinity() }, + { "^.{10,}", depth(10), depth::infinity() }, + { "^.{200,}", depth(200), depth::infinity() }, + { "^.{5000,}", depth(5000), depth::infinity() }, + { "^.{0,1}", depth(0), depth(1) }, + { "^.{0,2}", depth(0), depth(2) }, + { "^.{0,100}", depth(0), depth(100) }, + { "^.{0,5000}", depth(0), 
depth(5000) }, + { "^x{10}x{20,30}", depth(30), depth(40) }, + { "^..?..?..?..?..?", depth(5), depth(10) } }; INSTANTIATE_TEST_CASE_P(PureRepeat, NFAPureRepeatTest, diff --git a/unit/internal/nfagraph_width.cpp b/unit/internal/nfagraph_width.cpp index 5cfb4c87..7ccdca37 100644 --- a/unit/internal/nfagraph_width.cpp +++ b/unit/internal/nfagraph_width.cpp @@ -52,26 +52,26 @@ struct WidthTest { class NFAWidthTest : public TestWithParam { }; static const WidthTest widthTests[] = { - { "()", 0, 0 }, - { "a", 1, 1 }, - { "a?b", 1, 2 }, - { "foobar", 6, 6 }, - { "foo(bar)?", 3, 6 }, - { "(a|ab|abc|abcd)", 1, 4 }, - { "foo.*bar", 6, depth::infinity() }, - { "foo(bar)*", 3, depth::infinity() }, - { "foo(bar)+", 6, depth::infinity() }, - { "foo(bar){1,3}", 6, 12 }, - { "(abcd)+", 4, depth::infinity() }, - { "foo\\z", 3, 3 }, - { "^foo", 3, 3 }, - { "^foo|bar.*baz", 3, depth::infinity() }, - { "^foobar.*|baz", 3, depth::infinity() }, - { "foo(\\z|bar)", 3, 6 }, - { "foo(|bar\\z)", 3, 6 }, - { "foo.{0,15}bar", 6, 21 }, - { "foo.{0,15}.*bar", 6, depth::infinity() }, - { "(?smi)^(aa[^a]aa$|a|a+\\Z|a)", 1, depth::infinity() } + { "()", depth(0), depth(0) }, + { "a", depth(1), depth(1) }, + { "a?b", depth(1), depth(2) }, + { "foobar", depth(6), depth(6) }, + { "foo(bar)?", depth(3), depth(6) }, + { "(a|ab|abc|abcd)", depth(1), depth(4) }, + { "foo.*bar", depth(6), depth::infinity() }, + { "foo(bar)*", depth(3), depth::infinity() }, + { "foo(bar)+", depth(6), depth::infinity() }, + { "foo(bar){1,3}", depth(6), depth(12) }, + { "(abcd)+", depth(4), depth::infinity() }, + { "foo\\z", depth(3), depth(3) }, + { "^foo", depth(3), depth(3) }, + { "^foo|bar.*baz", depth(3), depth::infinity() }, + { "^foobar.*|baz", depth(3), depth::infinity() }, + { "foo(\\z|bar)", depth(3), depth(6) }, + { "foo(|bar\\z)", depth(3), depth(6) }, + { "foo.{0,15}bar", depth(6), depth(21) }, + { "foo.{0,15}.*bar", depth(6), depth::infinity() }, + { "(?smi)^(aa[^a]aa$|a|a+\\Z|a)", depth(1), depth::infinity() } }; INSTANTIATE_TEST_CASE_P(NFAWidth, NFAWidthTest, ValuesIn(widthTests)); diff --git a/unit/internal/repeat.cpp b/unit/internal/repeat.cpp index 7f245e62..546d7d4f 100644 --- a/unit/internal/repeat.cpp +++ b/unit/internal/repeat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -106,96 +106,96 @@ private: static const RepeatTestInfo repeatTests[] = { // Fixed repeats -- ring model - { REPEAT_RING, 2, 2 }, - { REPEAT_RING, 4, 4 }, - { REPEAT_RING, 10, 10 }, - { REPEAT_RING, 16, 16 }, - { REPEAT_RING, 20, 20 }, - { REPEAT_RING, 30, 30 }, - { REPEAT_RING, 50, 50 }, - { REPEAT_RING, 64, 64 }, - { REPEAT_RING, 65, 65 }, - { REPEAT_RING, 100, 100 }, - { REPEAT_RING, 200, 200 }, - { REPEAT_RING, 1000, 1000 }, - { REPEAT_RING, 4100, 4100 }, - { REPEAT_RING, 16000, 16000 }, + { REPEAT_RING, depth(2), depth(2) }, + { REPEAT_RING, depth(4), depth(4) }, + { REPEAT_RING, depth(10), depth(10) }, + { REPEAT_RING, depth(16), depth(16) }, + { REPEAT_RING, depth(20), depth(20) }, + { REPEAT_RING, depth(30), depth(30) }, + { REPEAT_RING, depth(50), depth(50) }, + { REPEAT_RING, depth(64), depth(64) }, + { REPEAT_RING, depth(65), depth(65) }, + { REPEAT_RING, depth(100), depth(100) }, + { REPEAT_RING, depth(200), depth(200) }, + { REPEAT_RING, depth(1000), depth(1000) }, + { REPEAT_RING, depth(4100), depth(4100) }, + { REPEAT_RING, 
depth(16000), depth(16000) }, // {0, N} repeats -- last model - { REPEAT_LAST, 0, 4 }, - { REPEAT_LAST, 0, 10 }, - { REPEAT_LAST, 0, 20 }, - { REPEAT_LAST, 0, 30 }, - { REPEAT_LAST, 0, 50 }, - { REPEAT_LAST, 0, 100 }, - { REPEAT_LAST, 0, 200 }, - { REPEAT_LAST, 0, 1000 }, - { REPEAT_LAST, 0, 16000 }, + { REPEAT_LAST, depth(0), depth(4) }, + { REPEAT_LAST, depth(0), depth(10) }, + { REPEAT_LAST, depth(0), depth(20) }, + { REPEAT_LAST, depth(0), depth(30) }, + { REPEAT_LAST, depth(0), depth(50) }, + { REPEAT_LAST, depth(0), depth(100) }, + { REPEAT_LAST, depth(0), depth(200) }, + { REPEAT_LAST, depth(0), depth(1000) }, + { REPEAT_LAST, depth(0), depth(16000) }, // {0, N} repeats -- ring model (though we use 'last' model in practice) - { REPEAT_RING, 0, 2 }, - { REPEAT_RING, 0, 4 }, - { REPEAT_RING, 0, 10 }, - { REPEAT_RING, 0, 20 }, - { REPEAT_RING, 0, 30 }, - { REPEAT_RING, 0, 50 }, - { REPEAT_RING, 0, 64 }, - { REPEAT_RING, 0, 65 }, - { REPEAT_RING, 0, 100 }, - { REPEAT_RING, 0, 200 }, - { REPEAT_RING, 0, 1000 }, - { REPEAT_RING, 0, 16000 }, + { REPEAT_RING, depth(0), depth(2) }, + { REPEAT_RING, depth(0), depth(4) }, + { REPEAT_RING, depth(0), depth(10) }, + { REPEAT_RING, depth(0), depth(20) }, + { REPEAT_RING, depth(0), depth(30) }, + { REPEAT_RING, depth(0), depth(50) }, + { REPEAT_RING, depth(0), depth(64) }, + { REPEAT_RING, depth(0), depth(65) }, + { REPEAT_RING, depth(0), depth(100) }, + { REPEAT_RING, depth(0), depth(200) }, + { REPEAT_RING, depth(0), depth(1000) }, + { REPEAT_RING, depth(0), depth(16000) }, // {N, M} repeats -- ring model - { REPEAT_RING, 2, 3 }, - { REPEAT_RING, 1, 4 }, - { REPEAT_RING, 5, 10 }, - { REPEAT_RING, 10, 20 }, - { REPEAT_RING, 10, 50 }, - { REPEAT_RING, 50, 60 }, - { REPEAT_RING, 100, 200 }, - { REPEAT_RING, 1, 200 }, - { REPEAT_RING, 10, 16000 }, - { REPEAT_RING, 10000, 16000 }, + { REPEAT_RING, depth(2), depth(3) }, + { REPEAT_RING, depth(1), depth(4) }, + { REPEAT_RING, depth(5), depth(10) }, + { REPEAT_RING, depth(10), depth(20) }, + { REPEAT_RING, depth(10), depth(50) }, + { REPEAT_RING, depth(50), depth(60) }, + { REPEAT_RING, depth(100), depth(200) }, + { REPEAT_RING, depth(1), depth(200) }, + { REPEAT_RING, depth(10), depth(16000) }, + { REPEAT_RING, depth(10000), depth(16000) }, // {N, M} repeats -- range model - { REPEAT_RANGE, 1, 4 }, - { REPEAT_RANGE, 5, 10 }, - { REPEAT_RANGE, 10, 20 }, - { REPEAT_RANGE, 10, 50 }, - { REPEAT_RANGE, 50, 60 }, - { REPEAT_RANGE, 100, 200 }, - { REPEAT_RANGE, 1, 200 }, - { REPEAT_RANGE, 10, 16000 }, - { REPEAT_RANGE, 10000, 16000 }, + { REPEAT_RANGE, depth(1), depth(4) }, + { REPEAT_RANGE, depth(5), depth(10) }, + { REPEAT_RANGE, depth(10), depth(20) }, + { REPEAT_RANGE, depth(10), depth(50) }, + { REPEAT_RANGE, depth(50), depth(60) }, + { REPEAT_RANGE, depth(100), depth(200) }, + { REPEAT_RANGE, depth(1), depth(200) }, + { REPEAT_RANGE, depth(10), depth(16000) }, + { REPEAT_RANGE, depth(10000), depth(16000) }, // {N,M} repeats -- small bitmap model - { REPEAT_BITMAP, 1, 2 }, - { REPEAT_BITMAP, 5, 10 }, - { REPEAT_BITMAP, 10, 20 }, - { REPEAT_BITMAP, 20, 40 }, - { REPEAT_BITMAP, 1, 63 }, - { REPEAT_BITMAP, 50, 63 }, + { REPEAT_BITMAP, depth(1), depth(2) }, + { REPEAT_BITMAP, depth(5), depth(10) }, + { REPEAT_BITMAP, depth(10), depth(20) }, + { REPEAT_BITMAP, depth(20), depth(40) }, + { REPEAT_BITMAP, depth(1), depth(63) }, + { REPEAT_BITMAP, depth(50), depth(63) }, // {N,M} repeats -- trailer model - { REPEAT_TRAILER, 1, 2 }, - { REPEAT_TRAILER, 8, 8 }, - { REPEAT_TRAILER, 0, 8 }, - { REPEAT_TRAILER, 10, 20 
}, - { REPEAT_TRAILER, 1, 32 }, - { REPEAT_TRAILER, 64, 64 }, - { REPEAT_TRAILER, 1, 64 }, - { REPEAT_TRAILER, 1, 100 }, - { REPEAT_TRAILER, 1, 2000 }, - { REPEAT_TRAILER, 50, 200 }, - { REPEAT_TRAILER, 50, 1000 }, - { REPEAT_TRAILER, 64, 1024 }, + { REPEAT_TRAILER, depth(1), depth(2) }, + { REPEAT_TRAILER, depth(8), depth(8) }, + { REPEAT_TRAILER, depth(0), depth(8) }, + { REPEAT_TRAILER, depth(10), depth(20) }, + { REPEAT_TRAILER, depth(1), depth(32) }, + { REPEAT_TRAILER, depth(64), depth(64) }, + { REPEAT_TRAILER, depth(1), depth(64) }, + { REPEAT_TRAILER, depth(1), depth(100) }, + { REPEAT_TRAILER, depth(1), depth(2000) }, + { REPEAT_TRAILER, depth(50), depth(200) }, + { REPEAT_TRAILER, depth(50), depth(1000) }, + { REPEAT_TRAILER, depth(64), depth(1024) }, // {N,} repeats -- first model - { REPEAT_FIRST, 0, depth::infinity() }, - { REPEAT_FIRST, 1, depth::infinity() }, - { REPEAT_FIRST, 4, depth::infinity() }, - { REPEAT_FIRST, 10, depth::infinity() }, - { REPEAT_FIRST, 50, depth::infinity() }, - { REPEAT_FIRST, 100, depth::infinity() }, - { REPEAT_FIRST, 1000, depth::infinity() }, - { REPEAT_FIRST, 3000, depth::infinity() }, - { REPEAT_FIRST, 10000, depth::infinity() }, + { REPEAT_FIRST, depth(0), depth::infinity() }, + { REPEAT_FIRST, depth(1), depth::infinity() }, + { REPEAT_FIRST, depth(4), depth::infinity() }, + { REPEAT_FIRST, depth(10), depth::infinity() }, + { REPEAT_FIRST, depth(50), depth::infinity() }, + { REPEAT_FIRST, depth(100), depth::infinity() }, + { REPEAT_FIRST, depth(1000), depth::infinity() }, + { REPEAT_FIRST, depth(3000), depth::infinity() }, + { REPEAT_FIRST, depth(10000), depth::infinity() }, // {,} repeats -- always - { REPEAT_ALWAYS, 0, depth::infinity() }, + { REPEAT_ALWAYS, depth(0), depth::infinity() }, }; INSTANTIATE_TEST_CASE_P(Repeat, RepeatTest, ValuesIn(repeatTests)); @@ -508,55 +508,55 @@ const u32 sparsePeriods[] = { static const RepeatTestInfo sparseRepeats[] = { // Fixed repeats - { REPEAT_SPARSE_OPTIMAL_P, 10, 10 }, - { REPEAT_SPARSE_OPTIMAL_P, 20, 20 }, - { REPEAT_SPARSE_OPTIMAL_P, 40, 40 }, - { REPEAT_SPARSE_OPTIMAL_P, 80, 80 }, - { REPEAT_SPARSE_OPTIMAL_P, 100, 100 }, - { REPEAT_SPARSE_OPTIMAL_P, 150, 150 }, - { REPEAT_SPARSE_OPTIMAL_P, 200, 200 }, - { REPEAT_SPARSE_OPTIMAL_P, 250, 250 }, - { REPEAT_SPARSE_OPTIMAL_P, 300, 300 }, - { REPEAT_SPARSE_OPTIMAL_P, 350, 350 }, - { REPEAT_SPARSE_OPTIMAL_P, 400, 400 }, - { REPEAT_SPARSE_OPTIMAL_P, 500, 500 }, - { REPEAT_SPARSE_OPTIMAL_P, 600, 600 }, - { REPEAT_SPARSE_OPTIMAL_P, 800, 800 }, - { REPEAT_SPARSE_OPTIMAL_P, 1000, 1000 }, - { REPEAT_SPARSE_OPTIMAL_P, 1500, 1500 }, - { REPEAT_SPARSE_OPTIMAL_P, 2000, 2000 }, - { REPEAT_SPARSE_OPTIMAL_P, 2500, 2500 }, - { REPEAT_SPARSE_OPTIMAL_P, 3000, 3000 }, - { REPEAT_SPARSE_OPTIMAL_P, 3500, 3500 }, - { REPEAT_SPARSE_OPTIMAL_P, 4000, 4000 }, - { REPEAT_SPARSE_OPTIMAL_P, 4500, 4500 }, - { REPEAT_SPARSE_OPTIMAL_P, 5000, 5000 }, - { REPEAT_SPARSE_OPTIMAL_P, 65534, 65534 }, + { REPEAT_SPARSE_OPTIMAL_P, depth(10), depth(10) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(20), depth(20) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(40), depth(40) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(80), depth(80) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(100), depth(100) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(150), depth(150) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(200), depth(200) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(250), depth(250) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(300), depth(300) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(350), depth(350) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(400), depth(400) }, + 
{ REPEAT_SPARSE_OPTIMAL_P, depth(500), depth(500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(600), depth(600) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(800), depth(800) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(1000), depth(1000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(1500), depth(1500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(2000), depth(2000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(2500), depth(2500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(3000), depth(3000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(3500), depth(3500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(4000), depth(4000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(4500), depth(4500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(5000), depth(5000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(65534), depth(65534) }, // {N, M} repeats - { REPEAT_SPARSE_OPTIMAL_P, 10, 20 }, - { REPEAT_SPARSE_OPTIMAL_P, 20, 40 }, - { REPEAT_SPARSE_OPTIMAL_P, 40, 80 }, - { REPEAT_SPARSE_OPTIMAL_P, 80, 100 }, - { REPEAT_SPARSE_OPTIMAL_P, 100, 120 }, - { REPEAT_SPARSE_OPTIMAL_P, 150, 180 }, - { REPEAT_SPARSE_OPTIMAL_P, 200, 400 }, - { REPEAT_SPARSE_OPTIMAL_P, 250, 500 }, - { REPEAT_SPARSE_OPTIMAL_P, 300, 400 }, - { REPEAT_SPARSE_OPTIMAL_P, 350, 500 }, - { REPEAT_SPARSE_OPTIMAL_P, 400, 500 }, - { REPEAT_SPARSE_OPTIMAL_P, 500, 600 }, - { REPEAT_SPARSE_OPTIMAL_P, 600, 700 }, - { REPEAT_SPARSE_OPTIMAL_P, 800, 1000 }, - { REPEAT_SPARSE_OPTIMAL_P, 1000, 1200 }, - { REPEAT_SPARSE_OPTIMAL_P, 1500, 1800 }, - { REPEAT_SPARSE_OPTIMAL_P, 2000, 4000 }, - { REPEAT_SPARSE_OPTIMAL_P, 2500, 3000 }, - { REPEAT_SPARSE_OPTIMAL_P, 3000, 3500 }, - { REPEAT_SPARSE_OPTIMAL_P, 3500, 4000 }, - { REPEAT_SPARSE_OPTIMAL_P, 4000, 8000 }, - { REPEAT_SPARSE_OPTIMAL_P, 4500, 8000 }, - { REPEAT_SPARSE_OPTIMAL_P, 5000, 5001 }, - { REPEAT_SPARSE_OPTIMAL_P, 60000, 65534 } + { REPEAT_SPARSE_OPTIMAL_P, depth(10), depth(20) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(20), depth(40) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(40), depth(80) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(80), depth(100) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(100), depth(120) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(150), depth(180) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(200), depth(400) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(250), depth(500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(300), depth(400) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(350), depth(500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(400), depth(500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(500), depth(600) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(600), depth(700) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(800), depth(1000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(1000), depth(1200) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(1500), depth(1800) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(2000), depth(4000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(2500), depth(3000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(3000), depth(3500) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(3500), depth(4000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(4000), depth(8000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(4500), depth(8000) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(5000), depth(5001) }, + { REPEAT_SPARSE_OPTIMAL_P, depth(60000), depth(65534) } }; static From 5d1922327e7067b4d58e7f321c90ca8f761f4a78 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 30 Mar 2017 16:57:43 +1100 Subject: [PATCH 200/326] depth: use totally_ordered --- src/util/depth.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/util/depth.h b/src/util/depth.h index b1fe2b1b..bd9d67a6 100644 --- a/src/util/depth.h +++ b/src/util/depth.h @@ -34,6 +34,7 @@ #define DEPTH_H #include 
"ue2common.h" +#include "util/operators.h" #ifdef DUMP_SUPPORT #include @@ -52,7 +53,7 @@ struct DepthOverflowError {}; * \brief Type used to represent depth information; value is either a count, * or the special values "infinity" and "unreachable". */ -class depth { +class depth : totally_ordered { public: /** \brief The default depth is special value "unreachable". */ depth() = default; @@ -93,11 +94,7 @@ public: } bool operator<(const depth &d) const { return val < d.val; } - bool operator>(const depth &d) const { return val > d.val; } - bool operator<=(const depth &d) const { return val <= d.val; } - bool operator>=(const depth &d) const { return val >= d.val; } bool operator==(const depth &d) const { return val == d.val; } - bool operator!=(const depth &d) const { return val != d.val; } // The following comparison operators exist for use against integer types // that are bigger than what we can safely convert to depth (such as those @@ -239,11 +236,11 @@ private: /** * \brief Encapsulates a min/max pair. */ -struct DepthMinMax { - depth min; - depth max; +struct DepthMinMax : totally_ordered { + depth min{depth::infinity()}; + depth max{0}; - DepthMinMax() : min(depth::infinity()), max(depth(0)) {} + DepthMinMax() = default; DepthMinMax(const depth &mn, const depth &mx) : min(mn), max(mx) {} bool operator<(const DepthMinMax &b) const { @@ -257,10 +254,6 @@ struct DepthMinMax { return min == b.min && max == b.max; } - bool operator!=(const DepthMinMax &b) const { - return !(*this == b); - } - #ifdef DUMP_SUPPORT /** \brief Render as a string, useful for debugging. */ std::string str() const; From aa4ae755d8f3d2fa7707793c3590a348b71afb92 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 30 Mar 2017 17:00:52 +1100 Subject: [PATCH 201/326] depth: use hash_all() --- src/util/depth.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/util/depth.h b/src/util/depth.h index bd9d67a6..9af1ded8 100644 --- a/src/util/depth.h +++ b/src/util/depth.h @@ -34,14 +34,13 @@ #define DEPTH_H #include "ue2common.h" +#include "util/hash.h" #include "util/operators.h" #ifdef DUMP_SUPPORT #include #endif -#include - namespace ue2 { /** @@ -258,13 +257,11 @@ struct DepthMinMax : totally_ordered { /** \brief Render as a string, useful for debugging. */ std::string str() const; #endif + }; inline size_t hash_value(const DepthMinMax &d) { - size_t val = 0; - boost::hash_combine(val, d.min); - boost::hash_combine(val, d.max); - return val; + return hash_all(d.min, d.max); } /** From 894e6835e6483c4edff88dba68ddf418f0ef7d50 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 4 Apr 2017 11:38:27 +1000 Subject: [PATCH 202/326] ensure that engines added to the leftQueueMap are real rather than lookarounds --- src/rose/rose_build_bytecode.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 41113457..fbefdc59 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -133,6 +133,8 @@ namespace ue2 { namespace /* anon */ { +static constexpr u32 INVALID_QUEUE = ~0U; + struct left_build_info { // Constructor for an engine implementation. 
left_build_info(u32 q, u32 l, u32 t, rose_group sm, @@ -146,7 +148,7 @@ struct left_build_info { explicit left_build_info(const vector> &looks) : has_lookaround(true), lookaround(looks) {} - u32 queue = 0; /* uniquely idents the left_build_info */ + u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */ u32 lag = 0; u32 transient = 0; rose_group squash_mask = ~rose_group{0}; @@ -155,6 +157,7 @@ struct left_build_info { u8 countingMiracleCount = 0; CharReach countingMiracleReach; u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ + /* leftfix can be completely implemented with lookaround */ bool has_lookaround = false; vector> lookaround; // alternative implementation to the NFA }; @@ -5693,6 +5696,11 @@ map makeLeftQueueMap(const RoseGraph &g, const map &leftfix_info) { map lqm; for (const auto &e : leftfix_info) { + if (e.second.has_lookaround) { + continue; + } + DEBUG_PRINTF("%zu: using queue %u\n", g[e.first].index, e.second.queue); + assert(e.second.queue != INVALID_QUEUE); left_id left(g[e.first].left); assert(!contains(lqm, left) || lqm[left] == e.second.queue); lqm[left] = e.second.queue; From 7920b1086b4575eaf349b7656d39fe0ce7cb2422 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Tue, 4 Apr 2017 15:43:57 +1000 Subject: [PATCH 203/326] default the move special functions --- src/util/noncopyable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/noncopyable.h b/src/util/noncopyable.h index da3851a3..cd4f2e02 100644 --- a/src/util/noncopyable.h +++ b/src/util/noncopyable.h @@ -39,10 +39,10 @@ namespace ue2 { /** \brief Class that makes derived classes non-copyable. */ struct noncopyable { noncopyable() = default; - // Copy constructor. noncopyable(const noncopyable &) = delete; - // Copy-assignment operator. + noncopyable(noncopyable &&) = default; noncopyable &operator=(const noncopyable &) = delete; + noncopyable &operator=(noncopyable &&) = default; }; } // namespace ue2 From 8713cfbd9e0764d4a30c4b6127948c0ab761fb64 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 3 Apr 2017 11:40:42 +1000 Subject: [PATCH 204/326] limex: add CANNOT_DIE flag and loop without test --- src/nfa/limex_compile.cpp | 4 ++++ src/nfa/limex_internal.h | 1 + src/nfa/limex_runtime_impl.h | 24 +++++++++++++++++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index e064420d..92ec4205 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -2237,6 +2237,10 @@ struct Factory { limex->shiftCount = shiftCount; writeShiftMasks(args, limex); + if (hasInitDsStates(args.h, args.state_ids)) { + setLimexFlag(limex, LIMEX_FLAG_CANNOT_DIE); + } + // Determine the state required for our state vector. 
findStateSize(args, limex); diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h index ccbf3422..db703f03 100644 --- a/src/nfa/limex_internal.h +++ b/src/nfa/limex_internal.h @@ -85,6 +85,7 @@ #define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */ #define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */ +#define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */ enum LimExTrigger { LIMEX_TRIGGER_NONE = 0, diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index ca761924..b9002c91 100644 --- a/src/nfa/limex_runtime_impl.h +++ b/src/nfa/limex_runtime_impl.h @@ -216,11 +216,32 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, size_t min_accel_offset = 0; if (!limex->accelCount || length < ACCEL_MIN_LEN) { min_accel_offset = length; - goto without_accel; + if (limex->flags & LIMEX_FLAG_CANNOT_DIE) { + goto cannot_die; + } else { + goto without_accel; + } } else { goto with_accel; } +cannot_die: + for (; i != min_accel_offset; i++) { + DUMP_INPUT(i); + + STATE_T succ; + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); + + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 0, first_match)) { + return MO_HALT_MATCHING; + } + + u8 c = input[i]; + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); + } + goto finished; + without_accel: for (; i != min_accel_offset; i++) { DUMP_INPUT(i); @@ -292,6 +313,7 @@ with_accel: s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } +finished: ctx->s = s; if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) { From 578277e535e08f3b4aa33f0d6ed18966396ad23e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 3 Apr 2017 12:30:30 +1000 Subject: [PATCH 205/326] limex_dump: dump LimEx type and flags in text --- src/nfa/limex_dump.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/nfa/limex_dump.cpp b/src/nfa/limex_dump.cpp index 852639ea..797e87ba 100644 --- a/src/nfa/limex_dump.cpp +++ b/src/nfa/limex_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -290,6 +290,20 @@ static void dumpLimexText(const limex_type *limex, FILE *f) { u32 size = limex_traits::size; + fprintf(f, "%u-bit LimEx NFA (%u shifts, %u exceptions)\n", size, + limex->shiftCount, limex->exceptionCount); + fprintf(f, "flags: "); + if (limex->flags & LIMEX_FLAG_COMPRESS_STATE) { + fprintf(f, "COMPRESS_STATE "); + } + if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { + fprintf(f, "COMPRESS_MASKED "); + } + if (limex->flags & LIMEX_FLAG_CANNOT_DIE) { + fprintf(f, "CANNOT_DIE "); + } + fprintf(f, "\n\n"); + dumpMask(f, "init", (const u8 *)&limex->init, size); dumpMask(f, "init_dot_star", (const u8 *)&limex->initDS, size); dumpMask(f, "accept", (const u8 *)&limex->accept, size); From 152552423fc4c7f55300cfd0d7586937746945e5 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 3 Apr 2017 13:05:17 +1000 Subject: [PATCH 206/326] limex: refactor non-accel loops into function --- src/nfa/limex_runtime_impl.h | 98 ++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 39 deletions(-) diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h index b9002c91..2c9647d0 100644 --- a/src/nfa/limex_runtime_impl.h +++ 
b/src/nfa/limex_runtime_impl.h @@ -60,6 +60,7 @@ #define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel) #define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions) #define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream) +#define LOOP_NOACCEL_FN JOIN(LIMEX_API_ROOT, _Loop_No_Accel) #define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream) #define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB) #define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First) @@ -191,6 +192,50 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask, } while (0) +/** + * \brief LimEx NFA inner loop without accel. + * + * Note that the "all zeroes" early death check is only performed if can_die is + * true. + * + */ +static really_inline +char LOOP_NOACCEL_FN(const IMPL_NFA_T *limex, const u8 *input, size_t *loc, + size_t length, STATE_T *s_ptr, struct CONTEXT_T *ctx, + u64a offset, const char flags, u64a *final_loc, + const char first_match, const char can_die) { + const ENG_STATE_T *reach = get_reach_table(limex); +#if SIZE < 256 + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); +#endif + const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); + STATE_T s = *s_ptr; + + size_t i = *loc; + for (; i != length; i++) { + DUMP_INPUT(i); + if (can_die && ISZERO_STATE(s)) { + DEBUG_PRINTF("no states are switched on, early exit\n"); + break; + } + + STATE_T succ; + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); + + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 0, first_match)) { + return MO_HALT_MATCHING; + } + + u8 c = input[i]; + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); + } + + *loc = i; + *s_ptr = s; + return MO_CONTINUE_MATCHING; +} + static really_inline char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset, const char flags, @@ -216,51 +261,26 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, size_t min_accel_offset = 0; if (!limex->accelCount || length < ACCEL_MIN_LEN) { min_accel_offset = length; - if (limex->flags & LIMEX_FLAG_CANNOT_DIE) { - goto cannot_die; - } else { - goto without_accel; - } + goto without_accel; } else { goto with_accel; } -cannot_die: - for (; i != min_accel_offset; i++) { - DUMP_INPUT(i); - - STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - - if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, - &succ, final_loc, ctx, flags, 0, first_match)) { - return MO_HALT_MATCHING; - } - - u8 c = input[i]; - s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); - } - goto finished; - without_accel: - for (; i != min_accel_offset; i++) { - DUMP_INPUT(i); - if (ISZERO_STATE(s)) { - DEBUG_PRINTF("no states are switched on, early exit\n"); - ctx->s = s; - return MO_CONTINUE_MATCHING; - } - - STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - - if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, - &succ, final_loc, ctx, flags, 0, first_match)) { + if (limex->flags & LIMEX_FLAG_CANNOT_DIE) { + const char can_die = 0; + if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, + flags, final_loc, first_match, + can_die) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + const char can_die = 1; + if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, + flags, final_loc, first_match, + can_die) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; } - - u8 c = input[i]; - s = AND_STATE(succ, 
LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } with_accel: @@ -313,7 +333,6 @@ with_accel: s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } -finished: ctx->s = s; if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) { @@ -1022,6 +1041,7 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef RUN_ACCEL_FN #undef RUN_EXCEPTIONS_FN #undef REV_STREAM_FN +#undef LOOP_NOACCEL_FN #undef STREAM_FN #undef STREAMCB_FN #undef STREAMFIRST_FN From 9a82689d00667b118b4814ec0bbfb3ae1bde0145 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Apr 2017 14:36:52 +1000 Subject: [PATCH 207/326] limex: more general CANNOT_DIE analysis --- src/nfa/limex_compile.cpp | 82 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 92ec4205..7590273c 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -66,6 +66,7 @@ #include #include +#include #include using namespace std; @@ -1640,6 +1641,84 @@ u32 findBestNumOfVarShifts(const build_info &args, return bestNumOfVarShifts; } +static +bool cannotDie(const build_info &args, const set<NFAVertex> &tops) { + const auto &h = args.h; + + // When this top is activated, all of the vertices in 'tops' are switched + // on. If any of those lead to a graph that cannot die, then this top + // cannot die. + + // For each top, we use a depth-first search to traverse the graph from the + // top, looking for a cyclic path consisting of vertices of dot reach. If + // one exists, then the NFA cannot die after this top is triggered. + + vector<boost::default_color_type> colours(num_vertices(h)); + auto colour_map = boost::make_iterator_property_map(colours.begin(), + get(vertex_index, h)); + + struct CycleFound {}; + struct CannotDieVisitor : public boost::default_dfs_visitor { + void back_edge(const NFAEdge &e, const NGHolder &g) const { + DEBUG_PRINTF("back-edge %zu,%zu\n", g[source(e, g)].index, + g[target(e, g)].index); + if (g[target(e, g)].char_reach.all()) { + assert(g[source(e, g)].char_reach.all()); + throw CycleFound(); + } + } + }; + + try { + for (const auto &top : tops) { + DEBUG_PRINTF("checking top vertex %zu\n", h[top].index); + + // Constrain the search to the top vertices and any dot vertices it + // can reach. + auto term_func = [&](NFAVertex v, const NGHolder &g) { + if (v == top) { + return false; + } + if (!g[v].char_reach.all()) { + return true; + } + if (contains(args.br_cyclic, v) && + args.br_cyclic.at(v).repeatMax != depth::infinity()) { + // Bounded repeat vertices without inf max can be turned + // off. + return true; + } + return false; + }; + + boost::depth_first_visit(h, top, CannotDieVisitor(), colour_map, + term_func); + } + } catch (const CycleFound &) { + DEBUG_PRINTF("cycle found\n"); + return true; + } + + return false; +} + +/** \brief True if this NFA cannot ever be in no states at all. */ +static +bool cannotDie(const build_info &args) { + const auto &h = args.h; + const auto &state_ids = args.state_ids; + + // If we have a startDs we're actually using, we can't die. 
+ if (state_ids.at(h.startDs) != NO_STATE) { + DEBUG_PRINTF("is using startDs\n"); + return true; + } + + return all_of_in(args.tops | map_values, [&](const set &verts) { + return cannotDie(args, verts); + }); +} + template struct Factory { // typedefs for readability, for types derived from traits @@ -2237,7 +2316,8 @@ struct Factory { limex->shiftCount = shiftCount; writeShiftMasks(args, limex); - if (hasInitDsStates(args.h, args.state_ids)) { + if (cannotDie(args)) { + DEBUG_PRINTF("nfa cannot die\n"); setLimexFlag(limex, LIMEX_FLAG_CANNOT_DIE); } From 6499d306ecaafc3470c17c093b500c7834900fe1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 20 Mar 2017 11:51:15 +1100 Subject: [PATCH 208/326] bytecode_ptr: new smart pointer type bytecode_ptr is an aligned_unique_ptr that can be queried for its size and alignment. --- src/util/bytecode_ptr.h | 126 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 src/util/bytecode_ptr.h diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h new file mode 100644 index 00000000..2cc4277e --- /dev/null +++ b/src/util/bytecode_ptr.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief bytecode_ptr: Smart pointer that knows its length and alignment. + */ + +#ifndef UTIL_BYTECODE_PTR_H +#define UTIL_BYTECODE_PTR_H + +#include "util/alloc.h" + +#include +#include + +namespace ue2 { + +/** + * \brief Smart pointer that knows its length and alignment. + * + * This is intended to be used for flat aligned memory regions that will + * eventually end up copied into the Hyperscan bytecode. + */ +template class bytecode_ptr { +public: + bytecode_ptr() = default; + explicit bytecode_ptr(size_t size, size_t align = alignof(T)) + : bytes(size), alignment(align) { + // posix_memalign doesn't like us asking for smaller alignment. 
+ size_t mem_align = std::max(align, sizeof(void *)); + ptr.reset(static_cast(aligned_malloc_internal(size, mem_align))); + if (!ptr) { + throw std::bad_alloc(); + } + std::memset(ptr.get(), 0, bytes); + } + + bytecode_ptr(std::nullptr_t) {} + + T *get() { return ptr.get(); }; + const T *get() const { return ptr.get(); }; + + T &operator*() { return *ptr; } + const T &operator*() const { return *ptr; } + + T *operator->() { return ptr.get(); } + const T *operator->() const { return ptr.get(); } + + explicit operator bool() const { return ptr != nullptr; } + + /** \brief Move converter for shared_ptr. */ + template ::value>::type> + operator std::shared_ptr() && { + auto d = ptr.get_deleter(); + return std::shared_ptr(ptr.release(), d); + } + + void reset(T* p = nullptr) { ptr.reset(p); } + + void swap(bytecode_ptr &other) { + using std::swap; + swap(ptr, other.ptr); + swap(bytes, other.bytes); + swap(alignment, other.alignment); + } + + /** \brief Returns size of the memory region in bytes. */ + size_t size() const { return bytes; } + + /** \brief Returns alignment of the memory region in bytes. */ + size_t align() const { return alignment; } + + bool operator==(const bytecode_ptr &a) const { return ptr == a.ptr; } + bool operator!=(const bytecode_ptr &a) const { return ptr != a.ptr; } + bool operator<(const bytecode_ptr &a) const { return ptr < a.ptr; } + bool operator<=(const bytecode_ptr &a) const { return ptr <= a.ptr; } + bool operator>(const bytecode_ptr &a) const { return ptr > a.ptr; } + bool operator>=(const bytecode_ptr &a) const { return ptr >= a.ptr; } + +private: + /** \brief Deleter function for std::unique_ptr. */ + template struct deleter { + void operator()(DT *p) const { aligned_free_internal(p); } + }; + + std::unique_ptr> ptr = nullptr; //!< Underlying pointer. + size_t bytes = 0; //!< Size of memory region in bytes. + size_t alignment = 0; //!< Alignment of memory region in bytes. 
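+
+    // Note: bytes and alignment are bookkeeping only; the allocation itself
+    // is owned by ptr, whose deleter releases it via aligned_free_internal().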
+}; + +template +inline bytecode_ptr make_bytecode_ptr(size_t size, + size_t align = alignof(T)) { + return bytecode_ptr(size, align); +} + +} // namespace ue2 + +#endif // UTIL_BYTECODE_PTR_H From 97516eccb68bbe37d37397c5efb81a6483a0fe8c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 20 Mar 2017 13:53:40 +1100 Subject: [PATCH 209/326] fdr: use bytecode_ptr internally --- src/fdr/fdr_compile.cpp | 22 +++++++++++----------- src/fdr/fdr_compile_internal.h | 18 +++++++++--------- src/fdr/fdr_confirm_compile.cpp | 7 ++++--- src/fdr/flood_compile.cpp | 10 +++++----- src/fdr/teddy_compile.cpp | 14 +++++++------- 5 files changed, 36 insertions(+), 35 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index dc0cc0f3..8346e520 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -151,18 +151,18 @@ aligned_unique_ptr FDRCompiler::setupFDR() { auto confirmTmp = setupFullConfs(lits, eng, bucketToLits, make_small); assert(ISALIGNED_16(tabSize)); - assert(ISALIGNED_16(confirmTmp.second)); - assert(ISALIGNED_16(floodControlTmp.second)); + assert(ISALIGNED_16(confirmTmp.size())); + assert(ISALIGNED_16(floodControlTmp.size())); size_t headerSize = ROUNDUP_16(sizeof(FDR)); - size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.second + - floodControlTmp.second); + size_t size = ROUNDUP_16(headerSize + tabSize + confirmTmp.size() + + floodControlTmp.size()); DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu " "total=%zu\n", - headerSize, tabSize, confirmTmp.second, floodControlTmp.second, + headerSize, tabSize, confirmTmp.size(), floodControlTmp.size(), size); - aligned_unique_ptr fdr = aligned_zmalloc_unique(size); + auto fdr = aligned_zmalloc_unique(size); assert(fdr); // otherwise would have thrown std::bad_alloc fdr->size = size; @@ -171,16 +171,16 @@ aligned_unique_ptr FDRCompiler::setupFDR() { createInitialState(fdr.get()); u8 *fdr_base = (u8 *)fdr.get(); - u8 * ptr = fdr_base + ROUNDUP_16(sizeof(FDR)); + u8 *ptr = fdr_base + ROUNDUP_16(sizeof(FDR)); copy(tab.begin(), tab.end(), ptr); ptr += tabSize; - memcpy(ptr, confirmTmp.first.get(), confirmTmp.second); - ptr += confirmTmp.second; + memcpy(ptr, confirmTmp.get(), confirmTmp.size()); + ptr += confirmTmp.size(); fdr->floodOffset = verify_u32(ptr - fdr_base); - memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second); - ptr += floodControlTmp.second; + memcpy(ptr, floodControlTmp.get(), floodControlTmp.size()); + ptr += floodControlTmp.size(); /* we are allowing domains 9 to 15 only */ assert(eng.bits > 8 && eng.bits < 16); diff --git a/src/fdr/fdr_compile_internal.h b/src/fdr/fdr_compile_internal.h index 73de4d42..756fe8e7 100644 --- a/src/fdr/fdr_compile_internal.h +++ b/src/fdr/fdr_compile_internal.h @@ -31,7 +31,7 @@ #include "ue2common.h" #include "hwlm/hwlm_literal.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -57,20 +57,20 @@ class FDREngineDescription; struct hwlmStreamingControl; struct Grey; -std::pair, size_t> setupFullConfs( - const std::vector &lits, const EngineDescription &eng, - std::map> &bucketToLits, - bool make_small); +bytecode_ptr setupFullConfs(const std::vector &lits, + const EngineDescription &eng, + std::map> &bucketToLits, + bool make_small); // all suffixes include an implicit max_bucket_width suffix to ensure that // we always read a full-scale flood "behind" us in terms of what's in our // state; if we don't have a flood that's long enough we won't be in the // right state yet to allow blindly advancing 
-std::pair, size_t> -setupFDRFloodControl(const std::vector &lits, - const EngineDescription &eng, const Grey &grey); +bytecode_ptr setupFDRFloodControl(const std::vector &lits, + const EngineDescription &eng, + const Grey &grey); -std::pair, size_t> +bytecode_ptr fdrBuildTableStreaming(const std::vector &lits, hwlmStreamingControl &stream_control); diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index e5969261..367ba693 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -342,7 +342,7 @@ getFDRConfirm(const vector &lits, bool make_small, return {move(fdrc), actual_size}; } -pair, size_t> +bytecode_ptr setupFullConfs(const vector &lits, const EngineDescription &eng, map> &bucketToLits, @@ -374,7 +374,7 @@ setupFullConfs(const vector &lits, u32 totalConfSwitchSize = nBuckets * sizeof(u32); u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); - auto buf = aligned_zmalloc_unique(totalSize); + auto buf = make_bytecode_ptr(totalSize, 16); assert(buf); // otherwise would have thrown std::bad_alloc u32 *confBase = (u32 *)buf.get(); @@ -389,7 +389,8 @@ setupFullConfs(const vector &lits, ptr += p.second; confBase[idx] = confirm_offset; } - return {move(buf), totalSize}; + + return buf; } } // namespace ue2 diff --git a/src/fdr/flood_compile.cpp b/src/fdr/flood_compile.cpp index b6d23c9d..9b948419 100644 --- a/src/fdr/flood_compile.cpp +++ b/src/fdr/flood_compile.cpp @@ -91,9 +91,9 @@ void addFlood(vector &tmpFlood, u8 c, const hwlmLiteral &lit, } } -pair, size_t> -setupFDRFloodControl(const vector &lits, - const EngineDescription &eng, const Grey &grey) { +bytecode_ptr setupFDRFloodControl(const vector &lits, + const EngineDescription &eng, + const Grey &grey) { vector tmpFlood(N_CHARS); u32 default_suffix = eng.getDefaultFloodSuffixLength(); @@ -207,7 +207,7 @@ setupFDRFloodControl(const vector &lits, size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods; size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize); - auto buf = aligned_zmalloc_unique(totalSize); + auto buf = make_bytecode_ptr(totalSize, 16); assert(buf); // otherwise would have thrown std::bad_alloc u32 *floodHeader = (u32 *)buf.get(); @@ -227,7 +227,7 @@ setupFDRFloodControl(const vector &lits, DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n", floodHeaderSize, floodStructSize, totalSize); - return {move(buf), totalSize}; + return buf; } } // namespace ue2 diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 607024d1..a5856110 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -315,11 +315,11 @@ aligned_unique_ptr TeddyCompiler::build() { size_t size = ROUNDUP_N(sizeof(Teddy) + maskLen + - confirmTmp.second + - floodControlTmp.second, + confirmTmp.size() + + floodControlTmp.size(), 16 * maskWidth); - aligned_unique_ptr fdr = aligned_zmalloc_unique(size); + auto fdr = aligned_zmalloc_unique(size); assert(fdr); // otherwise would have thrown std::bad_alloc Teddy *teddy = (Teddy *)fdr.get(); // ugly u8 *teddy_base = (u8 *)teddy; @@ -329,12 +329,12 @@ aligned_unique_ptr TeddyCompiler::build() { teddy->maxStringLen = verify_u32(maxLen(lits)); u8 *ptr = teddy_base + sizeof(Teddy) + maskLen; - memcpy(ptr, confirmTmp.first.get(), confirmTmp.second); - ptr += confirmTmp.second; + memcpy(ptr, confirmTmp.get(), confirmTmp.size()); + ptr += confirmTmp.size(); teddy->floodOffset = verify_u32(ptr - teddy_base); - memcpy(ptr, floodControlTmp.first.get(), floodControlTmp.second); - ptr += 
floodControlTmp.second; + memcpy(ptr, floodControlTmp.get(), floodControlTmp.size()); + ptr += floodControlTmp.size(); u8 *baseMsk = teddy_base + sizeof(Teddy); From 2698e534e49d0920c9b38747bf0ef82db34760b2 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 20 Mar 2017 14:51:36 +1100 Subject: [PATCH 210/326] fdr_confirm_compile: use bytecode_ptr --- src/fdr/fdr_confirm_compile.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 367ba693..5e1a540e 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -45,8 +45,7 @@ using namespace std; namespace ue2 { -using BC2CONF = map, size_t>>; +using BC2CONF = map>; // return the number of bytes beyond a length threshold in all strings in lits static @@ -148,9 +147,9 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, //#define FDR_CONFIRM_DUMP 1 -static pair, size_t> -getFDRConfirm(const vector &lits, bool make_small, - bool make_confirm) { +static +bytecode_ptr getFDRConfirm(const vector &lits, + bool make_small, bool make_confirm) { vector tmpLitInfo(lits.size()); CONF_TYPE andmsk; fillLitInfo(lits, tmpLitInfo, andmsk); @@ -285,7 +284,7 @@ getFDRConfirm(const vector &lits, bool make_small, sizeof(LitInfo) * lits.size() + totalLitSize; size = ROUNDUP_N(size, alignof(FDRConfirm)); - auto fdrc = aligned_zmalloc_unique(size); + auto fdrc = make_bytecode_ptr(size); assert(fdrc); // otherwise would have thrown std::bad_alloc fdrc->andmsk = andmsk; @@ -339,7 +338,7 @@ getFDRConfirm(const vector &lits, bool make_small, alignof(FDRConfirm)); assert(actual_size <= size); - return {move(fdrc), actual_size}; + return fdrc; } bytecode_ptr @@ -365,7 +364,7 @@ setupFullConfs(const vector &lits, DEBUG_PRINTF("b %d sz %zu\n", b, vl.size()); auto fc = getFDRConfirm(vl, make_small, makeConfirm); - totalConfirmSize += fc.second; + totalConfirmSize += fc.size(); bc2Conf.emplace(b, move(fc)); } } @@ -382,11 +381,11 @@ setupFullConfs(const vector &lits, for (const auto &m : bc2Conf) { const BucketIndex &idx = m.first; - const pair, size_t> &p = m.second; + const bytecode_ptr &p = m.second; // confirm offset is relative to the base of this structure, now u32 confirm_offset = verify_u32(ptr - buf.get()); - memcpy(ptr, p.first.get(), p.second); - ptr += p.second; + memcpy(ptr, p.get(), p.size()); + ptr += p.size(); confBase[idx] = confirm_offset; } From befdbb781dfa430a594d467718a5f8e0d0d19c75 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 20 Mar 2017 15:11:44 +1100 Subject: [PATCH 211/326] rose_build_engine_blob: add func for bytecode_ptr --- src/rose/rose_build_engine_blob.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h index 61b6b440..69e8201e 100644 --- a/src/rose/rose_build_engine_blob.h +++ b/src/rose/rose_build_engine_blob.h @@ -33,6 +33,7 @@ #include "ue2common.h" #include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/container.h" #include "util/multibit_build.h" #include "util/noncopyable.h" @@ -76,6 +77,11 @@ public: return verify_u32(rv); } + template + u32 add(const bytecode_ptr &a) { + return add(a.get(), a.size(), a.align()); + } + template u32 add(const T &a) { static_assert(std::is_pod::value, "should be pod"); From 7288da22bd83de23314224de25af1149675ebade Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 20 Mar 2017 15:35:50 +1100 Subject: [PATCH 212/326] limex_compile: use bytecode_ptr --- 
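Notes: this conversion follows the same shape as the preceding FDR patches. Where a buffer and its length previously travelled together as a std::pair, a bytecode_ptr now carries both, so callers query the object for size() rather than threading a second value through every interface. A hedged sketch of the before/after calling convention; build_region is an illustrative name, not a function from this patch:

    // before: std::pair<aligned_unique_ptr<char>, size_t> build_region();
    // after: one object answers get(), size() and align()
    bytecode_ptr<char> build_region(size_t len) {
        auto buf = make_bytecode_ptr<char>(len, 16); // zeroed, 16-byte aligned
        // ... write the compiled structure into buf.get() ...
        return buf;
    }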
src/nfa/limex_compile.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 7590273c..7290c39e 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -47,6 +47,7 @@ #include "repeatcompile.h" #include "util/alloc.h" #include "util/bitutils.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include "util/compile_context.h" #include "util/container.h" @@ -1782,8 +1783,8 @@ struct Factory { static void buildRepeats(const build_info &args, - vector, size_t>> &out, - u32 *scratchStateSize, u32 *streamState) { + vector> &out, + u32 *scratchStateSize, u32 *streamState) { out.reserve(args.repeats.size()); u32 repeat_idx = 0; @@ -1794,7 +1795,7 @@ struct Factory { u32 tableOffset, tugMaskOffset; size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset); - auto info = aligned_zmalloc_unique(len); + auto info = make_bytecode_ptr(len); char *info_ptr = (char *)info.get(); // Collect state space info. @@ -1848,7 +1849,7 @@ struct Factory { *streamState += streamStateLen; *scratchStateSize += sizeof(RepeatControl); - out.emplace_back(move(info), len); + out.emplace_back(move(info)); } } @@ -2156,8 +2157,7 @@ struct Factory { } static - void writeRepeats(const vector, - size_t>> &repeats, + void writeRepeats(const vector> &repeats, vector &repeatOffsets, implNFA_t *limex, const u32 repeatOffsetsOffset, const u32 repeatOffset) { const u32 num_repeats = verify_u32(repeats.size()); @@ -2170,10 +2170,9 @@ struct Factory { for (u32 i = 0; i < num_repeats; i++) { repeatOffsets[i] = offset; - assert(repeats[i].first); - memcpy((char *)limex + offset, repeats[i].first.get(), - repeats[i].second); - offset += repeats[i].second; + assert(repeats[i]); + memcpy((char *)limex + offset, repeats[i].get(), repeats[i].size()); + offset += repeats[i].size(); } // Write repeat offset lookup table. @@ -2200,13 +2199,13 @@ struct Factory { } // Build bounded repeat structures. 
- vector, size_t>> repeats; + vector> repeats; u32 repeats_full_state = 0; u32 repeats_stream_state = 0; buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state); size_t repeatSize = 0; for (size_t i = 0; i < repeats.size(); i++) { - repeatSize += repeats[i].second; + repeatSize += repeats[i].size(); } // We track report lists that have already been written into the global From a5ed9a9330daff3a07514ee1f025ec60c8749547 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 20 Mar 2017 16:30:08 +1100 Subject: [PATCH 213/326] smallwrite_build: use bytecode_ptr --- src/rose/rose_build_bytecode.cpp | 2 +- src/smallwrite/smallwrite_build.cpp | 8 ++++---- src/smallwrite/smallwrite_build.h | 4 +++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index fbefdc59..b9df1af6 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5606,7 +5606,7 @@ aligned_unique_ptr addSmallWriteEngine(const RoseBuildImpl &build, } const size_t mainSize = roseSize(rose.get()); - const size_t smallWriteSize = smwrSize(smwr_engine.get()); + const size_t smallWriteSize = smwr_engine.size(); DEBUG_PRINTF("adding smwr engine, size=%zu\n", smallWriteSize); const size_t smwrOffset = ROUNDUP_CL(mainSize); diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 43a502f7..534f4011 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -46,6 +46,7 @@ #include "nfagraph/ng_util.h" #include "smallwrite/smallwrite_internal.h" #include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include "util/compile_context.h" #include "util/container.h" @@ -74,7 +75,7 @@ public: const CompileContext &cc); // Construct a runtime implementation. - aligned_unique_ptr build(u32 roseQuality) override; + bytecode_ptr build(u32 roseQuality) override; void add(const NGHolder &g, const ExpressionInfo &expr) override; void add(const ue2_literal &literal, ReportID r) override; @@ -473,8 +474,7 @@ unique_ptr makeSmallWriteBuilder(size_t num_patterns, return ue2::make_unique(num_patterns, rm, cc); } -aligned_unique_ptr -SmallWriteBuildImpl::build(u32 roseQuality) { +bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { if (!rdfa && cand_literals.empty()) { DEBUG_PRINTF("no smallwrite engine\n"); poisoned = true; @@ -505,7 +505,7 @@ SmallWriteBuildImpl::build(u32 roseQuality) { } u32 size = sizeof(SmallWriteEngine) + nfa->length; - auto smwr = aligned_zmalloc_unique(size); + auto smwr = make_bytecode_ptr(size); smwr->size = size; smwr->start_offset = start_offset; diff --git a/src/smallwrite/smallwrite_build.h b/src/smallwrite/smallwrite_build.h index 906a83c2..82a03e38 100644 --- a/src/smallwrite/smallwrite_build.h +++ b/src/smallwrite/smallwrite_build.h @@ -37,8 +37,10 @@ #include "ue2common.h" #include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/noncopyable.h" +#include #include struct SmallWriteEngine; @@ -59,7 +61,7 @@ public: virtual ~SmallWriteBuild(); // Construct a runtime implementation. 
- virtual ue2::aligned_unique_ptr build(u32 roseQuality) = 0; + virtual bytecode_ptr build(u32 roseQuality) = 0; virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0; virtual void add(const ue2_literal &literal, ReportID r) = 0; From e90ad34f67aefe75b8f5aab9d6fa2cf393c97486 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 20 Mar 2017 16:31:01 +1100 Subject: [PATCH 214/326] smallwrite_build: delete smwrSize --- src/smallwrite/smallwrite_build.cpp | 5 ----- src/smallwrite/smallwrite_build.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 534f4011..914349b8 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -533,9 +533,4 @@ set SmallWriteBuildImpl::all_reports() const { return reports; } -size_t smwrSize(const SmallWriteEngine *smwr) { - assert(smwr); - return smwr->size; -} - } // namespace ue2 diff --git a/src/smallwrite/smallwrite_build.h b/src/smallwrite/smallwrite_build.h index 82a03e38..92222d62 100644 --- a/src/smallwrite/smallwrite_build.h +++ b/src/smallwrite/smallwrite_build.h @@ -74,8 +74,6 @@ std::unique_ptr makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm, const CompileContext &cc); -size_t smwrSize(const SmallWriteEngine *t); - } // namespace ue2 #endif // SMWR_BUILD_H From 999628311221595cd814bf53cc7e2e9ee20a2a2d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 20 Mar 2017 17:18:41 +1100 Subject: [PATCH 215/326] noodle_build: use bytecode_ptr --- src/hwlm/hwlm_build.cpp | 2 +- src/hwlm/noodle_build.cpp | 7 +++---- src/hwlm/noodle_build.h | 12 ++++++------ 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index a05ca1a2..cd98feb4 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -143,7 +143,7 @@ aligned_unique_ptr hwlmBuild(const vector &lits, const hwlmLiteral &lit = lits.front(); auto noodle = noodBuildTable(lit); if (noodle) { - engSize = noodSize(noodle.get()); + engSize = noodle.size(); } eng = move(noodle); } else { diff --git a/src/hwlm/noodle_build.cpp b/src/hwlm/noodle_build.cpp index d2b4e3f2..6412d3f5 100644 --- a/src/hwlm/noodle_build.cpp +++ b/src/hwlm/noodle_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,6 @@ #include "hwlm_literal.h" #include "noodle_internal.h" -#include "util/alloc.h" #include "util/compare.h" #include "util/verify_types.h" #include "ue2common.h" @@ -67,7 +66,7 @@ size_t findNoodFragOffset(const hwlmLiteral &lit) { return offset; } -aligned_unique_ptr noodBuildTable(const hwlmLiteral &lit) { +bytecode_ptr noodBuildTable(const hwlmLiteral &lit) { if (!lit.msk.empty()) { DEBUG_PRINTF("noodle can't handle supplementary masks\n"); return nullptr; @@ -75,7 +74,7 @@ aligned_unique_ptr noodBuildTable(const hwlmLiteral &lit) { const auto &s = lit.s; size_t noodle_len = sizeof(noodTable) + s.length(); - auto n = aligned_zmalloc_unique(noodle_len); + auto n = make_bytecode_ptr(noodle_len); assert(n); size_t key_offset = findNoodFragOffset(lit); diff --git a/src/hwlm/noodle_build.h b/src/hwlm/noodle_build.h index 1a41695f..b5725f08 100644 --- a/src/hwlm/noodle_build.h +++ b/src/hwlm/noodle_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 
2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,11 +30,11 @@ * \brief Noodle literal matcher: build code. */ -#ifndef NOODLE_BUILD_H_048A1A6D585A9A -#define NOODLE_BUILD_H_048A1A6D585A9A +#ifndef NOODLE_BUILD_H +#define NOODLE_BUILD_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" struct noodTable; @@ -43,7 +43,7 @@ namespace ue2 { struct hwlmLiteral; /** \brief Construct a Noodle matcher for the given literal. */ -ue2::aligned_unique_ptr noodBuildTable(const hwlmLiteral &lit); +bytecode_ptr noodBuildTable(const hwlmLiteral &lit); size_t noodSize(const noodTable *n); @@ -61,5 +61,5 @@ void noodPrintStats(const noodTable *n, FILE *f); #endif // DUMP_SUPPORT -#endif /* NOODLE_BUILD_H_048A1A6D585A9A */ +#endif /* NOODLE_BUILD_H */ From 3590f73151fcaac7b6bf90f2ebbf09254161225a Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 20 Mar 2017 17:26:49 +1100 Subject: [PATCH 216/326] fdr: use bytecode_ptr in fdr/teddy compilers --- src/fdr/fdr_compile.cpp | 41 ++++++++++++++++++--------------------- src/fdr/fdr_compile.h | 14 ++++++------- src/fdr/teddy_compile.cpp | 21 ++++++++++---------- src/fdr/teddy_compile.h | 9 +++++---- 4 files changed, 42 insertions(+), 43 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 8346e520..36bcda69 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -30,8 +30,9 @@ * \brief FDR literal matcher: build API. */ -#include "fdr_internal.h" #include "fdr_compile.h" + +#include "fdr_internal.h" #include "fdr_confirm.h" #include "fdr_compile_internal.h" #include "fdr_engine_description.h" @@ -40,7 +41,6 @@ #include "grey.h" #include "ue2common.h" #include "hwlm/hwlm_build.h" -#include "util/alloc.h" #include "util/compare.h" #include "util/dump_mask.h" #include "util/math.h" @@ -86,7 +86,7 @@ private: void dumpMasks(const u8 *defaultMask); #endif void setupTab(); - aligned_unique_ptr setupFDR(); + bytecode_ptr setupFDR(); void createInitialState(FDR *fdr); public: @@ -95,7 +95,7 @@ public: : eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()), lits(move(lits_in)), make_small(make_small_in) {} - aligned_unique_ptr build(); + bytecode_ptr build(); }; u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) { @@ -144,7 +144,7 @@ void FDRCompiler::createInitialState(FDR *fdr) { } } -aligned_unique_ptr FDRCompiler::setupFDR() { +bytecode_ptr FDRCompiler::setupFDR() { size_t tabSize = eng.getTabSizeBytes(); auto floodControlTmp = setupFDRFloodControl(lits, eng, grey); @@ -162,7 +162,7 @@ aligned_unique_ptr FDRCompiler::setupFDR() { headerSize, tabSize, confirmTmp.size(), floodControlTmp.size(), size); - auto fdr = aligned_zmalloc_unique(size); + auto fdr = make_bytecode_ptr(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc fdr->size = size; @@ -528,7 +528,7 @@ void FDRCompiler::setupTab() { #endif } -aligned_unique_ptr FDRCompiler::build() { +bytecode_ptr FDRCompiler::build() { assignStringsToBuckets(); setupTab(); return setupFDR(); @@ -537,10 +537,9 @@ aligned_unique_ptr FDRCompiler::build() { } // namespace static -aligned_unique_ptr fdrBuildTableInternal(const vector &lits, - bool make_small, - const target_t &target, - const Grey &grey, u32 hint) { +bytecode_ptr fdrBuildTableInternal(const vector &lits, + bool make_small, const target_t &target, + const Grey &grey, u32 hint) { DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? 
"avx2" : "no-avx2"); if (grey.fdrAllowTeddy) { @@ -553,10 +552,8 @@ aligned_unique_ptr fdrBuildTableInternal(const vector &lits, } } - const unique_ptr des = - (hint == HINT_INVALID) ? chooseEngine(target, lits, make_small) - : getFdrDescription(hint); - + auto des = (hint == HINT_INVALID) ? chooseEngine(target, lits, make_small) + : getFdrDescription(hint); if (!des) { return nullptr; } @@ -571,18 +568,18 @@ aligned_unique_ptr fdrBuildTableInternal(const vector &lits, return fc.build(); } -aligned_unique_ptr fdrBuildTable(const vector &lits, - bool make_small, const target_t &target, - const Grey &grey) { +bytecode_ptr fdrBuildTable(const vector &lits, + bool make_small, const target_t &target, + const Grey &grey) { return fdrBuildTableInternal(lits, make_small, target, grey, HINT_INVALID); } #if !defined(RELEASE_BUILD) -aligned_unique_ptr fdrBuildTableHinted(const vector &lits, - bool make_small, u32 hint, - const target_t &target, - const Grey &grey) { +bytecode_ptr fdrBuildTableHinted(const vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey) { return fdrBuildTableInternal(lits, make_small, target, grey, hint); } diff --git a/src/fdr/fdr_compile.h b/src/fdr/fdr_compile.h index a135a6e1..58047600 100644 --- a/src/fdr/fdr_compile.h +++ b/src/fdr/fdr_compile.h @@ -34,7 +34,7 @@ #define FDR_COMPILE_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include @@ -46,15 +46,15 @@ struct hwlmLiteral; struct Grey; struct target_t; -ue2::aligned_unique_ptr -fdrBuildTable(const std::vector &lits, bool make_small, - const target_t &target, const Grey &grey); +bytecode_ptr fdrBuildTable(const std::vector &lits, + bool make_small, const target_t &target, + const Grey &grey); #if !defined(RELEASE_BUILD) -ue2::aligned_unique_ptr -fdrBuildTableHinted(const std::vector &lits, bool make_small, - u32 hint, const target_t &target, const Grey &grey); +bytecode_ptr fdrBuildTableHinted(const std::vector &lits, + bool make_small, u32 hint, + const target_t &target, const Grey &grey); #endif diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index a5856110..33a1050c 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -26,11 +26,15 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "teddy_compile.h" + #include "fdr.h" #include "fdr_internal.h" #include "fdr_compile_internal.h" #include "fdr_confirm.h" #include "fdr_engine_description.h" +#include "teddy_internal.h" +#include "teddy_engine_description.h" #include "grey.h" #include "ue2common.h" #include "util/alloc.h" @@ -40,9 +44,6 @@ #include "util/target_info.h" #include "util/verify_types.h" -#include "teddy_compile.h" -#include "teddy_internal.h" -#include "teddy_engine_description.h" #include #include @@ -77,7 +78,7 @@ public: : eng(eng_in), grey(grey_in), lits(lits_in), make_small(make_small_in) { } - aligned_unique_ptr build(); + bytecode_ptr build(); bool pack(map > &bucketToLits); }; @@ -277,7 +278,7 @@ bool TeddyCompiler::pack(map TeddyCompiler::build() { +bytecode_ptr TeddyCompiler::build() { if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) { DEBUG_PRINTF("too many literals: %zu\n", lits.size()); return nullptr; @@ -319,7 +320,7 @@ aligned_unique_ptr TeddyCompiler::build() { floodControlTmp.size(), 16 * maskWidth); - auto fdr = aligned_zmalloc_unique(size); + auto fdr = make_bytecode_ptr(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc Teddy *teddy = (Teddy *)fdr.get(); // ugly u8 *teddy_base = (u8 *)teddy; @@ -418,10 +419,10 @@ aligned_unique_ptr TeddyCompiler::build() { } // namespace -aligned_unique_ptr teddyBuildTableHinted(const vector &lits, - bool make_small, u32 hint, - const target_t &target, - const Grey &grey) { +bytecode_ptr teddyBuildTableHinted(const vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey) { unique_ptr des; if (hint == HINT_INVALID) { des = chooseTeddyEngine(target, lits); diff --git a/src/fdr/teddy_compile.h b/src/fdr/teddy_compile.h index 07eb18f6..22e87405 100644 --- a/src/fdr/teddy_compile.h +++ b/src/fdr/teddy_compile.h @@ -34,7 +34,7 @@ #define TEDDY_COMPILE_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include @@ -46,9 +46,10 @@ namespace ue2 { struct Grey; struct hwlmLiteral; -ue2::aligned_unique_ptr -teddyBuildTableHinted(const std::vector &lits, bool make_small, - u32 hint, const target_t &target, const Grey &grey); +bytecode_ptr teddyBuildTableHinted(const std::vector &lits, + bool make_small, u32 hint, + const target_t &target, + const Grey &grey); } // namespace ue2 From 73ef7f1e448fd2899b6fb88e0551dd4ceaff87d9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 31 Mar 2017 11:25:04 +1100 Subject: [PATCH 217/326] mpvcompile: use bytecode_ptr --- src/nfa/mpvcompile.cpp | 10 +++++----- src/nfa/mpvcompile.h | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index 87fb462e..51787512 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -309,9 +309,9 @@ const mpv_counter_info &findCounter(const vector &counters, return counters.front(); } -aligned_unique_ptr mpvCompile(const vector &puffs_in, - const vector &triggered_puffs, - const ReportManager &rm) { +bytecode_ptr mpvCompile(const vector &puffs_in, + const vector &triggered_puffs, + const ReportManager &rm) { assert(!puffs_in.empty() || !triggered_puffs.empty()); u32 puffette_count = puffs_in.size() + triggered_puffs.size(); @@ -343,7 +343,7 @@ 
aligned_unique_ptr mpvCompile(const vector &puffs_in, DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len); - aligned_unique_ptr nfa = aligned_zmalloc_unique(len); + auto nfa = make_bytecode_ptr(len); mpv_puffette *pa_base = (mpv_puffette *) ((char *)nfa.get() + sizeof(NFA) + sizeof(mpv) diff --git a/src/nfa/mpvcompile.h b/src/nfa/mpvcompile.h index fb91ac64..4f820e43 100644 --- a/src/nfa/mpvcompile.h +++ b/src/nfa/mpvcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #define MPV_COMPILE_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include @@ -61,9 +61,9 @@ struct raw_puff { * puffs in the triggered_puffs vector are enabled when an TOP_N event is * delivered corresponding to their index in the vector */ -aligned_unique_ptr mpvCompile(const std::vector &puffs, - const std::vector &triggered_puffs, - const ReportManager &rm); +bytecode_ptr mpvCompile(const std::vector &puffs, + const std::vector &triggered_puffs, + const ReportManager &rm); } // namespace ue2 From 813f1e3fb9db8d6c36fe1a07d01f0c57cd3d79ca Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 31 Mar 2017 13:22:04 +1100 Subject: [PATCH 218/326] rose: use bytecode_ptr --- src/compiler/compiler.cpp | 4 ++-- src/rose/rose_build.h | 4 ++-- src/rose/rose_build_bytecode.cpp | 12 ++++++------ src/rose/rose_build_compile.cpp | 2 +- src/rose/rose_build_impl.h | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 47bf514c..b2deae32 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -56,7 +56,7 @@ #include "parser/utf8_validate.h" #include "rose/rose_build.h" #include "som/slot_manager_dump.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/compile_error.h" #include "util/target_info.h" #include "util/verify_types.h" @@ -288,7 +288,7 @@ void addExpression(NG &ng, unsigned index, const char *expression, } static -aligned_unique_ptr generateRoseEngine(NG &ng) { +bytecode_ptr generateRoseEngine(NG &ng) { const u32 minWidth = ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF; auto rose = ng.rose->buildRose(minWidth); diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index 2949fcc9..a14ea8ff 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -40,7 +40,7 @@ #include "ue2common.h" #include "rose_common.h" #include "rose_in_graph.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include "util/noncopyable.h" #include "util/ue2_containers.h" @@ -113,7 +113,7 @@ public: bool eod) = 0; /** \brief Construct a runtime implementation. 
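
The mechanical payoff shows up at every call site in these patches: instead of threading a size_t out-parameter or calling a per-engine helper such as the smwrSize() deleted in patch 214, the caller simply asks the returned pointer for its size. A toy compile-and-measure example, using the bytecode_ptr sketch above and an invented builder:

#include <cstddef>

struct NFA { unsigned length; }; // stand-in for the real NFA header

// Hypothetical builder in the new style: the result carries its own size.
static bytecode_ptr<NFA> build_toy_engine() {
    auto nfa = make_bytecode_ptr<NFA>(sizeof(NFA), 64);
    nfa->length = static_cast<unsigned>(sizeof(NFA));
    return nfa;
}

int main() {
    auto eng = build_toy_engine();
    if (!eng) {
        return 1; // builders still signal failure with a null pointer
    }
    size_t engine_size = eng.size(); // no separate size helper needed
    return engine_size == sizeof(NFA) ? 0 : 1;
}
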
*/ - virtual ue2::aligned_unique_ptr buildRose(u32 minWidth) = 0; + virtual bytecode_ptr buildRose(u32 minWidth) = 0; virtual std::unique_ptr generateDedupeAux() const = 0; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b9df1af6..4447646a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5589,8 +5589,8 @@ u32 writeEagerQueueIter(const set &eager, u32 leftfixBeginQueue, } static -aligned_unique_ptr addSmallWriteEngine(const RoseBuildImpl &build, - aligned_unique_ptr rose) { +bytecode_ptr addSmallWriteEngine(const RoseBuildImpl &build, + bytecode_ptr rose) { assert(rose); if (roseIsPureLiteral(rose.get())) { @@ -5612,7 +5612,7 @@ aligned_unique_ptr addSmallWriteEngine(const RoseBuildImpl &build, const size_t smwrOffset = ROUNDUP_CL(mainSize); const size_t newSize = smwrOffset + smallWriteSize; - auto rose2 = aligned_zmalloc_unique(newSize); + auto rose2 = make_bytecode_ptr(newSize, 64); char *ptr = (char *)rose2.get(); memcpy(ptr, rose.get(), mainSize); memcpy(ptr + smwrOffset, smwr_engine.get(), smallWriteSize); @@ -5709,7 +5709,7 @@ map makeLeftQueueMap(const RoseGraph &g, return lqm; } -aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { +bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // We keep all our offsets, counts etc. in a prototype RoseEngine which we // will copy into the real one once it is allocated: we can't do this // until we know how big it will be. @@ -5963,8 +5963,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.size = currOffset; - // Time to allocate the real RoseEngine structure. - auto engine = aligned_zmalloc_unique(currOffset); + // Time to allocate the real RoseEngine structure, at cacheline alignment. + auto engine = make_bytecode_ptr(currOffset, 64); assert(engine); // will have thrown bad_alloc otherwise. // Copy in our prototype engine data. diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 791a68ab..c0096a97 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -1668,7 +1668,7 @@ bool roleOffsetsAreValid(const RoseGraph &g) { } #endif // NDEBUG -aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { +bytecode_ptr RoseBuildImpl::buildRose(u32 minWidth) { dumpRoseGraph(*this, "rose_early.dot"); // Early check for Rose implementability. diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 93c0f18c..21db7a8e 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -38,7 +38,7 @@ #include "nfa/nfa_internal.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_revacc.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/order_check.h" #include "util/queue_index_factory.h" #include "util/ue2_containers.h" @@ -471,8 +471,8 @@ public: bool eod) override; // Construct a runtime implementation. 
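
The addSmallWriteEngine() change above is a good illustration of why the size now travels with the pointer: the function reallocates and appends a second engine at a cacheline boundary, which needs both blobs' exact lengths. A condensed sketch of that append step; ROUNDUP_CL is assumed to round up to a 64-byte cacheline (consistent with its uses in the diff), and the structs here are toy stand-ins, not the real Hyperscan layouts.

#include <cstddef>
#include <cstring>

struct RoseEngine { unsigned size; };       // toy stand-ins only
struct SmallWriteEngine { unsigned size; };

static constexpr size_t CACHELINE = 64;
static size_t roundup_cl(size_t n) {
    return (n + CACHELINE - 1) & ~(CACHELINE - 1);
}

static bytecode_ptr<RoseEngine>
append_smwr(const bytecode_ptr<RoseEngine> &rose,
            const bytecode_ptr<SmallWriteEngine> &smwr) {
    const size_t main_size = rose.size();
    const size_t smwr_offset = roundup_cl(main_size); // keep smwr aligned
    const size_t new_size = smwr_offset + smwr.size();

    auto combined = make_bytecode_ptr<RoseEngine>(new_size, CACHELINE);
    char *ptr = reinterpret_cast<char *>(combined.get());
    std::memcpy(ptr, rose.get(), main_size);
    std::memcpy(ptr + smwr_offset, smwr.get(), smwr.size());
    // the real function also records smwr_offset and new_size in the header
    return combined;
}
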
- aligned_unique_ptr buildRose(u32 minWidth) override; - aligned_unique_ptr buildFinalEngine(u32 minWidth); + bytecode_ptr buildRose(u32 minWidth) override; + bytecode_ptr buildFinalEngine(u32 minWidth); void setSom() override { hasSom = true; } From 7b17d418e7d830e6e85eb7a41424d9208a53231f Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 3 Apr 2017 13:56:49 +1000 Subject: [PATCH 219/326] hwlm: use bytecode_ptr for HWLM structures --- src/hwlm/hwlm_build.cpp | 10 +++---- src/hwlm/hwlm_build.h | 9 +++--- src/rose/rose_build_bytecode.cpp | 21 +++++-------- src/rose/rose_build_matchers.cpp | 51 ++++++++++++-------------------- src/rose/rose_build_matchers.h | 32 ++++++++++---------- 5 files changed, 52 insertions(+), 71 deletions(-) diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index cd98feb4..824ac3fd 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -97,9 +97,9 @@ bool isNoodleable(const vector &lits, return true; } -aligned_unique_ptr hwlmBuild(const vector &lits, - bool make_small, const CompileContext &cc, - UNUSED hwlm_group_t expected_groups) { +bytecode_ptr hwlmBuild(const vector &lits, bool make_small, + const CompileContext &cc, + UNUSED hwlm_group_t expected_groups) { assert(!lits.empty()); dumpLits(lits); @@ -151,7 +151,7 @@ aligned_unique_ptr hwlmBuild(const vector &lits, engType = HWLM_ENGINE_FDR; auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey); if (fdr) { - engSize = fdrSize(fdr.get()); + engSize = fdr.size(); } eng = move(fdr); } @@ -165,7 +165,7 @@ aligned_unique_ptr hwlmBuild(const vector &lits, throw ResourceLimitError(); } - auto h = aligned_zmalloc_unique(ROUNDUP_CL(sizeof(HWLM)) + engSize); + auto h = make_bytecode_ptr(ROUNDUP_CL(sizeof(HWLM)) + engSize, 64); h->type = engType; memcpy(HWLM_DATA(h.get()), eng.get(), engSize); diff --git a/src/hwlm/hwlm_build.h b/src/hwlm/hwlm_build.h index 5dd7dbc9..f2691496 100644 --- a/src/hwlm/hwlm_build.h +++ b/src/hwlm/hwlm_build.h @@ -35,7 +35,7 @@ #include "hwlm.h" #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -60,10 +60,9 @@ struct hwlmLiteral; * may result in a nullptr return value, or a std::bad_alloc exception being * thrown. */ -aligned_unique_ptr -hwlmBuild(const std::vector &lits, bool make_small, - const CompileContext &cc, - hwlm_group_t expected_groups = HWLM_ALL_GROUPS); +bytecode_ptr hwlmBuild(const std::vector &lits, + bool make_small, const CompileContext &cc, + hwlm_group_t expected_groups = HWLM_ALL_GROUPS); /** * Returns an estimate of the number of repeated characters on the end of a diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 4447646a..f7c8cf06 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5832,35 +5832,30 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build floating HWLM matcher. rose_group fgroups = 0; - size_t fsize = 0; auto ftable = buildFloatingMatcher(*this, fragments, - bc.longLitLengthThreshold, - &fgroups, &fsize, &historyRequired); + bc.longLitLengthThreshold, &fgroups, &historyRequired); if (ftable) { - proto.fmatcherOffset = bc.engine_blob.add(ftable.get(), fsize, 64); + proto.fmatcherOffset = bc.engine_blob.add(ftable); bc.resources.has_floating = true; } // Build delay rebuild HWLM matcher. 
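
Patch 219's one-argument bc.engine_blob.add(ftable) calls suggest the Rose blob writer grew an overload that accepts a bytecode_ptr and pulls the length and alignment out of it, replacing the old (pointer, size, align) triple. A guess at what that overload looks like, over a deliberately simplified blob:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified stand-in for Rose's engine_blob; offsets are returned so the
// caller can record them (e.g. proto.fmatcherOffset).
class engine_blob {
public:
    uint32_t add(const void *data, size_t len, size_t align) {
        while (bytes_.size() % align) {
            bytes_.push_back(0); // pad to the requested alignment
        }
        uint32_t offset = static_cast<uint32_t>(bytes_.size());
        const char *p = static_cast<const char *>(data);
        bytes_.insert(bytes_.end(), p, p + len);
        return offset;
    }

    // New-style overload: size and alignment travel with the pointer.
    template <typename T>
    uint32_t add(const bytecode_ptr<T> &ptr) {
        assert(ptr); // adding a null engine is a caller bug
        return add(ptr.get(), ptr.size(), ptr.align());
    }

private:
    std::vector<char> bytes_;
};
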
- size_t drsize = 0; auto drtable = buildDelayRebuildMatcher(*this, fragments, - bc.longLitLengthThreshold, &drsize); + bc.longLitLengthThreshold); if (drtable) { - proto.drmatcherOffset = bc.engine_blob.add(drtable.get(), drsize, 64); + proto.drmatcherOffset = bc.engine_blob.add(drtable); } // Build EOD-anchored HWLM matcher. - size_t esize = 0; - auto etable = buildEodAnchoredMatcher(*this, fragments, &esize); + auto etable = buildEodAnchoredMatcher(*this, fragments); if (etable) { - proto.ematcherOffset = bc.engine_blob.add(etable.get(), esize, 64); + proto.ematcherOffset = bc.engine_blob.add(etable); } // Build small-block HWLM matcher. - size_t sbsize = 0; - auto sbtable = buildSmallBlockMatcher(*this, fragments, &sbsize); + auto sbtable = buildSmallBlockMatcher(*this, fragments); if (sbtable) { - proto.sbmatcherOffset = bc.engine_blob.add(sbtable.get(), sbsize, 64); + proto.sbmatcherOffset = bc.engine_blob.add(sbtable); } proto.activeArrayCount = proto.leftfixBeginQueue; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 7f1467d7..64a1c919 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -805,12 +805,11 @@ void buildAccel(const RoseBuildImpl &build, const MatcherProto &mp, buildForwardAccel(&hwlm, mp.accel_lits, build.getInitialGroups()); } -aligned_unique_ptr -buildFloatingMatcher(const RoseBuildImpl &build, - const vector &fragments, - size_t longLitLengthThreshold, rose_group *fgroups, - size_t *fsize, size_t *historyRequired) { - *fsize = 0; +bytecode_ptr buildFloatingMatcher(const RoseBuildImpl &build, + const vector &fragments, + size_t longLitLengthThreshold, + rose_group *fgroups, + size_t *historyRequired) { *fgroups = 0; auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, @@ -838,18 +837,14 @@ buildFloatingMatcher(const RoseBuildImpl &build, *historyRequired = max(*historyRequired, mp.history_required); } - *fsize = hwlmSize(hwlm.get()); - assert(*fsize); - DEBUG_PRINTF("built floating literal table size %zu bytes\n", *fsize); + DEBUG_PRINTF("built floating literal table size %zu bytes\n", hwlm.size()); return hwlm; } -aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, - const vector &fragments, - size_t longLitLengthThreshold, - size_t *drsize) { - *drsize = 0; - +bytecode_ptr +buildDelayRebuildMatcher(const RoseBuildImpl &build, + const vector &fragments, + size_t longLitLengthThreshold) { if (!build.cc.streaming) { DEBUG_PRINTF("not streaming\n"); return nullptr; @@ -870,17 +865,13 @@ aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, buildAccel(build, mp, *hwlm); - *drsize = hwlmSize(hwlm.get()); - assert(*drsize); - DEBUG_PRINTF("built delay rebuild table size %zu bytes\n", *drsize); + DEBUG_PRINTF("built delay rebuild table size %zu bytes\n", hwlm.size()); return hwlm; } -aligned_unique_ptr +bytecode_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, - const vector &fragments, size_t *sbsize) { - *sbsize = 0; - + const vector &fragments) { if (build.cc.streaming) { DEBUG_PRINTF("streaming mode\n"); return nullptr; @@ -932,17 +923,14 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, buildAccel(build, mp, *hwlm); - *sbsize = hwlmSize(hwlm.get()); - assert(*sbsize); - DEBUG_PRINTF("built small block literal table size %zu bytes\n", *sbsize); + DEBUG_PRINTF("built small block literal table size %zu bytes\n", + hwlm.size()); return hwlm; } -aligned_unique_ptr +bytecode_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, - const 
vector &fragments, size_t *esize) { - *esize = 0; - + const vector &fragments) { auto mp = makeMatcherProto(build, fragments, ROSE_EOD_ANCHORED, false, build.ematcher_region_size); @@ -962,9 +950,8 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build, buildAccel(build, mp, *hwlm); - *esize = hwlmSize(hwlm.get()); - assert(*esize); - DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize); + DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", + hwlm.size()); return hwlm; } diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index 494a3aeb..2b1afc8c 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -35,6 +35,7 @@ #define ROSE_BUILD_MATCHERS_H #include "rose_build_impl.h" +#include "util/bytecode_ptr.h" #include @@ -57,25 +58,24 @@ struct LitFragment { u32 delay_program_offset = ROSE_INVALID_PROG_OFFSET; }; -aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, - const std::vector &fragments, - size_t longLitLengthThreshold, - rose_group *fgroups, - size_t *fsize, - size_t *historyRequired); +bytecode_ptr +buildFloatingMatcher(const RoseBuildImpl &build, + const std::vector &fragments, + size_t longLitLengthThreshold, rose_group *fgroups, + size_t *historyRequired); -aligned_unique_ptr buildDelayRebuildMatcher(const RoseBuildImpl &build, - const std::vector &fragments, - size_t longLitLengthThreshold, - size_t *drsize); +bytecode_ptr +buildDelayRebuildMatcher(const RoseBuildImpl &build, + const std::vector &fragments, + size_t longLitLengthThreshold); -aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, - const std::vector &fragments, - size_t *sbsize); +bytecode_ptr +buildSmallBlockMatcher(const RoseBuildImpl &build, + const std::vector &fragments); -aligned_unique_ptr buildEodAnchoredMatcher(const RoseBuildImpl &build, - const std::vector &fragments, - size_t *esize); +bytecode_ptr +buildEodAnchoredMatcher(const RoseBuildImpl &build, + const std::vector &fragments); void findMoreLiteralMasks(RoseBuildImpl &build); From 905ac780617ddc0f6da4bc21ccff120dbe046891 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 3 Apr 2017 16:47:21 +1000 Subject: [PATCH 220/326] rose_build_anchored: use bytecode_ptr --- src/rose/rose_build_anchored.cpp | 10 ++++------ src/rose/rose_build_anchored.h | 6 +++--- src/rose/rose_build_bytecode.cpp | 7 +++---- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 6c7bb1c1..065990ec 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -869,14 +869,13 @@ vector buildAnchoredDfas(RoseBuildImpl &build, return dfas; } -aligned_unique_ptr +bytecode_ptr buildAnchoredMatcher(RoseBuildImpl &build, const vector &fragments, - vector &dfas, size_t *asize) { + vector &dfas) { const CompileContext &cc = build.cc; if (dfas.empty()) { DEBUG_PRINTF("empty\n"); - *asize = 0; return nullptr; } @@ -892,8 +891,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, const vector &fragments, throw ResourceLimitError(); } - *asize = total_size; - auto atable = aligned_zmalloc_unique(total_size); + auto atable = make_bytecode_ptr(total_size, 64); char *curr = (char *)atable.get(); u32 state_offset = 0; @@ -919,7 +917,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, const vector &fragments, ami->anchoredMinDistance = start_offset[i]; } - DEBUG_PRINTF("success %zu\n", *asize); + DEBUG_PRINTF("success %zu\n", atable.size()); return atable; } diff --git 
a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index dd59ca32..37d268ac 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -32,7 +32,7 @@ #include "ue2common.h" #include "rose_build_impl.h" #include "nfagraph/ng_holder.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -59,10 +59,10 @@ std::vector buildAnchoredDfas(RoseBuildImpl &build, * Remap the literal final_ids used for raw_dfa reports to the program offsets * given in litPrograms. */ -aligned_unique_ptr +bytecode_ptr buildAnchoredMatcher(RoseBuildImpl &build, const std::vector &fragments, - std::vector &dfas, size_t *asize); + std::vector &dfas); u32 anchoredStateSize(const anchored_matcher_info &atable); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index f7c8cf06..c0747d97 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5824,10 +5824,9 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { writeLeftInfo(bc.engine_blob, proto, leftInfoTable); // Build anchored matcher. - size_t asize = 0; - auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas, &asize); + auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas); if (atable) { - proto.amatcherOffset = bc.engine_blob.add(atable.get(), asize, 64); + proto.amatcherOffset = bc.engine_blob.add(atable); } // Build floating HWLM matcher. @@ -5952,7 +5951,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.initialGroups = getInitialGroups(); proto.floating_group_mask = fgroups; proto.totalNumLiterals = verify_u32(literal_info.size()); - proto.asize = verify_u32(asize); + proto.asize = verify_u32(atable.size()); proto.ematcherRegionSize = ematcher_region_size; proto.longLitStreamState = verify_u32(longLitStreamStateRequired); From a197074c5d897e50918389b75365d7acd87b618c Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 3 Apr 2017 17:21:37 +1000 Subject: [PATCH 221/326] nfa: switch to using bytecode_ptr --- src/nfa/castlecompile.cpp | 8 ++-- src/nfa/castlecompile.h | 9 ++-- src/nfa/goughcompile.cpp | 13 +++-- src/nfa/goughcompile.h | 12 ++--- src/nfa/limex_compile.cpp | 30 ++++++------ src/nfa/limex_compile.h | 14 +++--- src/nfa/mcclellancompile.cpp | 33 +++++++------ src/nfa/mcclellancompile.h | 12 ++--- src/nfa/mcsheng_compile.cpp | 16 +++---- src/nfa/mcsheng_compile.h | 15 ++---- src/nfa/shengcompile.cpp | 11 ++--- src/nfa/shengcompile.h | 16 +++---- src/nfa/tamaramacompile.cpp | 17 +++---- src/nfa/tamaramacompile.h | 22 +++++---- src/nfagraph/ng_lbr.cpp | 73 ++++++++++++++--------------- src/nfagraph/ng_lbr.h | 17 ++++--- src/nfagraph/ng_limex.cpp | 22 +++++---- src/nfagraph/ng_limex.h | 43 ++++++++++------- src/nfagraph/ng_som.cpp | 15 +++--- src/rose/rose_build_anchored.cpp | 4 +- src/rose/rose_build_bytecode.cpp | 40 ++++++++-------- src/smallwrite/smallwrite_build.cpp | 15 +++--- src/som/slot_manager.cpp | 8 ++-- src/som/slot_manager.h | 13 ++--- src/util/bytecode_ptr.h | 3 +- unit/internal/lbr.cpp | 2 +- unit/internal/limex_nfa.cpp | 9 ++-- 27 files changed, 251 insertions(+), 241 deletions(-) diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 20239f56..00f75159 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -26,9 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Castle: multi-tenant repeat engine, compiler code. 
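
Looking ahead to the Castle conversion below: castlecompile.h is explicit that buildCastle() requires contiguous tops, established by a prior remapCastleTops() call. A hedged sketch of the calling sequence follows, assuming the declarations in that header; the re-keying of the trigger map to the remapped tops is my assumption about how a caller would keep the two in sync, not something the patch shows.

// Sketch: how a caller might satisfy buildCastle()'s precondition.
static bytecode_ptr<NFA>
try_castle(CastleProto &proto,
           const std::map<u32, std::vector<std::vector<CharReach>>> &old_triggers,
           const CompileContext &cc, const ReportManager &rm) {
    std::map<u32, u32> top_map; // old top -> new contiguous top
    remapCastleTops(proto, top_map);

    // Re-key triggers so they refer to the remapped tops (assumption).
    std::map<u32, std::vector<std::vector<CharReach>>> triggers;
    for (const auto &m : old_triggers) {
        triggers[top_map.at(m.first)] = m.second;
    }
    return buildCastle(proto, triggers, cc, rm); // nullptr if unbuildable
}
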
*/ + #include "castlecompile.h" #include "castle_internal.h" @@ -439,7 +441,7 @@ void buildSubcastles(const CastleProto &proto, vector &subs, } } -aligned_unique_ptr +bytecode_ptr buildCastle(const CastleProto &proto, const map>> &triggers, const CompileContext &cc, const ReportManager &rm) { @@ -577,7 +579,7 @@ buildCastle(const CastleProto &proto, total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter)); total_size += byte_length(stale_iter); // stale sparse iter - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_bytecode_ptr(total_size); nfa->type = verify_u8(CASTLE_NFA); nfa->length = verify_u32(total_size); nfa->nPositions = verify_u32(subs.size()); diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index 938e57c4..9f44692d 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Castle: multi-tenant repeat engine, compiler code. */ @@ -36,7 +37,7 @@ #include "nfa_kind.h" #include "ue2common.h" #include "nfagraph/ng_repeat.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/depth.h" #include "util/ue2_containers.h" @@ -120,7 +121,7 @@ void remapCastleTops(CastleProto &proto, std::map &top_map); * NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run * first. */ -ue2::aligned_unique_ptr +bytecode_ptr buildCastle(const CastleProto &proto, const std::map>> &triggers, const CompileContext &cc, const ReportManager &rm); diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 314b6fd0..62360561 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,6 @@ #include "grey.h" #include "mcclellancompile.h" #include "nfa_internal.h" -#include "util/alloc.h" #include "util/compile_context.h" #include "util/container.h" #include "util/graph_range.h" @@ -1036,9 +1035,9 @@ void update_accel_prog_offset(const gough_build_strat &gbs, } } -aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, + const CompileContext &cc, + const ReportManager &rm) { assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8 || !cc.streaming); @@ -1071,7 +1070,7 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, map accel_allowed; find_allowed_accel_states(*cfg, blocks, &accel_allowed); gough_build_strat gbs(raw, *cfg, rm, accel_allowed); - aligned_unique_ptr basic_dfa = mcclellanCompile_i(raw, gbs, cc); + auto basic_dfa = mcclellanCompile_i(raw, gbs, cc); assert(basic_dfa); if (!basic_dfa) { return nullptr; @@ -1117,7 +1116,7 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, gi.stream_som_loc_width = somPrecision; u32 gough_size = ROUNDUP_N(curr_offset, 16); - aligned_unique_ptr gough_dfa = aligned_zmalloc_unique(gough_size); + auto gough_dfa = make_bytecode_ptr(gough_size); memcpy(gough_dfa.get(), 
basic_dfa.get(), basic_dfa->length); memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi)); diff --git a/src/nfa/goughcompile.h b/src/nfa/goughcompile.h index 54f98cef..72469f3c 100644 --- a/src/nfa/goughcompile.h +++ b/src/nfa/goughcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ #include "mcclellancompile.h" #include "nfa_kind.h" #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/ue2_containers.h" #include "util/order_check.h" @@ -88,10 +88,10 @@ struct raw_som_dfa : public raw_dfa { * som */ }; -aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc, - const ReportManager &rm); +bytecode_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, + const CompileContext &cc, + const ReportManager &rm); } // namespace ue2 -#endif +#endif // GOUGHCOMPILE_H diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 7290c39e..235d7168 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -26,9 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Main NFA build code. */ + #include "limex_compile.h" #include "accel.h" @@ -2193,7 +2195,7 @@ struct Factory { } static - aligned_unique_ptr generateNfa(const build_info &args) { + bytecode_ptr generateNfa(const build_info &args) { if (args.num_states > NFATraits::maxStates) { return nullptr; } @@ -2295,7 +2297,7 @@ struct Factory { size_t nfaSize = sizeof(NFA) + offset; DEBUG_PRINTF("nfa size %zu\n", nfaSize); - auto nfa = aligned_zmalloc_unique(nfaSize); + auto nfa = make_bytecode_ptr(nfaSize); assert(nfa); // otherwise we would have thrown std::bad_alloc implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get()); @@ -2381,7 +2383,7 @@ struct Factory { template struct generateNfa { - static aligned_unique_ptr call(const build_info &args) { + static bytecode_ptr call(const build_info &args) { return Factory::generateNfa(args); } }; @@ -2478,17 +2480,15 @@ u32 max_state(const ue2::unordered_map &state_ids) { return rv; } -aligned_unique_ptr generate(NGHolder &h, - const ue2::unordered_map &states, - const vector &repeats, - const map &reportSquashMap, - const map &squashMap, - const map> &tops, - const set &zombies, - bool do_accel, - bool stateCompression, - u32 hint, - const CompileContext &cc) { +bytecode_ptr generate(NGHolder &h, + const ue2::unordered_map &states, + const vector &repeats, + const map &reportSquashMap, + const map &squashMap, + const map> &tops, + const set &zombies, bool do_accel, + bool stateCompression, u32 hint, + const CompileContext &cc) { const u32 num_states = max_state(states) + 1; DEBUG_PRINTF("total states: %u\n", num_states); diff --git a/src/nfa/limex_compile.h b/src/nfa/limex_compile.h index 21cb7608..a12ae9f6 100644 --- a/src/nfa/limex_compile.h +++ b/src/nfa/limex_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Main NFA build code. 
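
The limex changes above run through a layer of templates whose parameters the extraction has eaten but whose shape is still visible: a Factory parameterised on the engine type, a thin generateNfa functor that forwards to it, and a runtime dispatch that picks the instantiation. A condensed, self-contained rendering of that idiom; the real code covers many more models and drives the switch through a dispatch macro, so this is a sketch of the pattern only.

struct build_info {};            // stand-in for the real limex build state
struct NFA { unsigned length; };

enum NFAEngineType { LIMEX_NFA_128, LIMEX_NFA_256 }; // abbreviated list

template <NFAEngineType dtype>
struct Factory {
    static bytecode_ptr<NFA> generateNfa(const build_info &args) {
        (void)args; // the real version lays out the model-specific engine
        return make_bytecode_ptr<NFA>(sizeof(NFA));
    }
};

template <NFAEngineType dtype>
struct generateNfa {
    static bytecode_ptr<NFA> call(const build_info &args) {
        return Factory<dtype>::generateNfa(args);
    }
};

static bytecode_ptr<NFA> dispatch(NFAEngineType type, const build_info &args) {
    switch (type) {
    case LIMEX_NFA_128: return generateNfa<LIMEX_NFA_128>::call(args);
    case LIMEX_NFA_256: return generateNfa<LIMEX_NFA_256>::call(args);
    }
    return nullptr; // unknown model
}
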
*/ @@ -37,10 +38,10 @@ #include #include -#include "ue2common.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_squash.h" // for NFAStateSet -#include "util/alloc.h" +#include "ue2common.h" +#include "util/bytecode_ptr.h" #include "util/ue2_containers.h" struct NFA; @@ -50,7 +51,8 @@ namespace ue2 { struct BoundedRepeatData; struct CompileContext; -/** \brief Construct a LimEx NFA from an NGHolder. +/** + * \brief Construct a LimEx NFA from an NGHolder. * * \param g Input NFA graph. Must have state IDs assigned. * \param repeats Bounded repeat information, if any. @@ -66,7 +68,7 @@ struct CompileContext; * \return a built NFA, or nullptr if no NFA could be constructed for this * graph. */ -aligned_unique_ptr generate(NGHolder &g, +bytecode_ptr generate(NGHolder &g, const ue2::unordered_map &states, const std::vector &repeats, const std::map &reportSquashMap, diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 7a73c9d4..e07d7a2a 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -456,9 +456,8 @@ bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { } static -aligned_unique_ptr mcclellanCompile16(dfa_info &info, - const CompileContext &cc, - set *accel_states) { +bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, + set *accel_states) { DEBUG_PRINTF("building mcclellan 16\n"); vector reports; /* index in ri for the appropriate report list */ @@ -497,7 +496,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_bytecode_ptr(total_size); char *nfa_base = (char *)nfa.get(); populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, @@ -685,9 +684,8 @@ void allocateFSN8(dfa_info &info, } static -aligned_unique_ptr mcclellanCompile8(dfa_info &info, - const CompileContext &cc, - set *accel_states) { +bytecode_ptr mcclellanCompile8(dfa_info &info, const CompileContext &cc, + set *accel_states) { DEBUG_PRINTF("building mcclellan 8\n"); vector reports; @@ -717,12 +715,13 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = bytecode_ptr(total_size); char *nfa_base = (char *)nfa.get(); mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); - allocateFSN8(info, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8); + allocateFSN8(info, accel_escape_info, &m->accel_limit_8, + &m->accept_limit_8); populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, accel_escape_info.size(), arb, single, nfa.get()); @@ -939,9 +938,9 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { return false; } -aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, - const CompileContext &cc, - set *accel_states) { +bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, + const CompileContext &cc, + set *accel_states) { u16 total_daddy = 0; dfa_info info(strat); 
bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; @@ -965,7 +964,7 @@ aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat & info.size() * info.impl_alpha_size, info.size(), info.impl_alpha_size); - aligned_unique_ptr nfa; + bytecode_ptr nfa; if (!using8bit) { nfa = mcclellanCompile16(info, cc, accel_states); } else { @@ -980,9 +979,9 @@ aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat & return nfa; } -aligned_unique_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, - set *accel_states) { +bytecode_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, + set *accel_states) { mcclellan_build_strat mbs(raw, rm); return mcclellanCompile_i(raw, mbs, cc, accel_states); } diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index 8d8dfb19..be0a18c5 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ #include "accel_dfa_build_strat.h" #include "rdfa.h" #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/ue2_containers.h" #include @@ -55,7 +55,7 @@ public: std::vector &reports /* out */, std::vector &reports_eod /* out */, u8 *isSingleReport /* out */, - ReportID *arbReport /* out */) const override; + ReportID *arbReport /* out */) const override; size_t accelSize(void) const override; u32 max_allowed_offset_accel() const override; u32 max_stop_char() const override; @@ -67,13 +67,13 @@ private: /* accel_states: (optional) on success, is filled with the set of accelerable * states */ -ue2::aligned_unique_ptr +bytecode_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, std::set *accel_states = nullptr); /* used internally by mcclellan/haig/gough compile process */ -ue2::aligned_unique_ptr +bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, std::set *accel_states = nullptr); @@ -89,4 +89,4 @@ bool has_accel_mcclellan(const NFA *nfa); } // namespace ue2 -#endif +#endif // MCCLELLANCOMPILE_H diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index 7b4e58ab..c7133d08 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -821,7 +821,7 @@ void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { } static -aligned_unique_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, +bytecode_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, const map &accel_escape_info, const Grey &grey) { DEBUG_PRINTF("building mcsheng 16\n"); @@ -872,7 +872,7 @@ aligned_unique_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_bytecode_ptr(total_size); mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); 
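
A pattern worth spelling out while these DFA builders are being touched: McClellan, McSheng and Sheng all compute every region's offset before the single make_bytecode_ptr allocation, then adjust the stored offsets to be relative to the implementation struct rather than the enclosing NFA header (hence the repeated "accel_offset -= sizeof(NFA)" with its AccelAux alignment assert). A simplified model of that arithmetic, with invented region sizes and alignments:

#include <cstddef>
#include <cstdint>

struct NFA { unsigned length; };

struct EngineLayout {
    size_t total_size;
    size_t aux_offset;   // relative to the impl struct, not the NFA header
    size_t accel_offset; // likewise
};

static size_t round_up(size_t n, size_t align) {
    return (n + align - 1) / align * align;
}

static EngineLayout plan_layout(size_t state_table_bytes, size_t aux_bytes,
                                size_t accel_bytes) {
    EngineLayout l;
    size_t off = sizeof(NFA) + state_table_bytes;
    l.aux_offset = round_up(off, alignof(uint32_t));
    off = l.aux_offset + aux_bytes;
    l.accel_offset = round_up(off, 8); // stand-in for alignof(union AccelAux)
    l.total_size = l.accel_offset + accel_bytes;
    // stored offsets are relative to the impl struct, hence the adjustment
    l.aux_offset -= sizeof(NFA);
    l.accel_offset -= sizeof(NFA);
    return l;
}
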
populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, @@ -967,7 +967,7 @@ void allocateImplId8(dfa_info &info, dstate_id_t sheng_end, } static -aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, +bytecode_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, const map &accel_escape_info) { DEBUG_PRINTF("building mcsheng 8\n"); @@ -998,7 +998,7 @@ aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_bytecode_ptr(total_size); mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, @@ -1019,8 +1019,8 @@ aligned_unique_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, return nfa; } -aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowMcSheng) { return nullptr; } @@ -1044,7 +1044,7 @@ aligned_unique_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, return nullptr; } - aligned_unique_ptr nfa; + bytecode_ptr nfa; if (!using8bit) { nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey); } else { diff --git a/src/nfa/mcsheng_compile.h b/src/nfa/mcsheng_compile.h index d1ae1e32..487ab45f 100644 --- a/src/nfa/mcsheng_compile.h +++ b/src/nfa/mcsheng_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,13 +29,8 @@ #ifndef MCSHENGCOMPILE_H #define MCSHENGCOMPILE_H -#include "accel_dfa_build_strat.h" -#include "rdfa.h" #include "ue2common.h" -#include "util/alloc.h" -#include "util/ue2_containers.h" - -#include +#include "util/bytecode_ptr.h" struct NFA; @@ -43,10 +38,10 @@ namespace ue2 { class ReportManager; struct CompileContext; +struct raw_dfa; -ue2::aligned_unique_ptr -mcshengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm); +bytecode_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm); bool has_accel_mcsheng(const NFA *nfa); diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index 53f2c131..b2996aa5 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -450,10 +450,9 @@ bool has_accel_sheng(const NFA *) { return true; /* consider the sheng region as accelerated */ } -aligned_unique_ptr shengCompile(raw_dfa &raw, - const CompileContext &cc, - const ReportManager &rm, - set *accel_states) { +bytecode_ptr shengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, + set *accel_states) { if (!cc.grey.allowSheng) { DEBUG_PRINTF("Sheng is not allowed!\n"); return nullptr; @@ -508,7 +507,7 @@ aligned_unique_ptr shengCompile(raw_dfa &raw, DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n", nfa_size, total_aux, total_reports, total_accel, total_size); - 
aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_bytecode_ptr(total_size); populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset, accel_offset, total_size, total_size - sizeof(NFA)); diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h index 873b7c75..6afc1dd1 100644 --- a/src/nfa/shengcompile.h +++ b/src/nfa/shengcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,12 +26,12 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef SHENGCOMPILE_H_ -#define SHENGCOMPILE_H_ +#ifndef SHENGCOMPILE_H +#define SHENGCOMPILE_H #include "accel_dfa_build_strat.h" #include "rdfa.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include "util/ue2_containers.h" @@ -62,9 +62,9 @@ private: raw_dfa &rdfa; }; -aligned_unique_ptr -shengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, - std::set *accel_states = nullptr); +bytecode_ptr shengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, + std::set *accel_states = nullptr); struct sheng_escape_info { CharReach outs; @@ -77,4 +77,4 @@ bool has_accel_sheng(const NFA *nfa); } // namespace ue2 -#endif /* SHENGCOMPILE_H_ */ +#endif /* SHENGCOMPILE_H */ diff --git a/src/nfa/tamaramacompile.cpp b/src/nfa/tamaramacompile.cpp index c28caacb..6d253411 100644 --- a/src/nfa/tamaramacompile.cpp +++ b/src/nfa/tamaramacompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,9 +26,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file - * \brief Tamarama: container engine for exclusive engines, - * compiler code. +/** + * \file + * \brief Tamarama: container engine for exclusive engines, compiler code. */ #include "config.h" @@ -111,8 +111,9 @@ void copyInSubnfas(const char *base_offset, NFA &nfa, * returns via out_top_remap, a mapping indicating how tops in the subengines in * relate to the tamarama's tops. 
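
The Tamarama builder converted below serialises its subengines into the one allocation: after the header and top table comes a u32 offset per subengine, then the subengine bytecode itself (the offsets and sub_nfa_offset pointers handed to copyInSubnfas). A sketch of that packing step, assuming each subengine arrives as a bytecode_ptr<NFA>; the real code copies raw NFA pointers out of TamaInfo instead.

#include <cstdint>
#include <cstring>
#include <vector>

// Sketch: write an offset table followed by the subengine bytecode.
// base points at the region the offsets are measured from.
static void pack_subengines(char *base, uint32_t *offsets,
                            const std::vector<bytecode_ptr<NFA>> &subs) {
    char *dst = reinterpret_cast<char *>(offsets + subs.size());
    for (size_t i = 0; i < subs.size(); i++) {
        offsets[i] = static_cast<uint32_t>(dst - base); // subengine i's home
        std::memcpy(dst, subs[i].get(), subs[i].size());
        dst += subs[i].size();
    }
}
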
*/ -aligned_unique_ptr buildTamarama(const TamaInfo &tamaInfo, const u32 queue, - map, u32> &out_top_remap) { +bytecode_ptr +buildTamarama(const TamaInfo &tamaInfo, const u32 queue, + map, u32> &out_top_remap) { vector top_base; remapTops(tamaInfo, top_base, out_top_remap); @@ -133,7 +134,7 @@ aligned_unique_ptr buildTamarama(const TamaInfo &tamaInfo, const u32 queue, // use subSize as a sentinel value for no active subengines, // so add one to subSize here u32 activeIdxSize = calcPackedBytes(subSize + 1); - aligned_unique_ptr nfa = aligned_zmalloc_unique(total_size); + auto nfa = make_bytecode_ptr(total_size); nfa->type = verify_u8(TAMARAMA_NFA); nfa->length = verify_u32(total_size); nfa->queueIndex = queue; @@ -148,7 +149,7 @@ aligned_unique_ptr buildTamarama(const TamaInfo &tamaInfo, const u32 queue, copy_bytes(ptr, top_base); ptr += byte_length(top_base); - u32 *offsets = (u32*)ptr; + u32 *offsets = (u32 *)ptr; char *sub_nfa_offset = ptr + sizeof(u32) * subSize; copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset, activeIdxSize); diff --git a/src/nfa/tamaramacompile.h b/src/nfa/tamaramacompile.h index 048b966b..7fcea3ec 100644 --- a/src/nfa/tamaramacompile.h +++ b/src/nfa/tamaramacompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,15 +26,16 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file - * \brief Tamarama: container engine for exclusive engines, compiler code. +/** + * \file + * \brief Tamarama: container engine for exclusive engines, compiler code. */ #ifndef NFA_TAMARAMACOMPILE_H #define NFA_TAMARAMACOMPILE_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -45,7 +46,7 @@ struct NFA; namespace ue2 { /** - * \brief A TamaProto that contains top remapping and reports info + * \brief A TamaProto that contains top remapping and reports info. */ struct TamaProto { void add(const NFA *n, const u32 id, const u32 top, @@ -59,7 +60,7 @@ struct TamaProto { }; /** - * \brief Contruction info for a Tamarama engine: + * \brief Construction info for a Tamarama engine: * contains at least two subengines. * * A TamaInfo is converted into a single NFA, with each top triggering a @@ -70,7 +71,7 @@ struct TamaInfo { static constexpr size_t max_occupancy = 65536; // arbitrary limit /** \brief Add a new subengine. */ - void add(NFA* sub, const std::set &top); + void add(NFA *sub, const std::set &top); /** \brief All the subengines */ std::vector subengines; @@ -86,9 +87,10 @@ std::set all_reports(const TamaProto &proto); * returns via out_top_remap, a mapping indicating how tops in the subengines in * relate to the tamarama's tops. 
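
From the caller's side, the header below implies a three-step protocol: register each subengine against the set of tops it owns, build, then translate (subengine, original top) pairs through out_top_remap when wiring triggers. A usage sketch with placeholder engines, assuming the declarations in tamaramacompile.h:

#include <map>
#include <utility>

static void wire_container(NFA *sub_a, NFA *sub_b, u32 queue_index) {
    TamaInfo tama;
    tama.add(sub_a, {0, 1}); // subengine A owns tops 0 and 1
    tama.add(sub_b, {0});    // subengine B has its own top 0

    std::map<std::pair<const NFA *, u32>, u32> top_remap;
    auto container = buildTamarama(tama, queue_index, top_remap);

    // a trigger that fired top 0 on subengine B now fires this tamarama top:
    u32 top_b0 = top_remap.at({sub_b, 0});
    (void)container;
    (void)top_b0;
}
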
*/ -ue2::aligned_unique_ptr buildTamarama(const TamaInfo &tamaInfo, - const u32 queue, - std::map, u32> &out_top_remap); +bytecode_ptr +buildTamarama(const TamaInfo &tamaInfo, const u32 queue, + std::map, u32> &out_top_remap); + } // namespace ue2 #endif // NFA_TAMARAMACOMPILE_H diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index d832bdaa..11262ae1 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Large Bounded Repeat (LBR) engine build code. */ @@ -128,25 +129,24 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, } template static -aligned_unique_ptr makeLbrNfa(NFAEngineType nfa_type, - enum RepeatType rtype, - const depth &repeatMax) { +bytecode_ptr makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype, + const depth &repeatMax) { size_t tableLen = 0; if (rtype == REPEAT_SPARSE_OPTIMAL_P) { tableLen = sizeof(u64a) * (repeatMax + 1); } size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) + tableLen + sizeof(u64a); - aligned_unique_ptr nfa = aligned_zmalloc_unique(len); + auto nfa = make_bytecode_ptr(len); nfa->type = verify_u8(nfa_type); nfa->length = verify_u32(len); return nfa; } static -aligned_unique_ptr buildLbrDot(const CharReach &cr, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr buildLbrDot(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { if (!cr.all()) { return nullptr; } @@ -164,10 +164,9 @@ aligned_unique_ptr buildLbrDot(const CharReach &cr, const depth &repeatMin, } static -aligned_unique_ptr buildLbrVerm(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr buildLbrVerm(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { const CharReach escapes(~cr); if (escapes.count() != 1) { @@ -188,10 +187,9 @@ aligned_unique_ptr buildLbrVerm(const CharReach &cr, } static -aligned_unique_ptr buildLbrNVerm(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr buildLbrNVerm(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { const CharReach escapes(cr); if (escapes.count() != 1) { @@ -212,10 +210,9 @@ aligned_unique_ptr buildLbrNVerm(const CharReach &cr, } static -aligned_unique_ptr buildLbrShuf(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr buildLbrShuf(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); auto nfa = makeLbrNfa(LBR_NFA_SHUF, rtype, repeatMax); @@ -233,10 +230,9 @@ aligned_unique_ptr buildLbrShuf(const CharReach &cr, } static -aligned_unique_ptr buildLbrTruf(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID 
report) { +bytecode_ptr buildLbrTruf(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, is_reset); auto nfa = makeLbrNfa(LBR_NFA_TRUF, rtype, repeatMax); @@ -252,10 +248,9 @@ aligned_unique_ptr buildLbrTruf(const CharReach &cr, } static -aligned_unique_ptr constructLBR(const CharReach &cr, - const depth &repeatMin, - const depth &repeatMax, u32 minPeriod, - bool is_reset, ReportID report) { +bytecode_ptr constructLBR(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n", repeatMin.str().c_str(), repeatMax.str().c_str(), describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(), @@ -263,8 +258,8 @@ aligned_unique_ptr constructLBR(const CharReach &cr, assert(repeatMin <= repeatMax); assert(repeatMax.is_reachable()); - aligned_unique_ptr nfa - = buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report); + auto nfa = + buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report); if (!nfa) { nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, @@ -291,10 +286,10 @@ aligned_unique_ptr constructLBR(const CharReach &cr, return nfa; } -aligned_unique_ptr constructLBR(const CastleProto &proto, - const vector> &triggers, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr constructLBR(const CastleProto &proto, + const vector> &triggers, + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } @@ -330,10 +325,10 @@ aligned_unique_ptr constructLBR(const CastleProto &proto, } /** \brief Construct an LBR engine from the given graph \p g. */ -aligned_unique_ptr constructLBR(const NGHolder &g, - const vector> &triggers, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr constructLBR(const NGHolder &g, + const vector> &triggers, + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } diff --git a/src/nfagraph/ng_lbr.h b/src/nfagraph/ng_lbr.h index 99cb0fcb..1eec9653 100644 --- a/src/nfagraph/ng_lbr.h +++ b/src/nfagraph/ng_lbr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Large Bounded Repeat (LBR) engine build code. */ @@ -34,7 +35,7 @@ #define NG_LBR_H #include "ue2common.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -51,14 +52,16 @@ struct CompileContext; struct Grey; /** \brief Construct an LBR engine from the given graph \p g. */ -aligned_unique_ptr +bytecode_ptr constructLBR(const NGHolder &g, const std::vector> &triggers, const CompileContext &cc, const ReportManager &rm); -/** \brief Construct an LBR engine from the given CastleProto, which should - * contain only one repeat. */ -aligned_unique_ptr +/** + * \brief Construct an LBR engine from the given CastleProto, which should + * contain only one repeat. 
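
constructLBR() above is a priority chain: each buildLbrXxx() helper returns null unless the repeat's character reach fits its acceleration model (dot wants a full reach, verm and nverm exactly one escape byte, shuf and truf wider classes). The same idiom condensed into a table-driven form; the array-of-builders helper is my rephrasing of the chain, not code from the patch.

// Sketch: first-fitting LBR variant wins; signatures as in ng_lbr.cpp.
using LbrBuilder = bytecode_ptr<NFA> (*)(const CharReach &, const depth &,
                                         const depth &, u32, bool, ReportID);

static bytecode_ptr<NFA> build_first_fit(const CharReach &cr,
                                         const depth &repeatMin,
                                         const depth &repeatMax, u32 minPeriod,
                                         bool is_reset, ReportID report) {
    const LbrBuilder builders[] = {buildLbrDot, buildLbrVerm, buildLbrNVerm,
                                   buildLbrShuf, buildLbrTruf};
    for (LbrBuilder build : builders) {
        if (auto nfa = build(cr, repeatMin, repeatMax, minPeriod, is_reset,
                             report)) {
            return nfa; // first engine whose model fits the reach
        }
    }
    return nullptr; // caller falls back to a general-purpose NFA
}
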
+ */ +bytecode_ptr constructLBR(const CastleProto &proto, const std::vector> &triggers, const CompileContext &cc, const ReportManager &rm); diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 7f157c33..283bba22 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -26,9 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Limex NFA construction code. */ + #include "ng_limex.h" #include "grey.h" @@ -623,7 +625,7 @@ void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { } static -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &h_in, const ReportManager *rm, const map &fixed_depth_tops, const map>> &triggers, @@ -682,7 +684,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, zombies, do_accel, compress_state, hint, cc); } -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &h_in, const ReportManager *rm, const map &fixed_depth_tops, const map>> &triggers, @@ -696,7 +698,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, #ifndef RELEASE_BUILD // Variant that allows a hint to be specified. -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &h_in, const ReportManager *rm, const map &fixed_depth_tops, const map>> &triggers, @@ -709,8 +711,8 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, #endif // RELEASE_BUILD static -aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, - const CompileContext &cc) { +bytecode_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, + const CompileContext &cc) { // Make a mutable copy of the graph that we can renumber etc. NGHolder h; cloneHolder(h, h_in); @@ -739,16 +741,16 @@ aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, zombies, false, false, hint, cc); } -aligned_unique_ptr constructReversedNFA(const NGHolder &h_in, - const CompileContext &cc) { +bytecode_ptr constructReversedNFA(const NGHolder &h_in, + const CompileContext &cc) { u32 hint = INVALID_NFA; // no hint return constructReversedNFA_i(h_in, hint, cc); } #ifndef RELEASE_BUILD // Variant that allows a hint to be specified. -aligned_unique_ptr constructReversedNFA(const NGHolder &h_in, u32 hint, - const CompileContext &cc) { +bytecode_ptr constructReversedNFA(const NGHolder &h_in, u32 hint, + const CompileContext &cc) { return constructReversedNFA_i(h_in, hint, cc); } #endif // RELEASE_BUILD diff --git a/src/nfagraph/ng_limex.h b/src/nfagraph/ng_limex.h index 1e36e03d..9bf46d69 100644 --- a/src/nfagraph/ng_limex.h +++ b/src/nfagraph/ng_limex.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Limex NFA construction code. */ @@ -35,7 +36,7 @@ #include "ue2common.h" #include "som/som.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include #include @@ -51,7 +52,8 @@ class NGHolder; class ReportManager; struct CompileContext; -/** \brief Determine if the given graph is implementable as an NFA. +/** + * \brief Determine if the given graph is implementable as an NFA. * * Returns zero if the NFA is not implementable (usually because it has too * many states for any of our models). Otherwise returns the number of states. 
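The ng_limex.h hunks above carry the API shift that runs through this series: every
NFA construction entry point now returns a bytecode_ptr<NFA> rather than an
aligned_unique_ptr<NFA>, so the engine region's extent travels with the pointer. A
minimal caller-side sketch of the new contract; the wrapper function and its output
vector are invented for illustration, while the constructNFA signature is the
post-patch one:

    // Sketch only: writeNfaBlock is not part of this patch series.
    static bool writeNfaBlock(const NGHolder &g, const ReportManager *rm,
                              const CompileContext &cc, std::vector<char> &out) {
        std::map<u32, u32> fixed_depth_tops;   // no triggered tops in this sketch
        std::map<u32, std::vector<std::vector<CharReach>>> triggers;
        auto nfa = constructNFA(g, rm, fixed_depth_tops, triggers,
                                /*compress_state=*/true, cc);
        if (!nfa) {
            return false; // not implementable with any of our models
        }
        // No separate length out-param: the region's size is a property of
        // the bytecode_ptr itself.
        const char *bytes = reinterpret_cast<const char *>(nfa.get());
        out.assign(bytes, bytes + nfa.size());
        return true;
    }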
@@ -62,11 +64,14 @@ struct CompileContext; u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, const CompileContext &cc); -/** \brief Late-stage graph reductions. +/** + * \brief Late-stage graph reductions. * * This will call \ref removeRedundancy and apply its changes to the given - * holder only if it is implementable afterwards. */ -void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm, + * holder only if it is implementable afterwards. + */ +void reduceImplementableGraph(NGHolder &g, som_type som, + const ReportManager *rm, const CompileContext &cc); /** @@ -79,7 +84,8 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm u32 countAccelStates(const NGHolder &g, const ReportManager *rm, const CompileContext &cc); -/** \brief Construct an NFA from the given NFAGraph. +/** + * \brief Construct an NFA from the given graph. * * Returns zero if the NFA is not implementable (usually because it has too * many states for any of our models). Otherwise returns the number of states. @@ -90,23 +96,25 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, * Note: this variant of the function allows a model to be specified with the * \a hint parameter. */ -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &g, const ReportManager *rm, const std::map &fixed_depth_tops, const std::map>> &triggers, bool compress_state, const CompileContext &cc); -/** \brief Build a reverse NFA from the graph given, which should have already +/** + * \brief Build a reverse NFA from the graph given, which should have already * been reversed. * * Used for reverse NFAs used in SOM mode. */ -aligned_unique_ptr constructReversedNFA(const NGHolder &h, - const CompileContext &cc); +bytecode_ptr constructReversedNFA(const NGHolder &h, + const CompileContext &cc); #ifndef RELEASE_BUILD -/** \brief Construct an NFA (with model type hint) from the given NFAGraph. +/** + * \brief Construct an NFA (with model type hint) from the given graph. * * Returns zero if the NFA is not implementable (usually because it has too * many states for any of our models). Otherwise returns the number of states. @@ -117,19 +125,20 @@ aligned_unique_ptr constructReversedNFA(const NGHolder &h, * Note: this variant of the function allows a model to be specified with the * \a hint parameter. */ -aligned_unique_ptr +bytecode_ptr constructNFA(const NGHolder &g, const ReportManager *rm, const std::map &fixed_depth_tops, const std::map>> &triggers, bool compress_state, u32 hint, const CompileContext &cc); -/** \brief Build a reverse NFA (with model type hint) from the graph given, +/** + * \brief Build a reverse NFA (with model type hint) from the graph given, * which should have already been reversed. * * Used for reverse NFAs used in SOM mode. */ -aligned_unique_ptr constructReversedNFA(const NGHolder &h, u32 hint, - const CompileContext &cc); +bytecode_ptr constructReversedNFA(const NGHolder &h, u32 hint, + const CompileContext &cc); #endif // RELEASE_BUILD diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 5bf52915..6481eff7 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief SOM ("Start of Match") analysis. 
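 * (SOM mode tracks where each match begins as well as where it ends; the
 * analysis here decides how start offsets can be tracked and builds the
 * reverse NFAs used to rediscover them at match time.)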
*/ @@ -1731,19 +1732,19 @@ void clearProperInEdges(NGHolder &g, const NFAVertex sink) { namespace { struct SomRevNfa { - SomRevNfa(NFAVertex s, ReportID r, aligned_unique_ptr n) + SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr n) : sink(s), report(r), nfa(move(n)) {} SomRevNfa(SomRevNfa&& s) // MSVC2013 needs this for emplace : sink(s.sink), report(s.report), nfa(move(s.nfa)) {} NFAVertex sink; ReportID report; - aligned_unique_ptr nfa; + bytecode_ptr nfa; }; } static -aligned_unique_ptr makeBareSomRevNfa(const NGHolder &g, - const CompileContext &cc) { +bytecode_ptr makeBareSomRevNfa(const NGHolder &g, + const CompileContext &cc) { // Create a reversed anchored version of this NFA which fires a zero report // ID on accept. NGHolder g_rev; @@ -1759,7 +1760,7 @@ aligned_unique_ptr makeBareSomRevNfa(const NGHolder &g, DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev)); - aligned_unique_ptr nfa = constructReversedNFA(g_rev, cc); + auto nfa = constructReversedNFA(g_rev, cc); if (!nfa) { return nfa; } @@ -1794,7 +1795,7 @@ bool makeSomRevNfa(vector &som_nfas, const NGHolder &g, renumber_vertices(g2); // for findMinWidth, findMaxWidth. - aligned_unique_ptr nfa = makeBareSomRevNfa(g2, cc); + auto nfa = makeBareSomRevNfa(g2, cc); if (!nfa) { DEBUG_PRINTF("couldn't build rev nfa\n"); return false; diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 065990ec..12b62128 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -813,7 +813,7 @@ vector> getAnchoredDfas(RoseBuildImpl &build, */ static size_t buildNfas(vector &anchored_dfas, - vector> *nfas, + vector> *nfas, vector *start_offset, const CompileContext &cc, const ReportManager &rm) { const size_t num_dfas = anchored_dfas.size(); @@ -883,7 +883,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, const vector &fragments, remapIdsToPrograms(fragments, rdfa); } - vector> nfas; + vector> nfas; vector start_offset; // start offset for each dfa (dots removed) size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c0747d97..b48a80c2 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -74,7 +74,6 @@ #include "nfagraph/ng_width.h" #include "smallwrite/smallwrite_build.h" #include "som/slot_manager.h" -#include "util/alloc.h" #include "util/bitutils.h" #include "util/boundary_reports.h" #include "util/charreach.h" @@ -274,7 +273,7 @@ struct ProgramBuild : noncopyable { /** \brief subengine info including built engine and * corresponding triggering rose vertices */ struct ExclusiveSubengine { - aligned_unique_ptr nfa; + bytecode_ptr nfa; vector vertices; }; @@ -655,8 +654,8 @@ void findFixedDepthTops(const RoseGraph &g, const set &triggers, * engine. */ static -aligned_unique_ptr pickImpl(aligned_unique_ptr dfa_impl, - aligned_unique_ptr nfa_impl) { +bytecode_ptr pickImpl(bytecode_ptr dfa_impl, + bytecode_ptr nfa_impl) { assert(nfa_impl); assert(dfa_impl); assert(isDfaType(dfa_impl->type)); @@ -708,7 +707,7 @@ aligned_unique_ptr pickImpl(aligned_unique_ptr dfa_impl, * otherwise a Castle. 
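 * (A Castle is UE2's engine for running a collection of bounded repeats
 * over shared character reachability; it is built in castlecompile.cpp,
 * which this series also converts to bytecode_ptr.)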
*/ static -aligned_unique_ptr +bytecode_ptr buildRepeatEngine(const CastleProto &proto, const map>> &triggers, const CompileContext &cc, const ReportManager &rm) { @@ -724,7 +723,7 @@ buildRepeatEngine(const CastleProto &proto, } static -aligned_unique_ptr getDfa(raw_dfa &rdfa, bool is_transient, +bytecode_ptr getDfa(raw_dfa &rdfa, bool is_transient, const CompileContext &cc, const ReportManager &rm) { // Unleash the Sheng!! @@ -744,7 +743,7 @@ aligned_unique_ptr getDfa(raw_dfa &rdfa, bool is_transient, /* builds suffix nfas */ static -aligned_unique_ptr +bytecode_ptr buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, const map &fixed_depth_tops, const map>> &triggers, @@ -873,14 +872,15 @@ void findTriggerSequences(const RoseBuildImpl &tbi, } } -static aligned_unique_ptr -makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, - const bool is_prefix, const bool is_transient, - const map > &infixTriggers, +static +bytecode_ptr +makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, const bool is_prefix, + const bool is_transient, + const map> &infixTriggers, const CompileContext &cc) { const ReportManager &rm = tbi.rm; - aligned_unique_ptr n; + bytecode_ptr n; // Should compress state if this rose is non-transient and we're in // streaming mode. @@ -1181,7 +1181,7 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, leftfix = updateLeftfixWithEager(g, eager.at(leftfix), succs); } - aligned_unique_ptr nfa; + bytecode_ptr nfa; // Need to build NFA, which is either predestined to be a Haig (in SOM mode) // or could be all manner of things. if (leftfix.haig()) { @@ -1669,26 +1669,26 @@ bool hasNonSmallBlockOutfix(const vector &outfixes) { } namespace { -class OutfixBuilder : public boost::static_visitor> { +class OutfixBuilder : public boost::static_visitor> { public: explicit OutfixBuilder(const RoseBuildImpl &build_in) : build(build_in) {} - aligned_unique_ptr operator()(boost::blank&) const { + bytecode_ptr operator()(boost::blank&) const { return nullptr; }; - aligned_unique_ptr operator()(unique_ptr &rdfa) const { + bytecode_ptr operator()(unique_ptr &rdfa) const { // Unleash the mighty DFA! return getDfa(*rdfa, false, build.cc, build.rm); } - aligned_unique_ptr operator()(unique_ptr &haig) const { + bytecode_ptr operator()(unique_ptr &haig) const { // Unleash the Goughfish! return goughCompile(*haig, build.ssm.somPrecision(), build.cc, build.rm); } - aligned_unique_ptr operator()(unique_ptr &holder) const { + bytecode_ptr operator()(unique_ptr &holder) const { const CompileContext &cc = build.cc; const ReportManager &rm = build.rm; @@ -1717,7 +1717,7 @@ public: return n; } - aligned_unique_ptr operator()(UNUSED MpvProto &mpv) const { + bytecode_ptr operator()(UNUSED MpvProto &mpv) const { // MPV construction handled separately. assert(mpv.puffettes.empty()); return nullptr; @@ -1729,7 +1729,7 @@ private: } static -aligned_unique_ptr buildOutfix(const RoseBuildImpl &build, OutfixInfo &outfix) { +bytecode_ptr buildOutfix(const RoseBuildImpl &build, OutfixInfo &outfix) { assert(!outfix.is_dead()); // should not be marked dead. 
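    // boost::apply_visitor below dispatches on the alternative currently
    // held in outfix.proto, running the matching OutfixBuilder::operator()
    // above; the blank and MPV overloads deliberately yield nullptr, as
    // those cases are handled elsewhere.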
auto n = boost::apply_visitor(OutfixBuilder(build), outfix.proto); diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 914349b8..dc2a4466 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -403,10 +403,10 @@ bool is_slow(const raw_dfa &rdfa, const set &accel, } static -aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, - const ReportManager &rm, - set &accel_states) { - aligned_unique_ptr dfa = nullptr; +bytecode_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, + const ReportManager &rm, + set &accel_states) { + bytecode_ptr dfa = nullptr; if (cc.grey.allowSmallWriteSheng) { dfa = shengCompile(rdfa, cc, rm, &accel_states); } @@ -417,10 +417,9 @@ aligned_unique_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, } static -aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, - const CompileContext &cc, - const ReportManager &rm, u32 *start_offset, - u32 *small_region) { +bytecode_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, + const CompileContext &cc, const ReportManager &rm, + u32 *start_offset, u32 *small_region) { *start_offset = remove_leading_dots(rdfa); // Unleash the McClellan! diff --git a/src/som/slot_manager.cpp b/src/som/slot_manager.cpp index b1aa6bf7..3dc74d3d 100644 --- a/src/som/slot_manager.cpp +++ b/src/som/slot_manager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,9 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief SOM Slot Manager. */ + #include "slot_manager.h" #include "slot_manager_internal.h" @@ -245,7 +247,7 @@ u32 SomSlotManager::numSomSlots() const { return nextSomSlot; } -u32 SomSlotManager::addRevNfa(aligned_unique_ptr nfa, u32 maxWidth) { +u32 SomSlotManager::addRevNfa(bytecode_ptr nfa, u32 maxWidth) { u32 rv = verify_u32(rev_nfas.size()); rev_nfas.push_back(move(nfa)); diff --git a/src/som/slot_manager.h b/src/som/slot_manager.h index adccf99a..ddb105f5 100644 --- a/src/som/slot_manager.h +++ b/src/som/slot_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief SOM Slot Manager. */ @@ -35,7 +36,7 @@ #include "ue2common.h" #include "nfagraph/ng_holder.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/noncopyable.h" #include "util/ue2_containers.h" @@ -78,11 +79,11 @@ public: u32 numSomSlots() const; - const std::deque> &getRevNfas() const { + const std::deque> &getRevNfas() const { return rev_nfas; } - u32 addRevNfa(aligned_unique_ptr nfa, u32 maxWidth); + u32 addRevNfa(bytecode_ptr nfa, u32 maxWidth); u32 somHistoryRequired() const { return historyRequired; } @@ -97,7 +98,7 @@ private: std::unique_ptr cache; /** \brief Reverse NFAs used for SOM support. */ - std::deque> rev_nfas; + std::deque> rev_nfas; /** \brief In streaming mode, the amount of history we've committed to * using for SOM rev NFAs. 
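 * (In practice this must cover the maximum width of any reverse NFA
 * registered via addRevNfa(), so the engine can rerun it over history.)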
*/ diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h index 2cc4277e..f55f78ec 100644 --- a/src/util/bytecode_ptr.h +++ b/src/util/bytecode_ptr.h @@ -63,8 +63,7 @@ public: bytecode_ptr(std::nullptr_t) {} - T *get() { return ptr.get(); }; - const T *get() const { return ptr.get(); }; + T *get() const { return ptr.get(); }; T &operator*() { return *ptr; } const T &operator*() const { return *ptr; } diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index 60bf8940..ab2126aa 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -152,7 +152,7 @@ protected: unsigned matches; // Compiled NFA structure. - aligned_unique_ptr nfa; + bytecode_ptr nfa; // Space for full state. aligned_unique_ptr full_state; diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 333c35f3..a32d1bcd 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -116,7 +116,7 @@ protected: unsigned matches; // Compiled NFA structure. - aligned_unique_ptr nfa; + bytecode_ptr nfa; // Space for full state. aligned_unique_ptr full_state; @@ -187,8 +187,7 @@ TEST_P(LimExModelTest, CompressExpand) { // Expand state into a new copy and check that it matches the original // uncompressed state. - aligned_unique_ptr state_copy = - aligned_zmalloc_unique(nfa->scratchStateSize); + auto state_copy = aligned_zmalloc_unique(nfa->scratchStateSize); char *dest = state_copy.get(); memset(dest, 0xff, nfa->scratchStateSize); nfaExpandState(nfa.get(), dest, q.streamState, q.offset, @@ -331,7 +330,7 @@ protected: unsigned matches; // Compiled NFA structure. - aligned_unique_ptr nfa; + bytecode_ptr nfa; }; INSTANTIATE_TEST_CASE_P(LimExReverse, LimExReverseTest, @@ -410,7 +409,7 @@ protected: unsigned matches; // Compiled NFA structure. - aligned_unique_ptr nfa; + bytecode_ptr nfa; // Space for full state. aligned_unique_ptr full_state; From 67779e0c0af17a4c9dbbf75856d24c6ceb2c5dcf Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 3 Apr 2017 17:38:26 +1000 Subject: [PATCH 222/326] bytecode_ptr: some small improvements --- src/util/bytecode_ptr.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h index f55f78ec..1242e23d 100644 --- a/src/util/bytecode_ptr.h +++ b/src/util/bytecode_ptr.h @@ -35,7 +35,9 @@ #define UTIL_BYTECODE_PTR_H #include "util/alloc.h" +#include "util/operators.h" +#include // std::max #include #include @@ -47,7 +49,7 @@ namespace ue2 { * This is intended to be used for flat aligned memory regions that will * eventually end up copied into the Hyperscan bytecode. 
*/ -template class bytecode_ptr { +template class bytecode_ptr : totally_ordered> { public: bytecode_ptr() = default; explicit bytecode_ptr(size_t size, size_t align = alignof(T)) @@ -81,7 +83,14 @@ public: return std::shared_ptr(ptr.release(), d); } - void reset(T* p = nullptr) { ptr.reset(p); } + void reset(T *p = nullptr) { ptr.reset(p); } + + T *release() { + auto *p = ptr.release(); + bytes = 0; + alignment = 0; + return p; + } void swap(bytecode_ptr &other) { using std::swap; @@ -97,11 +106,7 @@ public: size_t align() const { return alignment; } bool operator==(const bytecode_ptr &a) const { return ptr == a.ptr; } - bool operator!=(const bytecode_ptr &a) const { return ptr != a.ptr; } bool operator<(const bytecode_ptr &a) const { return ptr < a.ptr; } - bool operator<=(const bytecode_ptr &a) const { return ptr <= a.ptr; } - bool operator>(const bytecode_ptr &a) const { return ptr > a.ptr; } - bool operator>=(const bytecode_ptr &a) const { return ptr >= a.ptr; } private: /** \brief Deleter function for std::unique_ptr. */ From d9bac2bdb3da02ba755eeebddd9583314f996986 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Apr 2017 09:29:41 +1000 Subject: [PATCH 223/326] teddy: fix forward decl (namespace ue2) --- src/fdr/teddy_compile.cpp | 6 +++++- src/fdr/teddy_compile.h | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 33a1050c..80d3316a 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -26,6 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/** + * \file + * \brief FDR literal matcher: Teddy build code. + */ + #include "teddy_compile.h" #include "fdr.h" @@ -44,7 +49,6 @@ #include "util/target_info.h" #include "util/verify_types.h" - #include #include #include diff --git a/src/fdr/teddy_compile.h b/src/fdr/teddy_compile.h index 22e87405..5ff4d839 100644 --- a/src/fdr/teddy_compile.h +++ b/src/fdr/teddy_compile.h @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief FDR literal matcher: Teddy build API. */ @@ -39,12 +40,12 @@ #include struct FDR; -struct target_t; namespace ue2 { struct Grey; struct hwlmLiteral; +struct target_t; bytecode_ptr teddyBuildTableHinted(const std::vector &lits, bool make_small, u32 hint, From 5653fa55a1af255efc6d752dff3789fea34b99a2 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Apr 2017 10:51:09 +1000 Subject: [PATCH 224/326] bytecode_ptr updates --- src/util/bytecode_ptr.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h index 1242e23d..713649ab 100644 --- a/src/util/bytecode_ptr.h +++ b/src/util/bytecode_ptr.h @@ -28,7 +28,8 @@ /** * \file - * \brief bytecode_ptr: Smart pointer that knows its length and alignment. + * \brief bytecode_ptr: Smart pointer with unique ownership that knows its + * length and alignment. */ #ifndef UTIL_BYTECODE_PTR_H @@ -44,12 +45,14 @@ namespace ue2 { /** - * \brief Smart pointer that knows its length and alignment. + * \brief Smart pointer that knows its length and alignment and behaves like a + * std::unique_ptr -- i.e. it retains unique ownership of the memory region. * * This is intended to be used for flat aligned memory regions that will * eventually end up copied into the Hyperscan bytecode. 
*/ -template class bytecode_ptr : totally_ordered> { +template +class bytecode_ptr : totally_ordered> { public: bytecode_ptr() = default; explicit bytecode_ptr(size_t size, size_t align = alignof(T)) @@ -65,7 +68,7 @@ public: bytecode_ptr(std::nullptr_t) {} - T *get() const { return ptr.get(); }; + T *get() const { return ptr.get(); } T &operator*() { return *ptr; } const T &operator*() const { return *ptr; } From 63fe84c3f14edd231e427be70553c056e48d820d Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Apr 2017 11:02:33 +1000 Subject: [PATCH 225/326] bytecode_ptr: add make_zeroed_bytecode_ptr Rather than always zeroing memory. --- src/fdr/fdr_compile.cpp | 2 +- src/fdr/fdr_confirm_compile.cpp | 4 ++-- src/fdr/flood_compile.cpp | 2 +- src/fdr/teddy_compile.cpp | 2 +- src/hwlm/hwlm_build.cpp | 3 ++- src/hwlm/noodle_build.cpp | 2 +- src/nfa/castlecompile.cpp | 2 +- src/nfa/goughcompile.cpp | 2 +- src/nfa/limex_compile.cpp | 4 ++-- src/nfa/mcclellancompile.cpp | 4 ++-- src/nfa/mcsheng_compile.cpp | 4 ++-- src/nfa/mpvcompile.cpp | 2 +- src/nfa/shengcompile.cpp | 2 +- src/nfa/tamaramacompile.cpp | 2 +- src/nfagraph/ng_lbr.cpp | 2 +- src/rose/rose_build_anchored.cpp | 3 ++- src/rose/rose_build_bytecode.cpp | 4 ++-- src/smallwrite/smallwrite_build.cpp | 2 +- src/util/bytecode_ptr.h | 16 +++++++++++++++- 19 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 36bcda69..c4ea50f2 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -162,7 +162,7 @@ bytecode_ptr FDRCompiler::setupFDR() { headerSize, tabSize, confirmTmp.size(), floodControlTmp.size(), size); - auto fdr = make_bytecode_ptr(size, 64); + auto fdr = make_zeroed_bytecode_ptr(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc fdr->size = size; diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 5e1a540e..d6eb6640 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -284,7 +284,7 @@ bytecode_ptr getFDRConfirm(const vector &lits, sizeof(LitInfo) * lits.size() + totalLitSize; size = ROUNDUP_N(size, alignof(FDRConfirm)); - auto fdrc = make_bytecode_ptr(size); + auto fdrc = make_zeroed_bytecode_ptr(size); assert(fdrc); // otherwise would have thrown std::bad_alloc fdrc->andmsk = andmsk; @@ -373,7 +373,7 @@ setupFullConfs(const vector &lits, u32 totalConfSwitchSize = nBuckets * sizeof(u32); u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize); - auto buf = make_bytecode_ptr(totalSize, 16); + auto buf = make_zeroed_bytecode_ptr(totalSize, 16); assert(buf); // otherwise would have thrown std::bad_alloc u32 *confBase = (u32 *)buf.get(); diff --git a/src/fdr/flood_compile.cpp b/src/fdr/flood_compile.cpp index 9b948419..7dcc17d1 100644 --- a/src/fdr/flood_compile.cpp +++ b/src/fdr/flood_compile.cpp @@ -207,7 +207,7 @@ bytecode_ptr setupFDRFloodControl(const vector &lits, size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods; size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize); - auto buf = make_bytecode_ptr(totalSize, 16); + auto buf = make_zeroed_bytecode_ptr(totalSize, 16); assert(buf); // otherwise would have thrown std::bad_alloc u32 *floodHeader = (u32 *)buf.get(); diff --git a/src/fdr/teddy_compile.cpp b/src/fdr/teddy_compile.cpp index 80d3316a..6f956e8c 100644 --- a/src/fdr/teddy_compile.cpp +++ b/src/fdr/teddy_compile.cpp @@ -324,7 +324,7 @@ bytecode_ptr TeddyCompiler::build() { floodControlTmp.size(), 16 * maskWidth); - auto fdr = 
make_bytecode_ptr(size, 64); + auto fdr = make_zeroed_bytecode_ptr(size, 64); assert(fdr); // otherwise would have thrown std::bad_alloc Teddy *teddy = (Teddy *)fdr.get(); // ugly u8 *teddy_base = (u8 *)teddy; diff --git a/src/hwlm/hwlm_build.cpp b/src/hwlm/hwlm_build.cpp index 824ac3fd..2f61ea6d 100644 --- a/src/hwlm/hwlm_build.cpp +++ b/src/hwlm/hwlm_build.cpp @@ -165,7 +165,8 @@ bytecode_ptr hwlmBuild(const vector &lits, bool make_small, throw ResourceLimitError(); } - auto h = make_bytecode_ptr(ROUNDUP_CL(sizeof(HWLM)) + engSize, 64); + const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize; + auto h = make_zeroed_bytecode_ptr(hwlm_len, 64); h->type = engType; memcpy(HWLM_DATA(h.get()), eng.get(), engSize); diff --git a/src/hwlm/noodle_build.cpp b/src/hwlm/noodle_build.cpp index 6412d3f5..63fdf072 100644 --- a/src/hwlm/noodle_build.cpp +++ b/src/hwlm/noodle_build.cpp @@ -74,7 +74,7 @@ bytecode_ptr noodBuildTable(const hwlmLiteral &lit) { const auto &s = lit.s; size_t noodle_len = sizeof(noodTable) + s.length(); - auto n = make_bytecode_ptr(noodle_len); + auto n = make_zeroed_bytecode_ptr(noodle_len); assert(n); size_t key_offset = findNoodFragOffset(lit); diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 00f75159..40fbc18c 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -579,7 +579,7 @@ buildCastle(const CastleProto &proto, total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter)); total_size += byte_length(stale_iter); // stale sparse iter - auto nfa = make_bytecode_ptr(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); nfa->type = verify_u8(CASTLE_NFA); nfa->length = verify_u32(total_size); nfa->nPositions = verify_u32(subs.size()); diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 62360561..d92f285f 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -1116,7 +1116,7 @@ bytecode_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, gi.stream_som_loc_width = somPrecision; u32 gough_size = ROUNDUP_N(curr_offset, 16); - auto gough_dfa = make_bytecode_ptr(gough_size); + auto gough_dfa = make_zeroed_bytecode_ptr(gough_size); memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length); memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi)); diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 235d7168..3cdf5de1 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -1797,7 +1797,7 @@ struct Factory { u32 tableOffset, tugMaskOffset; size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset); - auto info = make_bytecode_ptr(len); + auto info = make_zeroed_bytecode_ptr(len); char *info_ptr = (char *)info.get(); // Collect state space info. 
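The conversions in this patch follow one rule: allocation sites that fill a
structure piecemeal and rely on untouched bytes being zero move to
make_zeroed_bytecode_ptr, while sites that overwrite the whole region keep the now
non-zeroing make_bytecode_ptr. A hedged illustration of the difference;
ExampleHeader is invented for this sketch and is not a UE2 structure:

    struct ExampleHeader {
        u32 len;
        u32 flags; // only safe to leave unwritten if the allocation zeroes it
    };

    auto zeroed = make_zeroed_bytecode_ptr<ExampleHeader>(sizeof(ExampleHeader));
    zeroed->len = 16;  // flags stays 0 courtesy of the memset in make_zeroed_...

    auto raw = make_bytecode_ptr<ExampleHeader>(sizeof(ExampleHeader));
    raw->len = 16;
    raw->flags = 0;    // raw allocation: every field must be written by hand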
@@ -2297,7 +2297,7 @@ struct Factory { size_t nfaSize = sizeof(NFA) + offset; DEBUG_PRINTF("nfa size %zu\n", nfaSize); - auto nfa = make_bytecode_ptr(nfaSize); + auto nfa = make_zeroed_bytecode_ptr(nfaSize); assert(nfa); // otherwise we would have thrown std::bad_alloc implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get()); diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index e07d7a2a..206f468a 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -496,7 +496,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - auto nfa = make_bytecode_ptr(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); char *nfa_base = (char *)nfa.get(); populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, @@ -715,7 +715,7 @@ bytecode_ptr mcclellanCompile8(dfa_info &info, const CompileContext &cc, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - auto nfa = bytecode_ptr(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); char *nfa_base = (char *)nfa.get(); mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index c7133d08..e4e4173a 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -872,7 +872,7 @@ bytecode_ptr mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - auto nfa = make_bytecode_ptr(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, @@ -998,7 +998,7 @@ bytecode_ptr mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - auto nfa = make_bytecode_ptr(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, diff --git a/src/nfa/mpvcompile.cpp b/src/nfa/mpvcompile.cpp index 51787512..8497c648 100644 --- a/src/nfa/mpvcompile.cpp +++ b/src/nfa/mpvcompile.cpp @@ -343,7 +343,7 @@ bytecode_ptr mpvCompile(const vector &puffs_in, DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len); - auto nfa = make_bytecode_ptr(len); + auto nfa = make_zeroed_bytecode_ptr(len); mpv_puffette *pa_base = (mpv_puffette *) ((char *)nfa.get() + sizeof(NFA) + sizeof(mpv) diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index b2996aa5..8c061913 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -507,7 +507,7 @@ bytecode_ptr shengCompile(raw_dfa &raw, const CompileContext &cc, DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n", nfa_size, total_aux, total_reports, total_accel, total_size); - auto nfa = make_bytecode_ptr(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset, accel_offset, total_size, total_size - sizeof(NFA)); diff --git a/src/nfa/tamaramacompile.cpp b/src/nfa/tamaramacompile.cpp index 6d253411..1a6e8bef 100644 --- 
a/src/nfa/tamaramacompile.cpp +++ b/src/nfa/tamaramacompile.cpp @@ -134,7 +134,7 @@ buildTamarama(const TamaInfo &tamaInfo, const u32 queue, // use subSize as a sentinel value for no active subengines, // so add one to subSize here u32 activeIdxSize = calcPackedBytes(subSize + 1); - auto nfa = make_bytecode_ptr(total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); nfa->type = verify_u8(TAMARAMA_NFA); nfa->length = verify_u32(total_size); nfa->queueIndex = queue; diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index 11262ae1..9bf16efe 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -137,7 +137,7 @@ bytecode_ptr makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype, } size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) + tableLen + sizeof(u64a); - auto nfa = make_bytecode_ptr(len); + auto nfa = make_zeroed_bytecode_ptr(len); nfa->type = verify_u8(nfa_type); nfa->length = verify_u32(len); return nfa; diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 12b62128..6d56ee00 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -891,7 +891,8 @@ buildAnchoredMatcher(RoseBuildImpl &build, const vector &fragments, throw ResourceLimitError(); } - auto atable = make_bytecode_ptr(total_size, 64); + auto atable = + make_zeroed_bytecode_ptr(total_size, 64); char *curr = (char *)atable.get(); u32 state_offset = 0; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b48a80c2..baf13446 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5612,7 +5612,7 @@ bytecode_ptr addSmallWriteEngine(const RoseBuildImpl &build, const size_t smwrOffset = ROUNDUP_CL(mainSize); const size_t newSize = smwrOffset + smallWriteSize; - auto rose2 = make_bytecode_ptr(newSize, 64); + auto rose2 = make_zeroed_bytecode_ptr(newSize, 64); char *ptr = (char *)rose2.get(); memcpy(ptr, rose.get(), mainSize); memcpy(ptr + smwrOffset, smwr_engine.get(), smallWriteSize); @@ -5958,7 +5958,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.size = currOffset; // Time to allocate the real RoseEngine structure, at cacheline alignment. - auto engine = make_bytecode_ptr(currOffset, 64); + auto engine = make_zeroed_bytecode_ptr(currOffset, 64); assert(engine); // will have thrown bad_alloc otherwise. // Copy in our prototype engine data. diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index dc2a4466..d5c1ccb7 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -504,7 +504,7 @@ bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { } u32 size = sizeof(SmallWriteEngine) + nfa->length; - auto smwr = make_bytecode_ptr(size); + auto smwr = make_zeroed_bytecode_ptr(size); smwr->size = size; smwr->start_offset = start_offset; diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h index 713649ab..d3f5215a 100644 --- a/src/util/bytecode_ptr.h +++ b/src/util/bytecode_ptr.h @@ -63,7 +63,6 @@ public: if (!ptr) { throw std::bad_alloc(); } - std::memset(ptr.get(), 0, bytes); } bytecode_ptr(std::nullptr_t) {} @@ -122,12 +121,27 @@ private: size_t alignment = 0; //!< Alignment of memory region in bytes. }; +/** + * \brief Constructs a bytecode_ptr with the given size and alignment. 
+ */ template inline bytecode_ptr make_bytecode_ptr(size_t size, size_t align = alignof(T)) { return bytecode_ptr(size, align); } +/** + * \brief Constructs a bytecode_ptr with the given size and alignment and + * fills the memory region with zeroes. + */ +template +inline bytecode_ptr make_zeroed_bytecode_ptr(size_t size, + size_t align = alignof(T)) { + auto ptr = make_bytecode_ptr(size, align); + std::memset(ptr.get(), 0, size); + return ptr; +} + } // namespace ue2 #endif // UTIL_BYTECODE_PTR_H From 0a3bd455ad0a00d01201f6c919d5dc5a94737ba6 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Apr 2017 11:06:02 +1000 Subject: [PATCH 226/326] rose_build_bytecode: fix up indentation --- src/rose/rose_build_bytecode.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index baf13446..13d485ea 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -873,11 +873,10 @@ void findTriggerSequences(const RoseBuildImpl &tbi, } static -bytecode_ptr -makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, const bool is_prefix, - const bool is_transient, - const map> &infixTriggers, - const CompileContext &cc) { +bytecode_ptr makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, + const bool is_prefix, const bool is_transient, + const map> &infixTriggers, + const CompileContext &cc) { const ReportManager &rm = tbi.rm; bytecode_ptr n; From b6047ea5d4ba0c01eccc91beab09688ffd13b328 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Apr 2017 11:19:07 +1000 Subject: [PATCH 227/326] rose: use bytecode_ptr for interpreter programs --- src/rose/rose_build_bytecode.cpp | 9 ++++----- src/rose/rose_build_program.cpp | 12 +++++++----- src/rose/rose_build_program.h | 6 +++--- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 13d485ea..98e1ef70 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2567,11 +2567,10 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) { recordResources(bc.resources, program); recordLongLiterals(bc.longLiterals, program); - u32 len = 0; - auto prog_bytecode = writeProgram(bc.engine_blob, program, &len); - u32 offset = bc.engine_blob.add(prog_bytecode.get(), len, - ROSE_INSTR_MIN_ALIGN); - DEBUG_PRINTF("prog len %u written at offset %u\n", len, offset); + auto prog_bytecode = writeProgram(bc.engine_blob, program); + u32 offset = bc.engine_blob.add(prog_bytecode); + DEBUG_PRINTF("prog len %zu written at offset %u\n", prog_bytecode.size(), + offset); bc.program_cache.emplace(move(program), offset); return offset; } diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 2fb76c77..bca867f0 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -639,12 +639,14 @@ OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { return offset_map; } -aligned_unique_ptr -writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len) { - const auto offset_map = makeOffsetMap(program, total_len); - DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), *total_len); +bytecode_ptr writeProgram(RoseEngineBlob &blob, + const RoseProgram &program) { + u32 total_len = 0; + const auto offset_map = makeOffsetMap(program, &total_len); + DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), total_len); - auto bytecode = aligned_zmalloc_unique(*total_len); 
+ auto bytecode = make_zeroed_bytecode_ptr(total_len, + ROSE_INSTR_MIN_ALIGN); char *ptr = bytecode.get(); for (const auto &ri : program) { diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 3931f095..06233231 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -32,7 +32,7 @@ #include "rose_build_impl.h" #include "rose_program.h" #include "som/som_operation.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/container.h" #include "util/hash.h" #include "util/make_unique.h" @@ -2328,8 +2328,8 @@ public: } }; -aligned_unique_ptr -writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len); +bytecode_ptr writeProgram(RoseEngineBlob &blob, + const RoseProgram &program); class RoseProgramHash { public: From e8162960fcbbf4eea848fae85acb9632c7854011 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Apr 2017 11:41:50 +1000 Subject: [PATCH 228/326] unit: use bytecode_ptr instead of aligned_unique_ptr --- unit/internal/fdr.cpp | 2 +- unit/internal/fdr_loadval.cpp | 8 ++++---- unit/internal/lbr.cpp | 26 +++++++++++++------------- unit/internal/limex_nfa.cpp | 20 ++++++++++---------- unit/internal/simd_utils.cpp | 4 ++-- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/unit/internal/fdr.cpp b/unit/internal/fdr.cpp index 8ec72598..bd0bb4c0 100644 --- a/unit/internal/fdr.cpp +++ b/unit/internal/fdr.cpp @@ -383,7 +383,7 @@ TEST_P(FDRp, moveByteStream) { size_t size = fdrSize(fdrTable0.get()); - auto fdrTable = aligned_zmalloc_unique(size); + auto fdrTable = make_bytecode_ptr(size, 64); EXPECT_NE(nullptr, fdrTable); memcpy(fdrTable.get(), fdrTable0.get(), size); diff --git a/unit/internal/fdr_loadval.cpp b/unit/internal/fdr_loadval.cpp index 22fee770..bb5efb5f 100644 --- a/unit/internal/fdr_loadval.cpp +++ b/unit/internal/fdr_loadval.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #include "gtest/gtest.h" #include "fdr/fdr_loadval.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" using namespace std; using namespace testing; @@ -71,7 +71,7 @@ static void fillWithBytes(u8 *ptr, size_t len) { TYPED_TEST(FDR_Loadval, Normal) { // We should be able to do a normal load at any alignment. const size_t len = sizeof(TypeParam); - aligned_unique_ptr mem_p = aligned_zmalloc_unique(len + 15); + auto mem_p = make_bytecode_ptr(len + 15, 16); u8 * mem = mem_p.get(); ASSERT_TRUE(ISALIGNED_16(mem)); fillWithBytes(mem, len + 15); @@ -90,7 +90,7 @@ TYPED_TEST(FDR_Loadval, CautiousEverywhere) { // the 'lo' ptr or after the 'hi' ptr. 
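    // The test below allocates one spare byte and starts at mem_p.get() + 1,
    // making the valid [mem, mem + len) window unaligned and snug against
    // the allocation on both sides.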
const size_t len = sizeof(TypeParam); - aligned_unique_ptr mem_p = aligned_zmalloc_unique(len + 1); + auto mem_p = make_bytecode_ptr(len + 1, 16); u8 *mem = mem_p.get() + 1; // force unaligned fillWithBytes(mem, len); diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index ab2126aa..d32f7e8f 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -29,20 +29,20 @@ #include "config.h" #include "gtest/gtest.h" -#include "util/target_info.h" -#include "util/charreach.h" +#include "grey.h" +#include "hs_compile.h" /* for controlling ssse3 usage */ +#include "compiler/compiler.h" #include "nfa/lbr.h" #include "nfa/nfa_api.h" -#include "nfa/nfa_internal.h" #include "nfa/nfa_api_util.h" +#include "nfa/nfa_internal.h" +#include "nfagraph/ng.h" #include "nfagraph/ng_lbr.h" #include "nfagraph/ng_util.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" +#include "util/charreach.h" #include "util/compile_context.h" -#include "grey.h" -#include "nfagraph/ng.h" -#include "compiler/compiler.h" -#include "hs_compile.h" /* for controlling ssse3 usage */ +#include "util/target_info.h" #include @@ -110,8 +110,8 @@ protected: nfa = constructLBR(*g, triggers, cc, rm); ASSERT_TRUE(nfa != nullptr); - full_state = aligned_zmalloc_unique(nfa->scratchStateSize); - stream_state = aligned_zmalloc_unique(nfa->streamStateSize); + full_state = make_bytecode_ptr(nfa->scratchStateSize, 64); + stream_state = make_bytecode_ptr(nfa->streamStateSize); } virtual void initQueue() { @@ -154,11 +154,11 @@ protected: // Compiled NFA structure. bytecode_ptr nfa; - // Space for full state. - aligned_unique_ptr full_state; + // Aligned space for full state. + bytecode_ptr full_state; // Space for stream state. - aligned_unique_ptr stream_state; + bytecode_ptr stream_state; // Queue structure. struct mq q; diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index a32d1bcd..c70ceeae 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -38,7 +38,7 @@ #include "nfagraph/ng.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_util.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/target_info.h" using namespace std; @@ -88,8 +88,8 @@ protected: type, cc); ASSERT_TRUE(nfa != nullptr); - full_state = aligned_zmalloc_unique(nfa->scratchStateSize); - stream_state = aligned_zmalloc_unique(nfa->streamStateSize); + full_state = make_bytecode_ptr(nfa->scratchStateSize, 64); + stream_state = make_bytecode_ptr(nfa->streamStateSize); } virtual void initQueue() { @@ -119,10 +119,10 @@ protected: bytecode_ptr nfa; // Space for full state. - aligned_unique_ptr full_state; + bytecode_ptr full_state; // Space for stream state. - aligned_unique_ptr stream_state; + bytecode_ptr stream_state; // Queue structure. struct mq q; @@ -187,7 +187,7 @@ TEST_P(LimExModelTest, CompressExpand) { // Expand state into a new copy and check that it matches the original // uncompressed state. 
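    // (The explicit 64 in the make_bytecode_ptr call below preserves the
    // 64-byte alignment that aligned_zmalloc_unique guaranteed.)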
- auto state_copy = aligned_zmalloc_unique(nfa->scratchStateSize); + auto state_copy = make_bytecode_ptr(nfa->scratchStateSize, 64); char *dest = state_copy.get(); memset(dest, 0xff, nfa->scratchStateSize); nfaExpandState(nfa.get(), dest, q.streamState, q.offset, @@ -381,8 +381,8 @@ protected: type, cc); ASSERT_TRUE(nfa != nullptr); - full_state = aligned_zmalloc_unique(nfa->scratchStateSize); - stream_state = aligned_zmalloc_unique(nfa->streamStateSize); + full_state = make_bytecode_ptr(nfa->scratchStateSize, 64); + stream_state = make_bytecode_ptr(nfa->streamStateSize); } virtual void initQueue() { @@ -412,10 +412,10 @@ protected: bytecode_ptr nfa; // Space for full state. - aligned_unique_ptr full_state; + bytecode_ptr full_state; // Space for stream state. - aligned_unique_ptr stream_state; + bytecode_ptr stream_state; // Queue structure. struct mq q; diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 31b72648..220d2a12 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -29,8 +29,8 @@ #include "config.h" #include "gtest/gtest.h" -#include "util/alloc.h" #include "util/arch.h" +#include "util/bytecode_ptr.h" #include "util/make_unique.h" #include "util/simd_utils.h" @@ -540,7 +540,7 @@ TYPED_TEST(SimdUtilsTest, load_store) { a.bytes[i] = (char)(i % 256); } - aligned_unique_ptr mem_ptr = aligned_zmalloc_unique(sizeof(a)); + auto mem_ptr = make_bytecode_ptr(sizeof(a), alignof(TypeParam)); char *mem = mem_ptr.get(); ASSERT_EQ(0, (size_t)mem % 16U); From d269b83dda4f8707718161353ab92aba330cd324 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Apr 2017 11:50:10 +1000 Subject: [PATCH 229/326] rose_build_long_lit: use bytecode_ptr --- src/rose/rose_build_long_lit.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rose/rose_build_long_lit.cpp b/src/rose/rose_build_long_lit.cpp index c32f49d0..7ebf73ec 100644 --- a/src/rose/rose_build_long_lit.cpp +++ b/src/rose/rose_build_long_lit.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,7 @@ #include "rose_build_engine_blob.h" #include "rose_build_impl.h" #include "stream_long_lit_hash.h" -#include "util/alloc.h" +#include "util/bytecode_ptr.h" #include "util/bitutils.h" #include "util/verify_types.h" #include "util/compile_context.h" @@ -401,7 +401,7 @@ u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2)); u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; - auto table = aligned_zmalloc_unique(tabSize); + auto table = make_zeroed_bytecode_ptr(tabSize, 16); assert(table); // otherwise would have thrown std::bad_alloc // Fill in the RoseLongLitTable header structure. 
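Handing the finished table back with blob.add(table), as the following hunk does,
relies on an add() overload for bytecode_ptr that pulls the region's size and
alignment from the pointer itself. That overload is not shown in this patch; a
sketch of its plausible shape, expressed in terms of the existing
add(ptr, len, align) form:

    // Assumed shape, not the literal RoseEngineBlob code.
    template <typename T>
    u32 add(const bytecode_ptr<T> &a) {
        // Size and alignment travel with the pointer, so callers can no
        // longer pass a mismatched length by hand.
        return add(a.get(), a.size(), a.align());
    }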
@@ -435,7 +435,7 @@ u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, *historyRequired = max(*historyRequired, max_len); *longLitStreamStateRequired = tot_state_bytes; - return blob.add(table.get(), tabSize, 16); + return blob.add(table); } } // namespace ue2 From 820f1432aadf24a0c4ea199133a68c5e74525821 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 4 Apr 2017 11:52:25 +1000 Subject: [PATCH 230/326] alloc: remove aligned_unique_ptr --- src/util/alloc.h | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/src/util/alloc.h b/src/util/alloc.h index 191bc387..de20c8d0 100644 --- a/src/util/alloc.h +++ b/src/util/alloc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief Aligned memory alloc/free. */ @@ -51,25 +52,6 @@ void *aligned_zmalloc(size_t size); /** \brief Free a pointer allocated with \ref aligned_zmalloc. */ void aligned_free(void *ptr); -template struct AlignedDeleter { - void operator()(T *ptr) const { aligned_free(ptr); } -}; -template -using aligned_unique_ptr = std::unique_ptr>; - -/** \brief 64-byte aligned, zeroed malloc that returns an appropriately-typed - * aligned_unique_ptr. - * - * If the requested size cannot be allocated, throws std::bad_alloc. - */ -template -inline -aligned_unique_ptr aligned_zmalloc_unique(size_t size) { - T* ptr = static_cast(aligned_zmalloc(size)); - assert(ptr); // Guaranteed by aligned_zmalloc. - return aligned_unique_ptr(ptr); -} - /** \brief Internal use only, used by AlignedAllocator. */ void *aligned_malloc_internal(size_t size, size_t align); From 3e5a8c9c9042eaac7ec5689a9305e441a8301a5b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 5 Apr 2017 10:29:22 +1000 Subject: [PATCH 231/326] rose: eliminate roseSize, use bytecode_ptr size --- src/compiler/compiler.cpp | 2 +- src/rose/rose_build.h | 2 -- src/rose/rose_build_bytecode.cpp | 2 +- src/rose/rose_build_misc.cpp | 5 ----- 4 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index b2deae32..9b726f77 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -362,7 +362,7 @@ struct hs_database *build(NG &ng, unsigned int *length) { if (!rose) { throw CompileError("Unable to generate bytecode."); } - *length = roseSize(rose.get()); + *length = rose.size(); if (!*length) { DEBUG_PRINTF("RoseEngine has zero length\n"); assert(0); diff --git a/src/rose/rose_build.h b/src/rose/rose_build.h index a14ea8ff..cbb925f7 100644 --- a/src/rose/rose_build.h +++ b/src/rose/rose_build.h @@ -134,8 +134,6 @@ std::unique_ptr makeRoseBuilder(ReportManager &rm, bool roseCheckRose(const RoseInGraph &ig, bool prefilter, const ReportManager &rm, const CompileContext &cc); -size_t roseSize(const RoseEngine *t); - /* used by heuristics to determine the small write engine. High numbers are * intended to indicate a lightweight rose. 
*/ u32 roseQuality(const RoseEngine *t); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 98e1ef70..dad00cd6 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5603,7 +5603,7 @@ bytecode_ptr addSmallWriteEngine(const RoseBuildImpl &build, return rose; } - const size_t mainSize = roseSize(rose.get()); + const size_t mainSize = rose.size(); const size_t smallWriteSize = smwr_engine.size(); DEBUG_PRINTF("adding smwr engine, size=%zu\n", smallWriteSize); diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index ef650714..1e353a58 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -239,11 +239,6 @@ unique_ptr makeRoseBuilder(ReportManager &rm, return ue2::make_unique(rm, ssm, smwr, cc, boundary); } -size_t roseSize(const RoseEngine *t) { - assert(t); - return t->size; -} - bool roseIsPureLiteral(const RoseEngine *t) { return t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL; } From 82b889f4a26c62c03e95a02f2d83737a2b6ce60b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 5 Apr 2017 15:54:52 +1000 Subject: [PATCH 232/326] bytecode_ptr: fix shadow/conversion issues (gcc48) --- src/util/bytecode_ptr.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h index d3f5215a..ace5063c 100644 --- a/src/util/bytecode_ptr.h +++ b/src/util/bytecode_ptr.h @@ -55,11 +55,11 @@ template class bytecode_ptr : totally_ordered> { public: bytecode_ptr() = default; - explicit bytecode_ptr(size_t size, size_t align = alignof(T)) - : bytes(size), alignment(align) { + explicit bytecode_ptr(size_t bytes_in, size_t alignment_in = alignof(T)) + : bytes(bytes_in), alignment(alignment_in) { // posix_memalign doesn't like us asking for smaller alignment. - size_t mem_align = std::max(align, sizeof(void *)); - ptr.reset(static_cast(aligned_malloc_internal(size, mem_align))); + size_t mem_align = std::max(alignment, sizeof(void *)); + ptr.reset(static_cast(aligned_malloc_internal(bytes, mem_align))); if (!ptr) { throw std::bad_alloc(); } @@ -116,7 +116,7 @@ private: void operator()(DT *p) const { aligned_free_internal(p); } }; - std::unique_ptr> ptr = nullptr; //!< Underlying pointer. + std::unique_ptr> ptr; //!< Underlying pointer. size_t bytes = 0; //!< Size of memory region in bytes. size_t alignment = 0; //!< Alignment of memory region in bytes. }; From d4146059db033326c0d2da035702fb451ac66707 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 5 Apr 2017 16:58:53 +1000 Subject: [PATCH 233/326] bytecode_ptr: add shrink() member function --- src/fdr/fdr_confirm_compile.cpp | 1 + src/util/bytecode_ptr.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index d6eb6640..319141c4 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -337,6 +337,7 @@ bytecode_ptr getFDRConfirm(const vector &lits, size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base), alignof(FDRConfirm)); assert(actual_size <= size); + fdrc.shrink(actual_size); return fdrc; } diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h index ace5063c..15d3e2fe 100644 --- a/src/util/bytecode_ptr.h +++ b/src/util/bytecode_ptr.h @@ -101,6 +101,19 @@ public: swap(alignment, other.alignment); } + /** + * \brief Reduces the apparent size of the memory region. 
Note that this
+     * does not reallocate and copy, it just changes the value returned by
+     * size().
+     */
+    void shrink(size_t size) {
+        if (size > bytes) {
+            assert(0);
+            throw std::logic_error("Must shrink to a smaller value");
+        }
+        bytes = size;
+    }
+
     /** \brief Returns size of the memory region in bytes. */
     size_t size() const { return bytes; }

From 37596c50cfdf9ba7b6b2a6ba5d27b0629e5c3750 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 6 Apr 2017 08:37:05 +1000
Subject: [PATCH 234/326] bytecode_ptr: avoid shadowing size()

---
 src/util/bytecode_ptr.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h
index 15d3e2fe..a9a4b704 100644
--- a/src/util/bytecode_ptr.h
+++ b/src/util/bytecode_ptr.h
@@ -106,12 +106,12 @@ public:
      * does not reallocate and copy, it just changes the value returned by
      * size().
      */
-    void shrink(size_t size) {
-        if (size > bytes) {
+    void shrink(size_t new_size) {
+        if (new_size > bytes) {
             assert(0);
             throw std::logic_error("Must shrink to a smaller value");
         }
-        bytes = size;
+        bytes = new_size;
     }

     /** \brief Returns size of the memory region in bytes. */

From 97bbb6250424b12409fe447cf50f8cd08ea16347 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 6 Apr 2017 10:09:34 +1000
Subject: [PATCH 235/326] bytecode_ptr: needs <stdexcept> for logic_error

---
 src/util/bytecode_ptr.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/util/bytecode_ptr.h b/src/util/bytecode_ptr.h
index a9a4b704..f1f2e5ef 100644
--- a/src/util/bytecode_ptr.h
+++ b/src/util/bytecode_ptr.h
@@ -41,6 +41,7 @@
 #include <algorithm> // std::max
 #include <cstring>
 #include <memory>
+#include <stdexcept> // std::logic_error

 namespace ue2 {

From 2f9d063190f99bbd6ccb0049d20e886f15c95908 Mon Sep 17 00:00:00 2001
From: "Xu, Chi"
Date: Thu, 6 Apr 2017 04:20:17 +0800
Subject: [PATCH 236/326] rose: fix CHECK_MULTIPATH_LOOKAROUND match difference bug

---
 src/rose/program_runtime.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h
index 88c312d2..10a87c8b 100644
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -1192,9 +1192,10 @@ int roseMultipathLookaround(const struct RoseEngine *t,
         return 0;
     }

-    u32 start_offset = 0;
+    s8 base_look_offset = *look;
     do {
         s64a offset = base_offset + *look;
+        u32 start_offset = (u32)(*look - base_look_offset);
         DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset,
                      start_mask[start_offset]);
         path = start_mask[start_offset];
@@ -1202,7 +1203,6 @@ int roseMultipathLookaround(const struct RoseEngine *t,
             break;
         }
         DEBUG_PRINTF("look=%d before history\n", *look);
-        start_offset++;
         look++;
         reach += MULTI_REACH_BITVECTOR_LEN;
     } while (look < look_end);

From eec2b8233dbd78552cd5a2ef689c75dc6e940e3d Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 5 Apr 2017 16:40:07 +1000
Subject: [PATCH 237/326] rdfa_merge: sort esets before alphabet calc

---
 src/nfa/rdfa_merge.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/nfa/rdfa_merge.cpp b/src/nfa/rdfa_merge.cpp
index 45457555..50e9b62a 100644
--- a/src/nfa/rdfa_merge.cpp
+++ b/src/nfa/rdfa_merge.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,6 +40,7 @@
 #include "util/report_manager.h"
 #include "util/ue2_containers.h"

+#include <algorithm>
 #include <queue>

 using namespace std;
@@ -135,6 +136,10 @@ public:
         }
     }

+    // Sort so that our alphabet mapping isn't dependent on the order of
+    // rdfas passed in.
+    sort(esets.begin(), esets.end());
+
     alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
 }

From b75b169b495175bb0ca49e81759805f8052e6433 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 2 Mar 2017 11:09:27 +1100
Subject: [PATCH 238/326] smallwrite: simple trie experiment

---
 src/smallwrite/smallwrite_build.cpp | 53 +++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp
index d5c1ccb7..f7c9ad8c 100644
--- a/src/smallwrite/smallwrite_build.cpp
+++ b/src/smallwrite/smallwrite_build.cpp
@@ -51,6 +51,7 @@
 #include "util/compile_context.h"
 #include "util/container.h"
 #include "util/make_unique.h"
+#include "util/ue2_graph.h"
 #include "util/ue2string.h"
 #include "util/verify_types.h"

@@ -65,9 +66,30 @@ namespace ue2 {

 #define LITERAL_MERGE_CHUNK_SIZE 25
 #define DFA_MERGE_MAX_STATES 8000
+#define MAX_TRIE_VERTICES 8000

 namespace { // unnamed

+struct LitTrieVertexProps {
+    LitTrieVertexProps() = default;
+    explicit LitTrieVertexProps(char c_in) : c(c_in) {}
+    char c = 0;
+    size_t index; // managed by ue2_graph
+};
+
+struct LitTrieEdgeProps {
+    LitTrieEdgeProps() = default;
+    size_t index; // managed by ue2_graph
+};
+
+struct LitTrie
+    : public ue2_graph<LitTrie, LitTrieVertexProps, LitTrieEdgeProps> {
+
+    LitTrie() : root(add_vertex(*this)) {}
+
+    const vertex_descriptor root;
+};
+
 // Concrete impl class
 class SmallWriteBuildImpl : public SmallWriteBuild {
 public:
@@ -89,6 +111,8 @@ public:

     unique_ptr<raw_dfa> rdfa;
     vector<pair<ue2_literal, ReportID>> cand_literals;
+    LitTrie lit_trie;
+    LitTrie lit_trie_nocase;

     bool poisoned;
 };
@@ -247,6 +271,29 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
     }
 }

+static
+bool add_to_trie(const ue2_literal &literal, LitTrie &trie) {
+    auto u = trie.root;
+    for (auto &c : literal) {
+        auto next = LitTrie::null_vertex();
+        for (auto v : adjacent_vertices_range(u, trie)) {
+            if (trie[v].c == c.c) {
+                next = v;
+                break;
+            }
+        }
+        if (next == LitTrie::null_vertex()) {
+            next = add_vertex(LitTrieVertexProps(c.c), trie);
+            add_edge(u, next, trie);
+        }
+        u = next;
+    }
+
+    DEBUG_PRINTF("added '%s' to trie, now %zu vertices\n",
+                 escapeString(literal).c_str(), num_vertices(trie));
+    return num_vertices(trie) <= MAX_TRIE_VERTICES;
+}
+
 void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
     // If the graph is poisoned (i.e. we can't build a SmallWrite version),
     // we don't even try.
@@ -260,6 +307,12 @@ void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {

     cand_literals.push_back(make_pair(literal, r));

+    if (!add_to_trie(literal,
+                     literal.any_nocase() ? lit_trie_nocase : lit_trie)) {
+        poisoned = true;
+        return;
+    }
+
     if (cand_literals.size() > cc.grey.smallWriteMaxLiterals) {
         poisoned = true;
     }

From d4c66e294bb9c76d043f41f5db88f001292dcd02 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Fri, 31 Mar 2017 14:04:44 +1100
Subject: [PATCH 239/326] smallwrite: aho-corasick construction for literals

---
 src/smallwrite/smallwrite_build.cpp | 410 ++++++++++++++++++++++------
 src/smallwrite/smallwrite_build.h   |  19 +-
 2 files changed, 333 insertions(+), 96 deletions(-)

diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp
index f7c9ad8c..a27db736 100644
--- a/src/smallwrite/smallwrite_build.cpp
+++ b/src/smallwrite/smallwrite_build.cpp
@@ -26,6 +26,11 @@
  * POSSIBILITY OF SUCH DAMAGE.
*/ +/** + * \file + * \brief Small-write engine build code. + */ + #include "smallwrite/smallwrite_build.h" #include "grey.h" @@ -48,6 +53,7 @@ #include "util/alloc.h" #include "util/bytecode_ptr.h" #include "util/charreach.h" +#include "util/compare.h" #include "util/compile_context.h" #include "util/container.h" #include "util/make_unique.h" @@ -60,36 +66,58 @@ #include #include +#include + using namespace std; namespace ue2 { -#define LITERAL_MERGE_CHUNK_SIZE 25 #define DFA_MERGE_MAX_STATES 8000 #define MAX_TRIE_VERTICES 8000 -namespace { // unnamed - struct LitTrieVertexProps { LitTrieVertexProps() = default; - explicit LitTrieVertexProps(char c_in) : c(c_in) {} - char c = 0; + explicit LitTrieVertexProps(u8 c_in) : c(c_in) {} size_t index; // managed by ue2_graph + u8 c = 0; //!< character reached on this vertex + flat_set reports; //!< managed reports fired on this vertex }; struct LitTrieEdgeProps { - LitTrieEdgeProps() = default; size_t index; // managed by ue2_graph }; +/** + * \brief BGL graph used to store a trie of literals (for later AC construction + * into a DFA). + */ struct LitTrie : public ue2_graph { LitTrie() : root(add_vertex(*this)) {} - const vertex_descriptor root; + const vertex_descriptor root; //!< Root vertex for the trie. }; +static +bool is_empty(const LitTrie &trie) { + return num_vertices(trie) <= 1; +} + +static +std::set all_reports(const LitTrie &trie) { + std::set reports; + for (auto v : vertices_range(trie)) { + insert(&reports, trie[v].reports); + } + return reports; +} + +using LitTrieVertex = LitTrie::vertex_descriptor; +using LitTrieEdge = LitTrie::edge_descriptor; + +namespace { // unnamed + // Concrete impl class class SmallWriteBuildImpl : public SmallWriteBuild { public: @@ -110,15 +138,15 @@ public: const CompileContext &cc; unique_ptr rdfa; - vector > cand_literals; LitTrie lit_trie; LitTrie lit_trie_nocase; + size_t num_literals = 0; bool poisoned; }; } // namespace -SmallWriteBuild::~SmallWriteBuild() { } +SmallWriteBuild::~SmallWriteBuild() = default; SmallWriteBuildImpl::SmallWriteBuildImpl(size_t num_patterns, const ReportManager &rm_in, @@ -272,25 +300,27 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) { } static -bool add_to_trie(const ue2_literal &literal, LitTrie &trie) { +bool add_to_trie(const ue2_literal &literal, ReportID report, LitTrie &trie) { auto u = trie.root; - for (auto &c : literal) { + for (const auto &c : literal) { auto next = LitTrie::null_vertex(); for (auto v : adjacent_vertices_range(u, trie)) { - if (trie[v].c == c.c) { + if (trie[v].c == (u8)c.c) { next = v; break; } } - if (next == LitTrie::null_vertex()) { - next = add_vertex(LitTrieVertexProps(c.c), trie); + if (!next) { + next = add_vertex(LitTrieVertexProps((u8)c.c), trie); add_edge(u, next, trie); } u = next; } - DEBUG_PRINTF("added '%s' to trie, now %zu vertices\n", - escapeString(literal).c_str(), num_vertices(trie)); + trie[u].reports.insert(report); + + DEBUG_PRINTF("added '%s' (report %u) to trie, now %zu vertices\n", + escapeString(literal).c_str(), report, num_vertices(trie)); return num_vertices(trie) <= MAX_TRIE_VERTICES; } @@ -298,105 +328,310 @@ void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) { // If the graph is poisoned (i.e. we can't build a SmallWrite version), // we don't even try. 
if (poisoned) { + DEBUG_PRINTF("poisoned\n"); return; } if (literal.length() > cc.grey.smallWriteLargestBuffer) { + DEBUG_PRINTF("exceeded length limit\n"); return; /* too long */ } - cand_literals.push_back(make_pair(literal, r)); - - if (!add_to_trie(literal, - literal.any_nocase() ? lit_trie_nocase : lit_trie)) { + if (++num_literals > cc.grey.smallWriteMaxLiterals) { + DEBUG_PRINTF("exceeded literal limit\n"); poisoned = true; return; } - if (cand_literals.size() > cc.grey.smallWriteMaxLiterals) { + auto &trie = literal.any_nocase() ? lit_trie_nocase : lit_trie; + if (!add_to_trie(literal, r, trie)) { + DEBUG_PRINTF("trie add failed\n"); poisoned = true; } } -static -void lit_to_graph(NGHolder *h, const ue2_literal &literal, ReportID r) { - NFAVertex u = h->startDs; - for (const auto &c : literal) { - NFAVertex v = add_vertex(*h); - add_edge(u, v, *h); - (*h)[v].char_reach = c; - u = v; +namespace { + +/** + * \brief BFS visitor for Aho-Corasick automaton construction. + * + * This is doing two things: + * + * - Computing the failure edges (also called fall or supply edges) for each + * vertex, giving the longest suffix of the path to that point that is also + * a prefix in the trie reached on the same character. The BFS traversal + * makes it possible to build these from earlier failure paths. + * + * - Computing the output function for each vertex, which is done by + * propagating the reports from failure paths as well. This ensures that + * substrings of the current path also report correctly. + */ +struct ACVisitor : public boost::default_bfs_visitor { + ACVisitor(LitTrie &trie_in, + map &failure_map_in, + vector &ordering_in) + : mutable_trie(trie_in), failure_map(failure_map_in), + ordering(ordering_in) {} + + LitTrieVertex find_failure_target(LitTrieVertex u, LitTrieVertex v, + const LitTrie &trie) { + assert(u == trie.root || contains(failure_map, u)); + assert(!contains(failure_map, v)); + + const auto &c = trie[v].c; + + while (u != trie.root) { + auto f = failure_map.at(u); + for (auto w : adjacent_vertices_range(f, trie)) { + if (trie[w].c == c) { + return w; + } + } + u = f; + } + + DEBUG_PRINTF("no failure edge\n"); + return LitTrie::null_vertex(); } - (*h)[u].reports.insert(r); - add_edge(u, h->accept, *h); + + void tree_edge(LitTrieEdge e, const LitTrie &trie) { + auto u = source(e, trie); + auto v = target(e, trie); + DEBUG_PRINTF("bfs (%zu, %zu) on '%c'\n", trie[u].index, trie[v].index, + trie[v].c); + ordering.push_back(v); + + auto f = find_failure_target(u, v, trie); + + if (f) { + DEBUG_PRINTF("final failure vertex %zu\n", trie[f].index); + failure_map.emplace(v, f); + + // Propagate reports from failure path to ensure we correctly + // report substrings. + insert(&mutable_trie[v].reports, mutable_trie[f].reports); + } else { + DEBUG_PRINTF("final failure vertex root\n"); + failure_map.emplace(v, trie.root); + } + } + +private: + LitTrie &mutable_trie; //!< For setting reports property. + map &failure_map; + vector &ordering; //!< BFS ordering for vertices. +}; +} + +static UNUSED +bool isSaneTrie(const LitTrie &trie) { + CharReach seen; + for (auto u : vertices_range(trie)) { + seen.clear(); + for (auto v : adjacent_vertices_range(u, trie)) { + if (seen.test(trie[v].c)) { + return false; + } + seen.set(trie[v].c); + } + } + return true; +} + +/** + * \brief Turn the given literal trie into an AC automaton by adding additional + * edges and reports. 
+ */ +static +void buildAutomaton(LitTrie &trie) { + assert(isSaneTrie(trie)); + + // Find our failure transitions and reports. + map failure_map; + vector ordering; + ACVisitor ac_vis(trie, failure_map, ordering); + boost::breadth_first_search(trie, trie.root, visitor(ac_vis)); + + // Compute missing edges from failure map. + for (auto v : ordering) { + DEBUG_PRINTF("vertex %zu\n", trie[v].index); + CharReach seen; + for (auto w : adjacent_vertices_range(v, trie)) { + DEBUG_PRINTF("edge to %zu with reach 0x%02x\n", trie[w].index, + trie[w].c); + assert(!seen.test(trie[w].c)); + seen.set(trie[w].c); + } + auto parent = failure_map.at(v); + for (auto w : adjacent_vertices_range(parent, trie)) { + if (!seen.test(trie[w].c)) { + add_edge(v, w, trie); + } + } + } +} + +static +vector getAlphabet(const LitTrie &trie, bool nocase) { + vector esets = {CharReach::dot()}; + for (auto v : vertices_range(trie)) { + if (v == trie.root) { + continue; + } + + CharReach cr; + if (nocase) { + cr.set(mytoupper(trie[v].c)); + cr.set(mytolower(trie[v].c)); + } else { + cr.set(trie[v].c); + } + + for (size_t i = 0; i < esets.size(); i++) { + if (esets[i].count() == 1) { + continue; + } + + CharReach t = cr & esets[i]; + if (t.any() && t != esets[i]) { + esets[i] &= ~t; + esets.push_back(t); + } + } + } + + // For deterministic compiles. + sort(esets.begin(), esets.end()); + return esets; +} + +static +u16 buildAlphabet(const LitTrie &trie, bool nocase, + array &alpha, + array &unalpha) { + const auto &esets = getAlphabet(trie, nocase); + + u16 i = 0; + for (const auto &cr : esets) { + u16 leader = cr.find_first(); + for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) { + alpha[s] = i; + } + unalpha[i] = leader; + i++; + } + + for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) { + alpha[j] = i; + unalpha[i] = j; + } + + DEBUG_PRINTF("alphabet size %u\n", i); + return i; +} + +/** \brief Construct a raw_dfa from a literal trie. */ +static +unique_ptr buildDfa(LitTrie &trie, bool nocase) { + DEBUG_PRINTF("trie has %zu states\n", num_vertices(trie)); + + buildAutomaton(trie); + + auto rdfa = make_unique(NFA_OUTFIX); + + // Calculate alphabet. + array unalpha; + auto &alpha = rdfa->alpha_remap; + rdfa->alpha_size = buildAlphabet(trie, nocase, alpha, unalpha); + + // Construct states and transitions. + const u16 root_state = DEAD_STATE + 1; + rdfa->start_anchored = root_state; + rdfa->start_floating = root_state; + rdfa->states.resize(num_vertices(trie) + 1, dstate(rdfa->alpha_size)); + + // Dead state. + fill(rdfa->states[DEAD_STATE].next.begin(), + rdfa->states[DEAD_STATE].next.end(), DEAD_STATE); + + for (auto u : vertices_range(trie)) { + auto u_state = trie[u].index + 1; + DEBUG_PRINTF("state %zu\n", u_state); + assert(u_state < rdfa->states.size()); + auto &ds = rdfa->states[u_state]; + ds.daddy = root_state; + ds.reports = trie[u].reports; + + if (!ds.reports.empty()) { + DEBUG_PRINTF("reports: %s\n", as_string_list(ds.reports).c_str()); + } + + // By default, transition back to the root. + fill(ds.next.begin(), ds.next.end(), root_state); + // TOP should be a self-loop. + ds.next[alpha[TOP]] = u_state; + + // Add in the real transitions. 
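+        // Transitions back to the root need no explicit entries: the
+        // default fill above already routes every other symbol to
+        // root_state.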
+ for (auto v : adjacent_vertices_range(u, trie)) { + if (v == trie.root) { + continue; + } + auto v_state = trie[v].index + 1; + assert((u16)trie[v].c < alpha.size()); + u16 sym = alpha[trie[v].c]; + DEBUG_PRINTF("edge to %zu on 0x%02x (sym %u)\n", v_state, + trie[v].c, sym); + assert(sym < ds.next.size()); + assert(ds.next[sym] == root_state); + ds.next[sym] = v_state; + } + } + + return rdfa; } bool SmallWriteBuildImpl::determiniseLiterals() { DEBUG_PRINTF("handling literals\n"); assert(!poisoned); - assert(cand_literals.size() <= cc.grey.smallWriteMaxLiterals); + assert(num_literals <= cc.grey.smallWriteMaxLiterals); - if (cand_literals.empty()) { + if (is_empty(lit_trie) && is_empty(lit_trie_nocase)) { + DEBUG_PRINTF("no literals\n"); return true; /* nothing to do */ } - vector > temp_dfas; + vector> dfas; - for (const auto &cand : cand_literals) { - NGHolder h; - DEBUG_PRINTF("determinising %s\n", dumpString(cand.first).c_str()); - lit_to_graph(&h, cand.first, cand.second); - temp_dfas.push_back(buildMcClellan(h, &rm, cc.grey)); - - // If we couldn't build a McClellan DFA for this portion, then we - // can't SmallWrite optimize the entire graph, so we can't - // optimize any of it - if (!temp_dfas.back()) { - DEBUG_PRINTF("failed to determinise\n"); - poisoned = true; - return false; - } + if (!is_empty(lit_trie)) { + dfas.push_back(buildDfa(lit_trie, false)); + DEBUG_PRINTF("caseful literal dfa with %zu states\n", + dfas.back()->states.size()); + } + if (!is_empty(lit_trie_nocase)) { + dfas.push_back(buildDfa(lit_trie_nocase, true)); + DEBUG_PRINTF("nocase literal dfa with %zu states\n", + dfas.back()->states.size()); } - if (!rdfa && temp_dfas.size() == 1) { - /* no need to merge there is only one dfa */ - rdfa = move(temp_dfas[0]); + if (rdfa) { + dfas.push_back(move(rdfa)); + DEBUG_PRINTF("general dfa with %zu states\n", + dfas.back()->states.size()); + } + + // If we only have one DFA, no merging is necessary. + if (dfas.size() == 1) { + DEBUG_PRINTF("only one dfa\n"); + rdfa = move(dfas.front()); return true; } - /* do a merge of the new dfas */ - + // Merge all DFAs. vector to_merge; - - if (rdfa) {/* also include the existing dfa */ - to_merge.push_back(rdfa.get()); - } - - for (const auto &d : temp_dfas) { + for (const auto &d : dfas) { to_merge.push_back(d.get()); } - assert(to_merge.size() > 1); - - while (to_merge.size() > LITERAL_MERGE_CHUNK_SIZE) { - vector small_merge; - small_merge.insert(small_merge.end(), to_merge.begin(), - to_merge.begin() + LITERAL_MERGE_CHUNK_SIZE); - - temp_dfas.push_back( - mergeAllDfas(small_merge, DFA_MERGE_MAX_STATES, &rm, cc.grey)); - - if (!temp_dfas.back()) { - DEBUG_PRINTF("merge failed\n"); - poisoned = true; - return false; - } - - to_merge.erase(to_merge.begin(), - to_merge.begin() + LITERAL_MERGE_CHUNK_SIZE); - to_merge.push_back(temp_dfas.back().get()); - } - auto merged = mergeAllDfas(to_merge, DFA_MERGE_MAX_STATES, &rm, cc.grey); if (!merged) { @@ -405,11 +640,11 @@ bool SmallWriteBuildImpl::determiniseLiterals() { return false; } - DEBUG_PRINTF("merge succeeded, built %p\n", merged.get()); + DEBUG_PRINTF("merge succeeded, built dfa with %zu states\n", + merged->states.size()); - // Replace our only DFA with the merged one + // Replace our only DFA with the merged one. 
rdfa = move(merged); - return true; } @@ -527,7 +762,7 @@ unique_ptr makeSmallWriteBuilder(size_t num_patterns, } bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { - if (!rdfa && cand_literals.empty()) { + if (!rdfa && is_empty(lit_trie) && is_empty(lit_trie_nocase)) { DEBUG_PRINTF("no smallwrite engine\n"); poisoned = true; return nullptr; @@ -579,9 +814,10 @@ set SmallWriteBuildImpl::all_reports() const { if (rdfa) { insert(&reports, ::ue2::all_reports(*rdfa)); } - for (const auto &cand : cand_literals) { - reports.insert(cand.second); - } + + insert(&reports, ::ue2::all_reports(lit_trie)); + insert(&reports, ::ue2::all_reports(lit_trie_nocase)); + return reports; } diff --git a/src/smallwrite/smallwrite_build.h b/src/smallwrite/smallwrite_build.h index 92222d62..648b13db 100644 --- a/src/smallwrite/smallwrite_build.h +++ b/src/smallwrite/smallwrite_build.h @@ -30,13 +30,14 @@ #define SMWR_BUILD_H /** - * SmallWrite Build interface. Everything you ever needed to feed literals in - * and get a SmallWriteEngine out. This header should be everything needed by - * the rest of UE2. + * \file + * \brief Small-write engine build interface. + * + * Everything you ever needed to feed literals in and get a SmallWriteEngine + * out. This header should be everything needed by the rest of UE2. */ #include "ue2common.h" -#include "util/alloc.h" #include "util/bytecode_ptr.h" #include "util/noncopyable.h" @@ -53,14 +54,14 @@ class ExpressionInfo; class NGHolder; class ReportManager; -// Abstract interface intended for callers from elsewhere in the tree, real -// underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h. +/** + * Abstract interface intended for callers from elsewhere in the tree, real + * underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h. + */ class SmallWriteBuild : noncopyable { public: - // Destructor virtual ~SmallWriteBuild(); - // Construct a runtime implementation. virtual bytecode_ptr build(u32 roseQuality) = 0; virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0; @@ -69,7 +70,7 @@ public: virtual std::set all_reports() const = 0; }; -// Construct a usable SmallWrite builder. +/** \brief Construct a usable SmallWrite builder. 
*/
 std::unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns,
                                                        const ReportManager &rm,
                                                        const CompileContext &cc);

From b75b169b495175bb0ca49e81759805f8052e6433 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Thu, 23 Mar 2017 08:13:00 +1100
Subject: [PATCH 240/326] windows: tools are unix only

---
 tools/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 049fd368..61bb00f2 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,3 +1,6 @@
+if (WIN32)
+    return()
+endif()
 find_package(Threads)
 
 # remove some warnings

From 19c7161d04d03ef1b551184496acc8df02058c16 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Fri, 7 Apr 2017 11:11:53 +1000
Subject: [PATCH 241/326] Use ue2::noncopyable instead of deleting copy ctor

---
 src/rose/rose_build_bytecode.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index dad00cd6..e7b00126 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -278,20 +278,13 @@ struct ExclusiveSubengine {
 };
 
 /** \brief exclusive info to build tamarama */
-struct ExclusiveInfo {
+struct ExclusiveInfo : noncopyable {
     // subengine info
     vector<ExclusiveSubengine> subengines;
     // all the report in tamarama
     set<ReportID> reports;
     // assigned queue id
     u32 queue;
-
-    // workaround a deficiency in the standard (as explained by STL @ MS) we
-    // need to tell the compiler that ExclusiveInfo is moveable-only by
-    // deleting the copy cons so that vector doesn't get confused
-    ExclusiveInfo() = default;
-    ExclusiveInfo(const ExclusiveInfo &) = delete;
-    ExclusiveInfo(ExclusiveInfo &&) = default;
 };
 }

From d4c66e294bb9c76d043f41f5db88f001292dcd02 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Thu, 6 Apr 2017 10:53:28 +1000
Subject: [PATCH 242/326] cmake: check for libc++ directly

Rather than trying to infer libc++ use from platform and compiler, just
look for a defined symbol.
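
An illustrative sketch of the mechanism the check relies on; this note and
snippet are not part of the patch. libc++ defines the macro _LIBCPP_VERSION
in all of its headers, so including any small standard header is enough to
detect it from C++:

    #include <ciso646> // tiny standard header; libc++'s config defines _LIBCPP_VERSION

    #if defined(_LIBCPP_VERSION)
    // building against LLVM libc++
    #else
    // some other standard library, e.g. GNU libstdc++
    #endif

The CHECK_CXX_SYMBOL_EXISTS(_LIBCPP_VERSION ciso646 HAVE_LIBCPP) line in the
diff below performs this same test at configure time.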
--- CMakeLists.txt | 4 ++++ cmake/boost.cmake | 6 ++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fbcb5399..34405097 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,7 @@ set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(CheckCXXSymbolExists) INCLUDE (CheckFunctionExists) INCLUDE (CheckIncludeFiles) INCLUDE (CheckIncludeFileCXX) @@ -264,6 +265,9 @@ CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC) CHECK_C_COMPILER_FLAG(-fvisibility=hidden HAS_C_HIDDEN) CHECK_CXX_COMPILER_FLAG(-fvisibility=hidden HAS_CXX_HIDDEN) +# are we using libc++ +CHECK_CXX_SYMBOL_EXISTS(_LIBCPP_VERSION ciso646 HAVE_LIBCPP) + if (RELEASE_BUILD) if (HAS_C_HIDDEN) set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fvisibility=hidden") diff --git a/cmake/boost.cmake b/cmake/boost.cmake index ff4e2f94..44b4e8ba 100644 --- a/cmake/boost.cmake +++ b/cmake/boost.cmake @@ -3,10 +3,8 @@ set(BOOST_USE_STATIC_LIBS OFF) set(BOOST_USE_MULTITHREADED OFF) set(BOOST_USE_STATIC_RUNTIME OFF) -if (CMAKE_SYSTEM_NAME MATCHES "Darwin" - OR (CMAKE_SYSTEM_NAME MATCHES "FreeBSD" - AND CMAKE_C_COMPILER_ID MATCHES "Clang")) - # we need a more recent boost for libc++ used by clang on OSX and FreeBSD +if (HAVE_LIBCPP) + # we need a more recent boost for libc++ set(BOOST_MINVERSION 1.61.0) else () set(BOOST_MINVERSION 1.57.0) From 6dc1e202b9ea42de6f7728c45a27a5877de666d3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 31 Oct 2016 09:46:41 +1100 Subject: [PATCH 243/326] hsbench: documentation --- doc/dev-reference/index.rst | 1 + doc/dev-reference/intro.rst | 7 +++ doc/dev-reference/tools.rst | 116 ++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 doc/dev-reference/tools.rst diff --git a/doc/dev-reference/index.rst b/doc/dev-reference/index.rst index df4f8916..32f188dd 100644 --- a/doc/dev-reference/index.rst +++ b/doc/dev-reference/index.rst @@ -17,5 +17,6 @@ Hyperscan |version| Developer's Reference Guide runtime serialization performance + tools api_constants api_files diff --git a/doc/dev-reference/intro.rst b/doc/dev-reference/intro.rst index 5f0cc113..58879aef 100644 --- a/doc/dev-reference/intro.rst +++ b/doc/dev-reference/intro.rst @@ -70,6 +70,13 @@ For a given database, Hyperscan provides several guarantees: See :ref:`runtime` for more detail. +***** +Tools +***** + +Some utilities for testing and benchmarking Hyperscan are included with the +library. See :ref:`tools` for more information. + ************ Example Code ************ diff --git a/doc/dev-reference/tools.rst b/doc/dev-reference/tools.rst new file mode 100644 index 00000000..d2e7a06e --- /dev/null +++ b/doc/dev-reference/tools.rst @@ -0,0 +1,116 @@ +.. _tools: + +##### +Tools +##### + +This section describes the set of utilities included with the Hyperscan library. + +******************** +Benchmarker: hsbench +******************** + +The ``hsbench`` tool provides an easy way to measure Hyperscan's performance +for a particular set of patterns and corpus of data to be scanned. + +Patterns are supplied in the format described below in +:ref:`tools_pattern_format`, while the corpus must be provided in the form of a +`corpus database`: this is a simple SQLite database format intended to allow for +easy control of how a corpus is broken into blocks and streams. + +.. 
note:: A group of Python scripts for constructing corpora databases from
+   various input types, such as PCAP network traffic captures or text files,
+   can be found in the Hyperscan source tree in ``tools/hsbench/scripts``.
+
+Running hsbench
+===============
+
+Given a file full of patterns specified with ``-e`` and a corpus database
+specified with ``-c``, ``hsbench`` will perform a single-threaded benchmark
+and produce output like this::
+
+    $ hsbench -e /tmp/patterns -c /tmp/corpus.db
+
+    Signatures:        /tmp/patterns
+    Hyperscan info:    Version: 4.3.1 Features: AVX2 Mode: STREAM
+    Expression count:  200
+    Bytecode size:     342,540 bytes
+    Database CRC:      0x6cd6b67c
+    Stream state size: 252 bytes
+    Scratch size:      18,406 bytes
+    Compile time:      0.153 seconds
+    Peak heap usage:   78,073,856 bytes
+
+    Time spent scanning:     0.600 seconds
+    Corpus size:             72,138,183 bytes (63,946 blocks in 8,891 streams)
+    Scan matches:            81 (0.001 matches/kilobyte)
+    Overall block rate:      2,132,004.45 blocks/sec
+    Overall throughput:      19,241.10 Mbit/sec
+
+By default, the corpus is scanned twenty times, and the overall performance
+reported is computed based on the total number of bytes scanned in the time
+it takes to perform all twenty scans. The number of repeats can be changed
+with the ``-n`` argument, and the results of each scan will be displayed if
+the ``--per-scan`` argument is specified.
+
+To benchmark Hyperscan on more than one core, you can supply a list of cores
+with the ``-T`` argument, which will instruct ``hsbench`` to start one
+benchmark thread per core given and compute the throughput from the time
+taken to complete all of them.
+
+.. tip:: For single-threaded benchmarks on multi-processor systems, we
+   recommend using a utility like ``taskset`` to lock the hsbench process to
+   one core and minimize jitter due to the operating system's scheduler.
+
+.. _tools_pattern_format:
+
+**************
+Pattern Format
+**************
+
+All of the Hyperscan tools accept patterns in the same format, read from
+plain text files with one pattern per line. Each line looks like this:
+
+* ``<id>:/<expression>/<flags>``
+
+For example::
+
+  1:/hatstand.*teakettle/s
+  2:/(hatstand|teakettle)/iH
+  3:/^.{10,20}hatstand/m
+
+The integer ID is the value that will be reported when a match is found by
+Hyperscan and must be unique.
+
+The pattern itself is a regular expression in PCRE syntax; see
+:ref:`compilation` for more information on supported features.
+
+The flags are single characters that map to Hyperscan flags as follows:
+
+========= ================================= ===========
+Character API Flag                          Description
+========= ================================= ===========
+``i``     :c:member:`HS_FLAG_CASELESS`      Case-insensitive matching
+``s``     :c:member:`HS_FLAG_DOTALL`        Dot (``.``) will match newlines
+``m``     :c:member:`HS_FLAG_MULTILINE`     Multi-line anchoring
+``H``     :c:member:`HS_FLAG_SINGLEMATCH`   Report match ID at most once
+``V``     :c:member:`HS_FLAG_ALLOWEMPTY`    Allow patterns that can match against empty buffers
+``8``     :c:member:`HS_FLAG_UTF8`          UTF-8 mode
+``W``     :c:member:`HS_FLAG_UCP`           Unicode property support
+``P``     :c:member:`HS_FLAG_PREFILTER`     Prefiltering mode
+``L``     :c:member:`HS_FLAG_SOM_LEFTMOST`  Leftmost start of match reporting
+========= ================================= ===========
+
+In addition to the set of flags above, :ref:`extparam` can be supplied
+for each pattern. These are supplied after the flags as ``key=value`` pairs
+between braces, separated by commas.
For example:: + + 1:/hatstand.*teakettle/s{min_offset=50,max_offset=100} + +All Hyperscan tools will accept a pattern file (or a directory containing +pattern files) with the ``-e`` argument. If no further arguments constraining +the pattern set are given, all patterns in those files are used. + +To select a subset of the patterns, a single ID can be supplied with the ``-z`` +argument, or a file containing a set of IDs can be supplied with the ``-s`` +argument. From bfcb39158027a8aa8c5e6d4aeb5c9477778b4622 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 20 Jan 2017 14:25:50 +1100 Subject: [PATCH 244/326] doc: update copyright year --- doc/dev-reference/conf.py.in | 2 +- doc/dev-reference/copyright.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/dev-reference/conf.py.in b/doc/dev-reference/conf.py.in index 9f089883..2daab369 100644 --- a/doc/dev-reference/conf.py.in +++ b/doc/dev-reference/conf.py.in @@ -44,7 +44,7 @@ master_doc = 'index' # General information about the project. project = u'Hyperscan' -copyright = u'2015-2016, Intel Corporation' +copyright = u'2015-2017, Intel Corporation' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/doc/dev-reference/copyright.rst b/doc/dev-reference/copyright.rst index 737b160f..9464382e 100644 --- a/doc/dev-reference/copyright.rst +++ b/doc/dev-reference/copyright.rst @@ -30,4 +30,4 @@ and/or other countries. \*Other names and brands may be claimed as the property of others. -Copyright |copy| 2015-2016, Intel Corporation. All rights reserved. +Copyright |copy| 2015-2017, Intel Corporation. All rights reserved. From 698653766bf8bf8559f315b0518b12d9b88de4d0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 6 Apr 2017 16:14:43 +1000 Subject: [PATCH 245/326] find_escape_strings: find outs2_broken faster --- src/nfa/accel_dfa_build_strat.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index d257b530..4bd83a52 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -379,16 +379,18 @@ accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const { for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; jj = cr_all_j.find_next(jj)) { rv.double_byte.emplace((u8)ii, (u8)jj); + if (rv.double_byte.size() > 8) { + DEBUG_PRINTF("outs2 too big\n"); + outs2_broken = true; + goto done; + } } } } } - if (rv.double_byte.size() > 8) { - DEBUG_PRINTF("outs2 too big\n"); - outs2_broken = true; - } - + done: + assert(outs2_broken || rv.double_byte.size() <= 8); if (outs2_broken) { rv.double_byte.clear(); } From 40f03929be10e0d6b99ad553f03a7181df8bf3d3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 6 Apr 2017 11:43:56 +1000 Subject: [PATCH 246/326] find_better_daddy: use flat_set for hinted states --- src/nfa/mcclellancompile.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 206f468a..0aff6006 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -762,7 +762,7 @@ bytecode_ptr 
mcclellanCompile8(dfa_info &info, const CompileContext &cc, #define MAX_SHERMAN_LIST_LEN 8 static -void addIfEarlier(set &dest, dstate_id_t candidate, +void addIfEarlier(flat_set &dest, dstate_id_t candidate, dstate_id_t max) { if (candidate < max) { dest.insert(candidate); @@ -770,7 +770,7 @@ void addIfEarlier(set &dest, dstate_id_t candidate, } static -void addSuccessors(set &dest, const dstate &source, +void addSuccessors(flat_set &dest, const dstate &source, u16 alphasize, dstate_id_t curr_id) { for (symbol_t s = 0; s < alphasize; s++) { addIfEarlier(dest, source.next[s], curr_id); @@ -817,7 +817,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, dstate_id_t best_daddy = 0; dstate &currState = info.states[curr_id]; - set hinted; /* set of states to search for a better daddy */ + flat_set hinted; /* states to search for a better daddy */ addIfEarlier(hinted, 0, curr_id); addIfEarlier(hinted, info.raw.start_anchored, curr_id); addIfEarlier(hinted, info.raw.start_floating, curr_id); From c0d79609548eb2291af6be5dde7f7ff1c814537e Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 6 Apr 2017 13:47:30 +1000 Subject: [PATCH 247/326] ng_limex_accel: compile time speedups, tidy up --- src/nfagraph/ng_limex_accel.cpp | 99 ++++++++++++++------------------- 1 file changed, 43 insertions(+), 56 deletions(-) diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index cd662d9c..bcd04d21 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -45,12 +45,16 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" +#include "util/small_vector.h" #include "util/target_info.h" #include #include +#include + using namespace std; +using boost::adaptors::map_keys; namespace ue2 { @@ -135,15 +139,15 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, static void findPaths(const NGHolder &g, NFAVertex v, const vector &refined_cr, - vector > *paths, + vector> *paths, const flat_set &forbidden, u32 depth) { static const u32 MAGIC_TOO_WIDE_NUMBER = 16; if (!depth) { - paths->push_back(vector()); + paths->push_back({}); return; } if (v == g.accept || v == g.acceptEod) { - paths->push_back(vector()); + paths->push_back({}); if (!generates_callbacks(g) || v == g.acceptEod) { paths->back().push_back(CharReach()); /* red tape options */ } @@ -157,42 +161,37 @@ void findPaths(const NGHolder &g, NFAVertex v, if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER || hasSelfLoop(v, g)) { /* give up on pushing past this point */ - paths->push_back(vector()); - vector &p = paths->back(); - p.push_back(cr); + paths->push_back({cr}); return; } + vector> curr; for (auto w : adjacent_vertices_range(v, g)) { if (contains(forbidden, w)) { /* path has looped back to one of the active+boring acceleration * states. We can ignore this path if we have sufficient back- * off. 
*/ - paths->push_back(vector()); - paths->back().push_back(CharReach()); + paths->push_back({CharReach()}); continue; } u32 new_depth = depth - 1; - vector > curr; do { curr.clear(); findPaths(g, w, refined_cr, &curr, forbidden, new_depth); } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER); - for (vector >::iterator it = curr.begin(); - it != curr.end(); ++it) { - paths->push_back(vector()); - vector &p = paths->back(); - p.swap(*it); - p.push_back(cr); + for (auto &c : curr) { + c.push_back(cr); + paths->push_back(std::move(c)); } } } +namespace { struct SAccelScheme { - SAccelScheme(const CharReach &cr_in, u32 offset_in) - : cr(cr_in), offset(offset_in) { + SAccelScheme(CharReach cr_in, u32 offset_in) + : cr(std::move(cr_in)), offset(offset_in) { assert(offset <= MAX_ACCEL_DEPTH); } @@ -215,6 +214,7 @@ struct SAccelScheme { CharReach cr = CharReach::dot(); u32 offset = MAX_ACCEL_DEPTH + 1; }; +} static void findBest(vector >::const_iterator pb, @@ -233,12 +233,11 @@ void findBest(vector >::const_iterator pb, DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - vector priority_path; + small_vector priority_path; priority_path.reserve(pb->size()); u32 i = 0; - for (vector::const_iterator p = pb->begin(); p != pb->end(); - ++p, i++) { - SAccelScheme as(*p | curr.cr, MAX(i, curr.offset)); + for (auto p = pb->begin(); p != pb->end(); ++p, i++) { + SAccelScheme as(*p | curr.cr, max(i, curr.offset)); if (*best < as) { DEBUG_PRINTF("worse\n"); continue; @@ -259,13 +258,8 @@ void findBest(vector >::const_iterator pb, } DEBUG_PRINTF("---\n"); - for (vector::const_iterator it = priority_path.begin(); - it != priority_path.end(); ++it) { - DEBUG_PRINTF("%u:|| = %zu; p remaining len %zu\n", i, it->cr.count(), - priority_path.end() - it); - - SAccelScheme in = move(*it); - + for (const SAccelScheme &in : priority_path) { + DEBUG_PRINTF("in: count %zu\n", in.cr.count()); if (*best < in) { DEBUG_PRINTF("worse\n"); continue; @@ -278,9 +272,10 @@ void findBest(vector >::const_iterator pb, } } +namespace { struct DAccelScheme { - DAccelScheme(const CharReach &cr_in, u32 offset_in) - : double_cr(cr_in), double_offset(offset_in) { + DAccelScheme(CharReach cr_in, u32 offset_in) + : double_cr(std::move(cr_in)), double_offset(offset_in) { assert(double_offset <= MAX_ACCEL_DEPTH); } @@ -319,6 +314,7 @@ struct DAccelScheme { CharReach double_cr; u32 double_offset = 0; }; +} static DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, @@ -391,11 +387,10 @@ void findDoubleBest(vector >::const_iterator pb, DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); - vector priority_path; + small_vector priority_path; priority_path.reserve(pb->size()); u32 i = 0; - for (vector::const_iterator p = pb->begin(); - p != pb->end() && next(p) != pb->end(); + for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end(); ++p, i++) { DAccelScheme as = make_double_accel(curr, *p, *next(p), i); if (*best < as) { @@ -411,9 +406,7 @@ void findDoubleBest(vector >::const_iterator pb, best->double_byte.size(), best->double_cr.count(), best->double_offset); - for (vector::const_iterator it = priority_path.begin(); - it != priority_path.end(); ++it) { - DAccelScheme in = move(*it); + for (const DAccelScheme &in : priority_path) { DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", in.double_byte.size(), in.double_cr.count(), in.double_offset); @@ -427,14 +420,12 @@ void findDoubleBest(vector >::const_iterator pb, #ifdef DEBUG static -void dumpPaths(const vector > &paths) { - for (vector >::const_iterator p 
= paths.begin(); - p != paths.end(); ++p) { +void dumpPaths(const vector> &paths) { + for (const auto &path : paths) { DEBUG_PRINTF("path: ["); - for (vector::const_iterator it = p->begin(); it != p->end(); - ++it) { + for (const auto &cr : path) { printf(" ["); - describeClass(stdout, *it, 20, CC_OUT_TEXT); + describeClass(stdout, cr, 20, CC_OUT_TEXT); printf("]"); } printf(" ]\n"); @@ -545,7 +536,7 @@ DAccelScheme findBestDoubleAccelScheme(vector > paths, #define MAX_EXPLORE_PATHS 40 -AccelScheme findBestAccelScheme(vector > paths, +AccelScheme findBestAccelScheme(vector> paths, const CharReach &terminating, bool look_for_double_byte) { AccelScheme rv; @@ -575,14 +566,13 @@ AccelScheme findBestAccelScheme(vector > paths, /* find best is a bit lazy in terms of minimising the offset, see if we can * make it better. need to find the min max offset that we need.*/ u32 offset = 0; - for (vector >::iterator p = paths.begin(); - p != paths.end(); ++p) { + for (const auto &path : paths) { u32 i = 0; - for (vector::iterator it = p->begin(); it != p->end(); - ++it, i++) { - if (it->isSubsetOf(best.cr)) { + for (const auto &cr : path) { + if (cr.isSubsetOf(best.cr)) { break; } + i++; } offset = MAX(offset, i); } @@ -620,17 +610,15 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, return AccelScheme(); /* invalid scheme */ } - vector > paths; + vector> paths; flat_set ignore_vert_set(verts.begin(), verts.end()); /* Note: we can not in general (TODO: ignore when possible) ignore entries * into the bounded repeat cyclic states as that is when the magic happens */ - for (map::const_iterator it - = br_cyclic.begin(); - it != br_cyclic.end(); ++it) { + for (auto v : br_cyclic | map_keys) { /* TODO: can allow if repeatMin <= 1 ? */ - ignore_vert_set.erase(it->first); + ignore_vert_set.erase(v); } for (auto v : verts) { @@ -643,9 +631,8 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, } /* paths built wrong: reverse them */ - for (vector >::iterator it = paths.begin(); - it != paths.end(); ++it) { - reverse(it->begin(), it->end()); + for (auto &path : paths) { + reverse(path.begin(), path.end()); } return findBestAccelScheme(std::move(paths), terminating, From 0b8f25a036540170e07f341be0d9821f9bb1cb06 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 7 Apr 2017 16:40:11 +1000 Subject: [PATCH 248/326] Ensure simd types are aligned to what we assume Turns out Clang is sensitive to the location of the alignment attribute. 
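
A minimal sketch of the problem, illustrative only and not part of the patch
(assuming ALIGN_AVX_DIRECTIVE expands to a GCC-style attribute such as
__attribute__((aligned(32)))):

    // Attribute placed after the struct keyword: applies to the struct type.
    typedef struct __attribute__((aligned(32))) { int x[8]; } ok_t;
    static_assert(alignof(ok_t) == 32, "aligned as intended");

    // Attribute placed before the struct keyword: Clang may attach it to the
    // typedef declaration instead and warn that it is ignored, leaving the
    // type with only its natural alignment.
    typedef __attribute__((aligned(32))) struct { int x[8]; } risky_t;

Moving the directive after the struct keyword, as this patch does, is
honoured by both GCC and Clang.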
--- src/util/simd_types.h | 2 +- unit/internal/simd_utils.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/util/simd_types.h b/src/util/simd_types.h index a6c87db7..64844dcb 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -43,7 +43,7 @@ typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128; #if defined(HAVE_AVX2) typedef __m256i m256; #else -typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256; +typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256; #endif // these should align to 16 and 32 respectively diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 220d2a12..dac3722e 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -542,6 +542,7 @@ TYPED_TEST(SimdUtilsTest, load_store) { auto mem_ptr = make_bytecode_ptr(sizeof(a), alignof(TypeParam)); char *mem = mem_ptr.get(); + ASSERT_EQ(0, (size_t)mem % 16U); memset(mem, 0, sizeof(a)); @@ -585,6 +586,13 @@ TYPED_TEST(SimdUtilsTest, loadbytes_storebytes) { } } +TEST(SimdUtilsTest, alignment) { + ASSERT_EQ(16, alignof(m128)); + ASSERT_EQ(32, alignof(m256)); + ASSERT_EQ(16, alignof(m384)); + ASSERT_EQ(32, alignof(m512)); +} + TEST(SimdUtilsTest, movq) { m128 simd; From 083d84cfd6330be0bcaaaf5e21843a2369b3c1bc Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Fri, 7 Apr 2017 17:02:00 +1000 Subject: [PATCH 249/326] expressions: add much faster limitToSignatures() --- tools/hsbench/main.cpp | 5 ++--- util/expressions.cpp | 26 +++++++++++++------------- util/expressions.h | 17 +++++++---------- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index a37d4839..9eadf6dd 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -756,6 +756,7 @@ int main(int argc, char *argv[]) { // known expressions together. 
if (sigSets.empty()) {
         SignatureSet sigs;
+        sigs.reserve(exprMapTemplate.size());
         for (auto i : exprMapTemplate | map_keys) {
             sigs.push_back(i);
         }
@@ -772,9 +773,7 @@
     }
 
     for (const auto &s : sigSets) {
-        ExpressionMap exprMap = exprMapTemplate; // copy
-
-        limitBySignature(exprMap, s.sigs);
+        auto exprMap = limitToSignatures(exprMapTemplate, s.sigs);
         if (exprMap.empty()) {
             continue;
         }

diff --git a/util/expressions.cpp b/util/expressions.cpp
index 944c7477..93062b48 100644
--- a/util/expressions.cpp
+++ b/util/expressions.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -27,6 +27,10 @@
  */
 
 #include "config.h"
+#include "expressions.h"
+
+#include "hs.h"
+#include "string_util.h"
 
 #include
 #include
@@ -34,7 +38,6 @@
 #include
 #include
-#include
 #include
 #include
 #if !defined(_WIN32)
 #include <glob.h>
 #else
 // windows headers
 #include <windows.h>
 #endif
 
-#include "expressions.h"
-#include "hs.h"
-#include "string_util.h"
+#include
 
 using namespace std;
@@ -90,7 +91,7 @@ void processLine(string &line, unsigned lineNum,
 
     //cout << "Inserting expr: id=" << id << ", pcre=" << pcre_str << endl;
 
-    bool ins = exprMap.insert(ExpressionMap::value_type(id, pcre_str)).second;
+    bool ins = exprMap.emplace(id, pcre_str).second;
     if (!ins) {
         failLine(lineNum, file, line, "Duplicate ID found.");
     }
@@ -278,20 +279,19 @@ void loadSignatureList(const string &inFile,
     }
 }
 
-void limitBySignature(ExpressionMap &exprMap,
-                      const SignatureSet &signatures) {
+ExpressionMap limitToSignatures(const ExpressionMap &exprMap,
+                                const SignatureSet &signatures) {
     ExpressionMap keepers;
-    SignatureSet::const_iterator it, ite;
-    for (it = signatures.begin(), ite = signatures.end(); it != ite; ++it) {
-        ExpressionMap::const_iterator match = exprMap.find(*it);
+    for (auto id : signatures) {
+        auto match = exprMap.find(id);
         if (match == exprMap.end()) {
-            cerr << "Unable to find signature " << *it
+            cerr << "Unable to find signature " << id
                  << " in expression set!" << endl;
             exit(1);
         }
         keepers.insert(*match);
     }
-    exprMap.swap(keepers);
+
+    return keepers;
 }

diff --git a/util/expressions.h b/util/expressions.h
index 949c9201..03d59e15 100644
--- a/util/expressions.h
+++ b/util/expressions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -31,10 +31,10 @@
 
 #include <map>
 #include <string>
-#include <list>
+#include <vector>
 
-typedef std::map<unsigned, std::string> ExpressionMap;
-typedef std::list<unsigned> SignatureSet;
+using ExpressionMap = std::map<unsigned, std::string>;
+using SignatureSet = std::vector<unsigned>;
 
 // load all of the expressions from the given directory into the given
 // expression map. Exits on failure.
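
(Illustrative usage, not part of the patch: with the new interface, callers
filter a pattern set functionally instead of copying the map and mutating it
in place, as the hsbench main.cpp hunk above shows:

    ExpressionMap subset = limitToSignatures(allExpressions, sigs);

where allExpressions and sigs stand for a caller's full expression map and
chosen signature IDs.)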
@@ -45,11 +45,8 @@ void loadExpressionsFromFile(const std::string &fname, ExpressionMap &exprMap); // load a list of signature IDs void loadSignatureList(const std::string &inFile, SignatureSet &signatures); -// produce a new expression map only containing those signatures in the -// expression list -void generateExprMap(const SignatureSet &signatures, - const ExpressionMap &allExprs, ExpressionMap &out); +// trim expression map to only the given signatures, returning result +ExpressionMap limitToSignatures(const ExpressionMap &exprMap, + const SignatureSet &signatures); -// trim expression map to only the given signatures (in-place) -void limitBySignature(ExpressionMap &exprMap, const SignatureSet &signatures); #endif From 4cc998e4ab480b546be7f3c03fc60e6e690352f1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 10 Apr 2017 16:39:31 +1000 Subject: [PATCH 250/326] ng_limex_accel: fix broken termination condition --- src/nfagraph/ng_limex_accel.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index bcd04d21..52f1e7d8 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -227,7 +227,6 @@ void findBest(vector >::const_iterator pb, DEBUG_PRINTF("new best\n"); *best = curr; } - *best = curr; return; } From 304bac32869b6811375ce00f73e50f882fc7ba70 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 10 Apr 2017 10:32:37 +1000 Subject: [PATCH 251/326] mcclellancompile_util: reduce malloc traffic, tidy --- src/nfa/mcclellancompile_util.cpp | 53 ++++++++++++++----------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index a61a19ab..7a3ceaf1 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,6 +43,12 @@ namespace ue2 { #define INIT_STATE 1 +static +bool state_has_reports(const raw_dfa &raw, dstate_id_t s) { + const auto &ds = raw.states[s]; + return !ds.reports.empty() || !ds.reports_eod.empty(); +} + static u32 count_dots(const raw_dfa &raw) { assert(raw.start_anchored == INIT_STATE); @@ -60,8 +66,7 @@ u32 count_dots(const raw_dfa &raw) { } } - if (!raw.states[raw.states[i].next[0]].reports.empty() - || !raw.states[raw.states[i].next[0]].reports_eod.empty()) { + if (state_has_reports(raw, raw.states[i].next[0])) { goto validate; } @@ -163,37 +168,33 @@ u32 calc_min_dist_from_bob(raw_dfa &raw, vector *dist_in) { } static -void find_in_edges(const raw_dfa &raw, vector > *in_edges) { - in_edges->clear(); - in_edges->resize(raw.states.size()); - ue2::unordered_set seen; +vector> find_in_edges(const raw_dfa &raw) { + vector> in_edges(raw.states.size()); + flat_set seen; for (u32 s = 1; s < raw.states.size(); s++) { seen.clear(); for (u32 j = 0; j < raw.alpha_size; j++) { dstate_id_t t = raw.states[s].next[j]; - if (contains(seen, t)) { + if (!seen.insert(t).second) { continue; } - seen.insert(t); - (*in_edges)[t].push_back(s); + in_edges[t].push_back(s); } } + + return in_edges; } static -void calc_min_dist_to_accept(const raw_dfa &raw, - const vector > &in_edges, - vector *accept_dist) { - vector &dist = *accept_dist; - dist.clear(); - dist.resize(raw.states.size(), ~0U); +vector 
calc_min_dist_to_accept(const raw_dfa &raw, + const vector> &in_edges) { + vector dist(raw.states.size(), ~0U); /* for reporting states to start from */ deque to_visit; for (u32 s = 0; s < raw.states.size(); s++) { - if (!raw.states[s].reports.empty() - || !raw.states[s].reports_eod.empty()) { + if (state_has_reports(raw, s)) { to_visit.push_back(s); dist[s] = 0; } @@ -210,9 +211,7 @@ void calc_min_dist_to_accept(const raw_dfa &raw, assert(d >= last_d); assert(d != ~0U); - for (vector::const_iterator it = in_edges[s].begin(); - it != in_edges[s].end(); ++it) { - dstate_id_t t = *it; + for (auto t : in_edges[s]) { if (t == DEAD_STATE) { continue; } @@ -226,6 +225,8 @@ void calc_min_dist_to_accept(const raw_dfa &raw, last_d = d; } + + return dist; } bool prune_overlong(raw_dfa &raw, u32 max_offset) { @@ -237,13 +238,7 @@ bool prune_overlong(raw_dfa &raw, u32 max_offset) { return false; } - vector > in_edges; - find_in_edges(raw, &in_edges); - - vector accept_dist; - calc_min_dist_to_accept(raw, in_edges, &accept_dist); - - in_edges.clear(); + vector accept_dist = calc_min_dist_to_accept(raw, find_in_edges(raw)); /* look over the states and filter out any which cannot reach a report * states before max_offset */ @@ -267,7 +262,7 @@ bool prune_overlong(raw_dfa &raw, u32 max_offset) { /* swap states */ DEBUG_PRINTF("pruned %zu -> %u\n", raw.states.size(), count); - raw.states.swap(new_states); + raw.states = std::move(new_states); new_states.clear(); /* update edges and daddys to refer to the new ids */ From c9be18c7e26b6c4356ca4d496715c2400f2377ae Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 10 Apr 2017 11:05:06 +1000 Subject: [PATCH 252/326] accel_dfa_build_strat: use small_vector for paths --- src/nfa/accel_dfa_build_strat.cpp | 23 +++++++++++++---------- src/util/dump_charclass.cpp | 11 +---------- src/util/dump_charclass.h | 14 +++++++++++--- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 4bd83a52..2320c756 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -33,9 +33,11 @@ #include "nfagraph/ng_limex_accel.h" #include "shufticompile.h" #include "trufflecompile.h" +#include "util/accel_scheme.h" #include "util/charreach.h" #include "util/container.h" #include "util/dump_charclass.h" +#include "util/small_vector.h" #include "util/verify_types.h" #include @@ -49,16 +51,15 @@ namespace ue2 { namespace { struct path { - vector reach; + small_vector reach; dstate_id_t dest = DEAD_STATE; - explicit path(dstate_id_t base) : dest(base) { - } + explicit path(dstate_id_t base) : dest(base) {} }; }; -static -void dump_paths(const vector &paths) { - for (UNUSED const auto &p : paths) { +template +void dump_paths(const Container &paths) { + for (UNUSED const path &p : paths) { DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest); } DEBUG_PRINTF("%zu paths\n", paths.size()); @@ -113,14 +114,14 @@ void extend(const raw_dfa &rdfa, const path &p, } else { path pp = append(p, CharReach(), p.dest); all[p.dest].push_back(pp); - out.push_back(pp); + out.push_back(move(pp)); } } if (!s.reports_eod.empty()) { path pp = append(p, CharReach(), p.dest); all[p.dest].push_back(pp); - out.push_back(pp); + out.push_back(move(pp)); } map dest; @@ -140,7 +141,7 @@ void extend(const raw_dfa &rdfa, const path &p, DEBUG_PRINTF("----good: [%s] -> %u\n", describeClasses(pp.reach).c_str(), pp.dest); all[e.first].push_back(pp); - out.push_back(pp); + 
out.push_back(move(pp)); } } @@ -162,8 +163,10 @@ vector> generate_paths(const raw_dfa &rdfa, dump_paths(paths); vector> rv; + rv.reserve(paths.size()); for (auto &p : paths) { - rv.push_back(move(p.reach)); + rv.push_back(vector(std::make_move_iterator(p.reach.begin()), + std::make_move_iterator(p.reach.end()))); } return rv; } diff --git a/src/util/dump_charclass.cpp b/src/util/dump_charclass.cpp index 4c159ec2..4535777d 100644 --- a/src/util/dump_charclass.cpp +++ b/src/util/dump_charclass.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -249,15 +249,6 @@ string describeClass(const CharReach &cr, size_t maxLength, return oss.str(); } -string describeClasses(const std::vector &v, size_t maxClassLength, - enum cc_output_t out_type) { - std::ostringstream oss; - for (const auto &cr : v) { - describeClass(oss, cr, maxClassLength, out_type); - } - return oss.str(); -} - // C stdio wrapper void describeClass(FILE *f, const CharReach &cr, size_t maxLength, enum cc_output_t out_type) { diff --git a/src/util/dump_charclass.h b/src/util/dump_charclass.h index 45b707f1..99964134 100644 --- a/src/util/dump_charclass.h +++ b/src/util/dump_charclass.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,6 +37,7 @@ #include #include +#include #include #include @@ -55,9 +56,16 @@ void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength = 16, std::string describeClass(const CharReach &cr, size_t maxLength = 16, enum cc_output_t out_type = CC_OUT_TEXT); -std::string describeClasses(const std::vector &v, +template +std::string describeClasses(const Container &container, size_t maxClassLength = 16, - enum cc_output_t out_type = CC_OUT_TEXT); + enum cc_output_t out_type = CC_OUT_TEXT) { + std::ostringstream oss; + for (const CharReach &cr : container) { + describeClass(oss, cr, maxClassLength, out_type); + } + return oss.str(); +} void describeClass(FILE *f, const CharReach &cr, size_t maxLength, enum cc_output_t out_type); From 8fdef3f3be3639a6d7b30e2e8757145d958b5ec9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 10 Apr 2017 14:23:00 +1000 Subject: [PATCH 253/326] accel_dfa_build_strat: use flat_map for small maps --- src/nfa/accel_dfa_build_strat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 2320c756..019edc50 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -124,7 +124,7 @@ void extend(const raw_dfa &rdfa, const path &p, out.push_back(move(pp)); } - map dest; + flat_map dest; for (unsigned i = 0; i < N_CHARS; i++) { u32 succ = s.next[rdfa.alpha_remap[i]]; dest[succ].set(i); @@ -330,7 +330,7 @@ accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const { const dstate &raw = rdfa.states[this_idx]; const vector rev_map = reverse_alpha_remapping(rdfa); bool outs2_broken = false; - map succs; + flat_map succs; for (u32 i = 0; i < rev_map.size(); i++) { if (raw.next[i] == this_idx) { From 8650a1a33f2a566f23e3dd790f465fc57d04e060 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 13 Apr 2017 
16:18:22 +1000 Subject: [PATCH 254/326] dfa_min: clean up and improve minimize code --- src/nfa/dfa_min.cpp | 248 +++++++++++++++++++------------------------- src/nfa/dfa_min.h | 7 +- 2 files changed, 109 insertions(+), 146 deletions(-) diff --git a/src/nfa/dfa_min.cpp b/src/nfa/dfa_min.cpp index f83d1420..f309cc53 100644 --- a/src/nfa/dfa_min.cpp +++ b/src/nfa/dfa_min.cpp @@ -26,12 +26,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file -* \brief Build code for DFA minimization -*/ +/** + * \file + * \brief Build code for DFA minimization. + */ /** - * /Summary of the Hopcrofts algorithm/ + * /Summary of the Hopcroft minimisation algorithm/ + * * partition := {F, Q \ F}; * work_queue := {F}; * while (work_queue is not empty) do @@ -57,8 +59,7 @@ #include "dfa_min.h" #include "grey.h" -#include "nfa/rdfa.h" -#include "nfagraph/ng_mcclellan.h" +#include "rdfa.h" #include "ue2common.h" #include "util/container.h" #include "util/noncopyable.h" @@ -67,12 +68,11 @@ #include #include +#include #include +#include #include #include -#include - -#include using namespace std; @@ -81,118 +81,81 @@ namespace ue2 { namespace { struct hopcroft_state_info { - vector > prev; + explicit hopcroft_state_info(size_t alpha_size) : prev(alpha_size) {} + + /** \brief Mapping from symbol to a list of predecessors that transition to + * this state on that symbol. */ + vector> prev; }; -struct DFA_components : noncopyable { - dstate_id_t nstates; - size_t inp_size; - set work_queue; - /*Partition contains reduced states*/ - partitioned_set partition; - vector states; +struct HopcroftInfo : noncopyable { + size_t alpha_size; //!< Size of DFA alphabet. + queue work_queue; //!< Hopcroft work queue of partition indices. + partitioned_set partition; //!< Partition set of DFA states. + vector states; //!< Pre-calculated state info (preds) - explicit DFA_components(const raw_dfa &rdfa); + explicit HopcroftInfo(const raw_dfa &rdfa); }; -} //namespace +} // namespace /** - * create_map: - * Creates an initial partitioning and work_queue. - * Initial partition contains {accepting states..., Non-accepting states} - * Initial work_queue contains accepting state subsets + * \brief Create an initial partitioning and work_queue. * - * The initial partitioning needs to distinguish between the different - * reporting behaviours (unlike standard hopcroft) --> more than one subset - * possible for the accepting states. + * Initial partition contains {accepting states..., Non-accepting states} + * Initial work_queue contains accepting state subsets * - * Look for accepting states in both reports and reports_eod. - * Creates a map with a key(reports, reports_eod) and an id. - * Reports of each state are searched against the map and - * added to the corresponding id -> partition[id] and work_queue[id]. - * Non Accept states are added to partition[id+1]. + * The initial partitioning needs to distinguish between the different + * reporting behaviours (unlike standard Hopcroft) --> more than one subset + * possible for the accepting states. + * + * Look for accepting states in both reports and reports_eod. + * Creates a map with a key(reports, reports_eod) and an id. + * Reports of each state are searched against the map and + * added to the corresponding id -> partition[id] and work_queue[id]. + * Non Accept states are added to partition[id+1]. 
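+ *
+ * (Worked example, illustrative rather than from the original source: in a
+ * DFA where states 1 and 3 both report {R1} and state 2 reports {R2}, the
+ * key ({R1}, {}) maps to subset 0 and ({R2}, {}) to subset 1, so the initial
+ * partition is {1, 3}, {2}, {0, remaining non-accept states}, and subsets 0
+ * and 1 are pushed onto the work queue.)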
*/ static -vector create_map(const raw_dfa &rdfa, set &work_queue) { +vector create_map(const raw_dfa &rdfa, queue &work_queue) { using ReportKey = pair, flat_set>; map subset_map; vector state_to_subset(rdfa.states.size(), INVALID_SUBSET); for (size_t i = 0; i < rdfa.states.size(); i++) { - if (!rdfa.states[i].reports.empty() || - !rdfa.states[i].reports_eod.empty()) { - ReportKey key(rdfa.states[i].reports, rdfa.states[i].reports_eod); + const auto &ds = rdfa.states[i]; + if (!ds.reports.empty() || !ds.reports_eod.empty()) { + ReportKey key(ds.reports, ds.reports_eod); if (contains(subset_map, key)) { state_to_subset[i] = subset_map[key]; } else { size_t sub = subset_map.size(); - subset_map[key] = sub; + subset_map.emplace(std::move(key), sub); state_to_subset[i] = sub; - work_queue.insert(sub); + work_queue.push(sub); } } } - /* handle non accepts */ + /* Give non-accept states their own subset. */ size_t non_accept_sub = subset_map.size(); - for (size_t i = 0; i < state_to_subset.size(); i++) { - if (state_to_subset[i] == INVALID_SUBSET) { - state_to_subset[i] = non_accept_sub; - } - } + replace(state_to_subset.begin(), state_to_subset.end(), INVALID_SUBSET, + non_accept_sub); return state_to_subset; } -DFA_components::DFA_components(const raw_dfa &rdfa) - : nstates(rdfa.states.size()), - inp_size(rdfa.states[nstates - 1].next.size()), - partition(create_map(rdfa, work_queue)) { - /* initializing states */ - for (size_t i = 0; i < nstates; i++) { - states.push_back(hopcroft_state_info()); - states.back().prev.resize(inp_size); - } - - for (size_t i = 0; i < nstates; i++) { // i is the previous state - for (size_t j = 0; j < inp_size; j++) { - /* Creating X_table */ - dstate_id_t present_state = rdfa.states[i].next[j]; - states[present_state].prev[j].push_back(i); - - DEBUG_PRINTF("rdfa.states[%zu].next[%zu] %hu \n", i, j, - rdfa.states[i].next[j]); +HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) + : alpha_size(rdfa.alpha_size), partition(create_map(rdfa, work_queue)), + states(rdfa.states.size(), hopcroft_state_info(alpha_size)) { + /* Construct predecessor lists for each state, indexed by symbol. */ + for (size_t i = 0; i < states.size(); i++) { // i is the previous state + for (size_t sym = 0; sym < alpha_size; sym++) { + dstate_id_t present_state = rdfa.states[i].next[sym]; + states[present_state].prev[sym].push_back(i); } } } -/** - * choose and remove a set A from work_queue. - */ -static -void get_work_item(DFA_components &mdfa, ue2::flat_set &A) { - A.clear(); - assert(!mdfa.work_queue.empty()); - set::iterator pt = mdfa.work_queue.begin(); - insert(&A, mdfa.partition[*pt]); - mdfa.work_queue.erase(pt); -} - -/** - * X is the set of states for which a transition on the input leads to a state - * in A. - */ -static -void create_X(const DFA_components &mdfa, const ue2::flat_set &A, - size_t inp, ue2::flat_set &X) { - X.clear(); - - for (dstate_id_t id : A) { - insert(&X, mdfa.states[id].prev[inp]); - } -} - /** * For a split set X, each subset S (given by part_index) in the partition, two * sets are created: v_inter (X intersection S) and v_sub (S - X). @@ -206,14 +169,14 @@ void create_X(const DFA_components &mdfa, const ue2::flat_set &A, * - replace S in work_queue by the smaller of the two sets. 
*/ static -void split_and_replace_set(const size_t part_index, DFA_components &mdfa, - const ue2::flat_set &splitter) { +void split_and_replace_set(const size_t part_index, HopcroftInfo &info, + const flat_set &splitter) { /* singleton sets cannot be split */ - if (mdfa.partition[part_index].size() == 1) { + if (info.partition[part_index].size() == 1) { return; } - size_t small_index = mdfa.partition.split(part_index, splitter); + size_t small_index = info.partition.split(part_index, splitter); if (small_index == INVALID_SUBSET) { /* the set could not be split */ @@ -223,54 +186,56 @@ void split_and_replace_set(const size_t part_index, DFA_components &mdfa, /* larger subset remains at the input subset index, if the input subset was * already in the work queue then the larger subset will remain there. */ - mdfa.work_queue.insert(small_index); + info.work_queue.push(small_index); } /** - * The complete Hopcrofts algorithm is implemented in this function. - * Choose and remove a set tray from work_queue - * For each input- X is created. - * For each subset in the partition, split_and_replace_sets are called with the - * split set. + * \brief Core of the Hopcroft minimisation algorithm. */ static -void dfa_min(DFA_components &mdfa) { - ue2::flat_set A, X; +void dfa_min(HopcroftInfo &info) { + flat_set curr, sym_preds; vector cand_subsets; - while (!mdfa.work_queue.empty()) { - get_work_item(mdfa, A); + while (!info.work_queue.empty()) { + /* Choose and remove a set of states (curr, or A in the description + * above) from the work queue. Note that we copy the set because the + * partition may be split by the loop below. */ + curr.clear(); + insert(&curr, info.partition[info.work_queue.front()]); + info.work_queue.pop(); - for (size_t inp = 0; inp < mdfa.inp_size; inp++) { - create_X(mdfa, A, inp, X); - if (X.empty()) { + for (size_t sym = 0; sym < info.alpha_size; sym++) { + /* Find the set of states sym_preds for which a transition on the + * given symbol leads to a state in curr. */ + sym_preds.clear(); + for (dstate_id_t s : curr) { + insert(&sym_preds, info.states[s].prev[sym]); + } + + if (sym_preds.empty()) { continue; } - /* we only need to consider subsets with at least one member in X for - * splitting */ + /* we only need to consider subsets with at least one member in + * sym_preds for splitting */ cand_subsets.clear(); - mdfa.partition.find_overlapping(X, &cand_subsets); + info.partition.find_overlapping(sym_preds, &cand_subsets); for (size_t sub : cand_subsets) { - split_and_replace_set(sub, mdfa, X); + split_and_replace_set(sub, info, sym_preds); } } } } /** - * Creating new dfa table - * Map ordering contains key being an equivalence classes first state - * and the value being the equivalence class index. - * Eq_state[i] tells us new state id the equivalence class located at - * partition[i]. + * \brief Build the new DFA state table. */ static -void mapping_new_states(const DFA_components &mdfa, - vector &old_to_new, - raw_dfa &rdfa) { - const size_t num_partitions = mdfa.partition.size(); +void mapping_new_states(const HopcroftInfo &info, + vector &old_to_new, raw_dfa &rdfa) { + const size_t num_partitions = info.partition.size(); // Mapping from equiv class's first state to equiv class index. 
map ordering; @@ -279,7 +244,7 @@ void mapping_new_states(const DFA_components &mdfa, vector eq_state(num_partitions); for (size_t i = 0; i < num_partitions; i++) { - ordering[*mdfa.partition[i].begin()] = i; + ordering[*info.partition[i].begin()] = i; } dstate_id_t new_id = 0; @@ -287,30 +252,28 @@ void mapping_new_states(const DFA_components &mdfa, eq_state[m.second] = new_id++; } - for (size_t t = 0; t < mdfa.partition.size(); t++) { - for (dstate_id_t id : mdfa.partition[t]) { + for (size_t t = 0; t < info.partition.size(); t++) { + for (dstate_id_t id : info.partition[t]) { old_to_new[id] = eq_state[t]; } } vector new_states; new_states.reserve(num_partitions); - for (size_t i = 0; i < mdfa.nstates; i++) { - if (contains(ordering, i)) { - new_states.push_back(rdfa.states[i]); - } + + for (const auto &m : ordering) { + new_states.push_back(rdfa.states[m.first]); } - rdfa.states.swap(new_states); + rdfa.states = std::move(new_states); } static -void renumber_new_states(const DFA_components &mdfa, - const vector &old_to_new, - raw_dfa &rdfa) { - for (size_t i = 0; i < mdfa.partition.size(); i++) { - for (size_t j = 0; j < mdfa.inp_size; j++) { - dstate_id_t output = rdfa.states[i].next[j]; - rdfa.states[i].next[j] = old_to_new[output]; +void renumber_new_states(const HopcroftInfo &info, + const vector &old_to_new, raw_dfa &rdfa) { + for (size_t i = 0; i < info.partition.size(); i++) { + for (size_t sym = 0; sym < info.alpha_size; sym++) { + dstate_id_t output = rdfa.states[i].next[sym]; + rdfa.states[i].next[sym] = old_to_new[output]; } dstate_id_t dad = rdfa.states[i].daddy; rdfa.states[i].daddy = old_to_new[dad]; @@ -321,17 +284,16 @@ void renumber_new_states(const DFA_components &mdfa, } static -void new_dfa(raw_dfa &rdfa, const DFA_components &mdfa) { - if (mdfa.partition.size() != mdfa.nstates) { - vector old_to_new(mdfa.nstates); - mapping_new_states(mdfa, old_to_new, rdfa); - renumber_new_states(mdfa, old_to_new, rdfa); +void new_dfa(raw_dfa &rdfa, const HopcroftInfo &info) { + if (info.partition.size() == info.states.size()) { + return; } + + vector old_to_new(info.states.size()); + mapping_new_states(info, old_to_new, rdfa); + renumber_new_states(info, old_to_new, rdfa); } -/** - * MAIN FUNCTION - */ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { if (!grey.minimizeDFA) { return; @@ -339,10 +301,10 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { UNUSED const size_t states_before = rdfa.states.size(); - DFA_components mdfa(rdfa); + HopcroftInfo info(rdfa); - dfa_min(mdfa); - new_dfa(rdfa, mdfa); + dfa_min(info); + new_dfa(rdfa, info); DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before, rdfa.states.size()); diff --git a/src/nfa/dfa_min.h b/src/nfa/dfa_min.h index 8277a4ba..61ca6c21 100644 --- a/src/nfa/dfa_min.h +++ b/src/nfa/dfa_min.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,8 +26,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file - * \brief Build code for McClellan DFA. +/** + * \file + * \brief Build code for DFA minimization. 
*/ #ifndef DFA_MIN_H From 41d7aa82814763cce72c86a602e4920c98b05063 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 13 Apr 2017 13:18:33 +1000 Subject: [PATCH 255/326] dfa: prune_overlong -> clear_deeper_reports Rather than pruning states, we simply clear reports on states that are too deep and allow Hopcroft minimisation to reduce the size of the DFA afterwards. --- src/nfa/mcclellancompile_util.cpp | 111 +++------------------------- src/nfa/mcclellancompile_util.h | 10 ++- src/smallwrite/smallwrite_build.cpp | 4 +- 3 files changed, 18 insertions(+), 107 deletions(-) diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index 7a3ceaf1..17e022fe 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -167,70 +167,8 @@ u32 calc_min_dist_from_bob(raw_dfa &raw, vector *dist_in) { return last_d; } -static -vector> find_in_edges(const raw_dfa &raw) { - vector> in_edges(raw.states.size()); - flat_set seen; - - for (u32 s = 1; s < raw.states.size(); s++) { - seen.clear(); - for (u32 j = 0; j < raw.alpha_size; j++) { - dstate_id_t t = raw.states[s].next[j]; - if (!seen.insert(t).second) { - continue; - } - in_edges[t].push_back(s); - } - } - - return in_edges; -} - -static -vector calc_min_dist_to_accept(const raw_dfa &raw, - const vector> &in_edges) { - vector dist(raw.states.size(), ~0U); - - /* for reporting states to start from */ - deque to_visit; - for (u32 s = 0; s < raw.states.size(); s++) { - if (state_has_reports(raw, s)) { - to_visit.push_back(s); - dist[s] = 0; - } - } - - /* bfs */ - UNUSED u32 last_d = 0; - while (!to_visit.empty()) { - dstate_id_t s = to_visit.front(); - to_visit.pop_front(); - assert(s != DEAD_STATE); - - u32 d = dist[s]; - assert(d >= last_d); - assert(d != ~0U); - - for (auto t : in_edges[s]) { - if (t == DEAD_STATE) { - continue; - } - if (dist[t] == ~0U) { - to_visit.push_back(t); - dist[t] = d + 1; - } else { - assert(dist[t] <= d + 1); - } - } - - last_d = d; - } - - return dist; -} - -bool prune_overlong(raw_dfa &raw, u32 max_offset) { - DEBUG_PRINTF("pruning to at most %u\n", max_offset); +bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) { + DEBUG_PRINTF("clearing reports on states deeper than %u\n", max_offset); vector bob_dist; u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist); @@ -238,47 +176,18 @@ bool prune_overlong(raw_dfa &raw, u32 max_offset) { return false; } - vector accept_dist = calc_min_dist_to_accept(raw, find_in_edges(raw)); - - /* look over the states and filter out any which cannot reach a report - * states before max_offset */ - vector new_ids(raw.states.size()); - vector new_states; - u32 count = 1; - new_states.push_back(raw.states[DEAD_STATE]); - + bool changed = false; for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) { - if (bob_dist[s] + accept_dist[s] > max_offset) { - DEBUG_PRINTF("pruned %u: bob %u, report %u\n", s, bob_dist[s], - accept_dist[s]); - new_ids[s] = DEAD_STATE; - } else { - new_ids[s] = count++; - new_states.push_back(raw.states[s]); - assert(new_states.size() == count); - assert(new_ids[s] <= s); + if (bob_dist[s] > max_offset && state_has_reports(raw, s)) { + DEBUG_PRINTF("clearing reports on %u (depth %u)\n", s, bob_dist[s]); + auto &ds = raw.states[s]; + ds.reports.clear(); + ds.reports_eod.clear(); + changed = true; } } - /* swap states */ - DEBUG_PRINTF("pruned %zu -> %u\n", raw.states.size(), count); - raw.states = std::move(new_states); - new_states.clear(); - - /* update edges and daddys to refer to the new ids */ 
- for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) { - for (u32 j = 0; j < raw.alpha_size; j++) { - dstate_id_t old_t = raw.states[s].next[j]; - raw.states[s].next[j] = new_ids[old_t]; - } - raw.states[s].daddy = new_ids[raw.states[s].daddy]; - } - - /* update specials */ - raw.start_floating = new_ids[raw.start_floating]; - raw.start_anchored = new_ids[raw.start_anchored]; - - return true; + return changed; } set all_reports(const raw_dfa &rdfa) { diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index 554c1efd..d681e06b 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,10 +39,12 @@ namespace ue2 { u32 remove_leading_dots(raw_dfa &raw); /** - * Prunes any states which cannot be reached within max_offset from start of - * stream. Returns false if no changes are made to the rdfa + * \brief Clear reports on any states that are deeper than \a max_offset from + * start of stream. + * + * Returns false if no changes are made to the DFA. */ -bool prune_overlong(raw_dfa &raw, u32 max_offset); +bool clear_deeper_reports(raw_dfa &raw, u32 max_offset); std::set all_reports(const raw_dfa &rdfa); bool has_eod_accepts(const raw_dfa &rdfa); diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index a27db736..bcdd12bb 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -279,7 +279,7 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) { return; } - if (prune_overlong(*r, cc.grey.smallWriteLargestBuffer)) { + if (clear_deeper_reports(*r, cc.grey.smallWriteLargestBuffer)) { minimize_hopcroft(*r, cc.grey); } @@ -725,7 +725,7 @@ bytecode_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, if (*small_region <= *start_offset) { return nullptr; } - if (prune_overlong(rdfa, *small_region - *start_offset)) { + if (clear_deeper_reports(rdfa, *small_region - *start_offset)) { minimize_hopcroft(rdfa, cc.grey); if (rdfa.start_anchored == DEAD_STATE) { DEBUG_PRINTF("all patterns pruned out\n"); From 0dd8536c6ecff87f28c61b263a1aa596b0fc52c2 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Apr 2017 10:56:22 +1000 Subject: [PATCH 256/326] dfa: only accel init states from smwr path If the small-write DFA has been built from literals, then we only need to look for accel states at init. 
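
As a rough illustration of the scheme this patch introduces, here is a minimal
stand-alone sketch of gating acceleration analysis to the start states. All of
the names below (ToyDfa, reach_count, findAccelStates) and the accelerability
threshold are invented for the example; the real logic operates on raw_dfa
inside accel_dfa_build_strat::getAccelInfo(), shown in the diff that follows.

    #include <cstddef>
    #include <map>
    #include <vector>

    // Toy stand-ins, invented for this sketch only.
    struct ToyDfa {
        size_t start_anchored = 0;
        size_t start_floating = 0;
        std::vector<unsigned> reach_count; // outgoing reach per state
    };

    // Hypothetical accelerability test: a small outgoing reach is cheap to
    // scan for, so treat such a state as accelerable.
    static bool accelerable(const ToyDfa &dfa, size_t i) {
        return dfa.reach_count[i] <= 8;
    }

    std::map<size_t, unsigned> findAccelStates(const ToyDfa &dfa,
                                               bool only_accel_init) {
        std::map<size_t, unsigned> rv;
        auto do_state = [&](size_t i) {
            if (accelerable(dfa, i)) {
                rv.emplace(i, dfa.reach_count[i]);
            }
        };
        if (only_accel_init) {
            // Literal-built case: examine only the two start states.
            do_state(dfa.start_anchored);
            if (dfa.start_floating != dfa.start_anchored) {
                do_state(dfa.start_floating);
            }
        } else {
            // General case: examine every state.
            for (size_t i = 0; i < dfa.reach_count.size(); i++) {
                do_state(i);
            }
        }
        return rv;
    }

The saving is at compile time: the per-state analysis is skipped for all but
the init states when, as the commit message notes, no other state needs to be
considered for acceleration.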
--- src/nfa/accel_dfa_build_strat.cpp | 27 ++++++++++++++++++++------- src/nfa/accel_dfa_build_strat.h | 8 +++++--- src/nfa/goughcompile.cpp | 2 +- src/nfa/mcclellancompile.cpp | 3 ++- src/nfa/mcclellancompile.h | 7 ++++--- src/nfa/mcsheng_compile.cpp | 2 +- src/nfa/shengcompile.cpp | 4 ++-- src/nfa/shengcompile.h | 7 ++++--- src/rose/rose_build_anchored.cpp | 2 +- src/rose/rose_build_bytecode.cpp | 4 ++-- src/smallwrite/smallwrite_build.cpp | 24 +++++++++++++++--------- 11 files changed, 57 insertions(+), 33 deletions(-) diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index 019edc50..7c56ba72 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -541,17 +541,17 @@ accel_dfa_build_strat::getAccelInfo(const Grey &grey) { dstate_id_t sds_proxy = get_sds_or_proxy(rdfa); DEBUG_PRINTF("sds %hu\n", sds_proxy); - for (size_t i = 0; i < rdfa.states.size(); i++) { + /* Find accel info for a single state. */ + auto do_state = [&](size_t i) { if (i == DEAD_STATE) { - continue; + return; } /* Note on report acceleration states: While we can't accelerate while - * we - * are spamming out callbacks, the QR code paths don't raise reports + * we are spamming out callbacks, the QR code paths don't raise reports * during scanning so they can accelerate report states. */ if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) { - continue; + return; } size_t single_limit = @@ -562,15 +562,28 @@ accel_dfa_build_strat::getAccelInfo(const Grey &grey) { if (ei.cr.count() > single_limit) { DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, ei.cr.count()); - continue; + return; } DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count()); rv[i] = ei; + }; + + if (only_accel_init) { + DEBUG_PRINTF("only computing accel for init states\n"); + do_state(rdfa.start_anchored); + if (rdfa.start_floating != rdfa.start_anchored) { + do_state(rdfa.start_floating); + } + } else { + DEBUG_PRINTF("computing accel for all states\n"); + for (size_t i = 0; i < rdfa.states.size(); i++) { + do_state(i); + } } - /* provide accleration states to states in the region of sds */ + /* provide acceleration states to states in the region of sds */ if (contains(rv, sds_proxy)) { AccelScheme sds_ei = rv[sds_proxy]; sds_ei.double_byte.clear(); /* region based on single byte scheme diff --git a/src/nfa/accel_dfa_build_strat.h b/src/nfa/accel_dfa_build_strat.h index 3cfaf272..881892ed 100644 --- a/src/nfa/accel_dfa_build_strat.h +++ b/src/nfa/accel_dfa_build_strat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,8 +43,8 @@ struct Grey; class accel_dfa_build_strat : public dfa_build_strat { public: - explicit accel_dfa_build_strat(const ReportManager &rm_in) - : dfa_build_strat(rm_in) {} + accel_dfa_build_strat(const ReportManager &rm_in, bool only_accel_init_in) + : dfa_build_strat(rm_in), only_accel_init(only_accel_init_in) {} virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const; virtual size_t accelSize(void) const = 0; virtual u32 max_allowed_offset_accel() const = 0; @@ -53,6 +53,8 @@ public: virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out); virtual std::map getAccelInfo(const Grey &grey); +private: + bool only_accel_init; }; } // namespace ue2 diff --git 
a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index d92f285f..58b05d3d 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -80,7 +80,7 @@ public: gough_build_strat( raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in, const map &accel_info) - : mcclellan_build_strat(r, rm_in), rdfa(r), gg(g), + : mcclellan_build_strat(r, rm_in, false), rdfa(r), gg(g), accel_gough_info(accel_info) {} unique_ptr gatherReports(vector &reports /* out */, vector &reports_eod /* out */, diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 0aff6006..781a7238 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -981,8 +981,9 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, bytecode_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, + bool only_accel_init, set *accel_states) { - mcclellan_build_strat mbs(raw, rm); + mcclellan_build_strat mbs(raw, rm, only_accel_init); return mcclellanCompile_i(raw, mbs, cc, accel_states); } diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index be0a18c5..c204e03c 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -48,8 +48,9 @@ struct CompileContext; class mcclellan_build_strat : public accel_dfa_build_strat { public: - mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in) - : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {} + mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in, + bool only_accel_init_in) + : accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {} raw_dfa &get_raw() const override { return rdfa; } std::unique_ptr gatherReports( std::vector &reports /* out */, @@ -69,7 +70,7 @@ private: * states */ bytecode_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, + const ReportManager &rm, bool only_accel_init, std::set *accel_states = nullptr); /* used internally by mcclellan/haig/gough compile process */ diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index e4e4173a..2d9658f2 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -1025,7 +1025,7 @@ bytecode_ptr mcshengCompile(raw_dfa &raw, const CompileContext &cc, return nullptr; } - mcclellan_build_strat mbs(raw, rm); + mcclellan_build_strat mbs(raw, rm, false); dfa_info info(mbs); bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; diff --git a/src/nfa/shengcompile.cpp b/src/nfa/shengcompile.cpp index 8c061913..c4094ced 100644 --- a/src/nfa/shengcompile.cpp +++ b/src/nfa/shengcompile.cpp @@ -451,14 +451,14 @@ bool has_accel_sheng(const NFA *) { } bytecode_ptr shengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, + const ReportManager &rm, bool only_accel_init, set *accel_states) { if (!cc.grey.allowSheng) { DEBUG_PRINTF("Sheng is not allowed!\n"); return nullptr; } - sheng_build_strat strat(raw, rm); + sheng_build_strat strat(raw, rm, only_accel_init); dfa_info info(strat); DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h index 6afc1dd1..9885cd16 100644 --- a/src/nfa/shengcompile.h +++ b/src/nfa/shengcompile.h @@ -45,8 +45,9 @@ struct raw_dfa; class sheng_build_strat : public accel_dfa_build_strat { public: - sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in) - : accel_dfa_build_strat(rm_in), rdfa(rdfa_in) {} + sheng_build_strat(raw_dfa &rdfa_in, const 
ReportManager &rm_in, + bool only_accel_init_in) + : accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {} raw_dfa &get_raw() const override { return rdfa; } std::unique_ptr gatherReports( std::vector &reports /* out */, @@ -63,7 +64,7 @@ private: }; bytecode_ptr shengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, + const ReportManager &rm, bool only_accel_init, std::set *accel_states = nullptr); struct sheng_escape_info { diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 6d56ee00..a2af160e 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -829,7 +829,7 @@ size_t buildNfas(vector &anchored_dfas, minimize_hopcroft(rdfa, cc.grey); - auto nfa = mcclellanCompile(rdfa, cc, rm); + auto nfa = mcclellanCompile(rdfa, cc, rm, false); if (!nfa) { assert(0); throw std::bad_alloc(); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index e7b00126..1155b50a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -720,7 +720,7 @@ bytecode_ptr getDfa(raw_dfa &rdfa, bool is_transient, const CompileContext &cc, const ReportManager &rm) { // Unleash the Sheng!! - auto dfa = shengCompile(rdfa, cc, rm); + auto dfa = shengCompile(rdfa, cc, rm, false); if (!dfa && !is_transient) { // Sheng wasn't successful, so unleash McClellan! /* We don't try the hybrid for transient prefixes due to the extra @@ -729,7 +729,7 @@ bytecode_ptr getDfa(raw_dfa &rdfa, bool is_transient, } if (!dfa) { // Sheng wasn't successful, so unleash McClellan! - dfa = mcclellanCompile(rdfa, cc, rm); + dfa = mcclellanCompile(rdfa, cc, rm, false); } return dfa; } diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index bcdd12bb..ffd3fe0f 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -692,14 +692,18 @@ bool is_slow(const raw_dfa &rdfa, const set &accel, static bytecode_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, - const ReportManager &rm, + const ReportManager &rm, bool has_literals, set &accel_states) { + // If we determinised literals, then we only need to consider the init + // states for acceleration. + bool only_accel_init = has_literals; + bytecode_ptr dfa = nullptr; if (cc.grey.allowSmallWriteSheng) { - dfa = shengCompile(rdfa, cc, rm, &accel_states); + dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states); } if (!dfa) { - dfa = mcclellanCompile(rdfa, cc, rm, &accel_states); + dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init, &accel_states); } return dfa; } @@ -707,13 +711,14 @@ bytecode_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, static bytecode_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, const CompileContext &cc, const ReportManager &rm, - u32 *start_offset, u32 *small_region) { + bool has_literals, u32 *start_offset, + u32 *small_region) { *start_offset = remove_leading_dots(rdfa); // Unleash the McClellan! set accel_states; - auto nfa = getDfa(rdfa, cc, rm, accel_states); + auto nfa = getDfa(rdfa, cc, rm, has_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); return nullptr; @@ -732,7 +737,7 @@ bytecode_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, return nullptr; } - nfa = getDfa(rdfa, cc, rm, accel_states); + nfa = getDfa(rdfa, cc, rm, has_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); assert(0); /* able to build orig dfa but not the trimmed? 
*/ @@ -762,7 +767,8 @@ unique_ptr makeSmallWriteBuilder(size_t num_patterns, } bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { - if (!rdfa && is_empty(lit_trie) && is_empty(lit_trie_nocase)) { + const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase); + if (!rdfa && !has_literals) { DEBUG_PRINTF("no smallwrite engine\n"); poisoned = true; return nullptr; @@ -782,8 +788,8 @@ bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { u32 start_offset; u32 small_region; - auto nfa = - prepEngine(*rdfa, roseQuality, cc, rm, &start_offset, &small_region); + auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_literals, + &start_offset, &small_region); if (!nfa) { DEBUG_PRINTF("some smallwrite outfix could not be prepped\n"); /* just skip the smallwrite optimization */ From f5d769118aef3b25e9a5852ba8351e17256e16e0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Apr 2017 11:36:20 +1000 Subject: [PATCH 257/326] mcclellancompile: factor out find_daddy_candidates --- src/nfa/mcclellancompile.cpp | 39 +++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 781a7238..044e38c5 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -777,6 +777,28 @@ void addSuccessors(flat_set &dest, const dstate &source, } } +/* \brief Returns a set of states to search for a better daddy. */ +static +flat_set find_daddy_candidates(const dfa_info &info, + dstate_id_t curr_id) { + flat_set hinted; + + addIfEarlier(hinted, 0, curr_id); + addIfEarlier(hinted, info.raw.start_anchored, curr_id); + addIfEarlier(hinted, info.raw.start_floating, curr_id); + + // Add existing daddy and his successors, then search back one generation. 
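    // (Editorial aside, not part of the patch: the generation walk that
    //  follows can be read in isolation as the sketch below, which uses an
    //  invented daddy_of array in place of dfa_info. Starting from the
    //  current state's daddy, it visits at most two generations, daddy and
    //  then granddaddy, stopping early when it reaches state 0.
    //
    //      #include <set>
    //      #include <vector>
    //
    //      std::set<unsigned> walkDaddies(const std::vector<unsigned> &daddy_of,
    //                                     unsigned curr) {
    //          std::set<unsigned> hinted;
    //          unsigned daddy = daddy_of[curr];
    //          for (unsigned level = 0; daddy && level < 2; level++) {
    //              hinted.insert(daddy);    // candidate donor state
    //              daddy = daddy_of[daddy]; // up one generation
    //          }
    //          return hinted;
    //      }
    //
    //  The real function also collects each generation's successors via
    //  addSuccessors(), as the lines below do.)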
+ const u16 alphasize = info.impl_alpha_size; + dstate_id_t daddy = info.states[curr_id].daddy; + for (u32 level = 0; daddy && level < 2; level++) { + addIfEarlier(hinted, daddy, curr_id); + addSuccessors(hinted, info.states[daddy], alphasize, curr_id); + daddy = info.states[daddy].daddy; + } + + return hinted; +} + #define MAX_SHERMAN_SELF_LOOP 20 static @@ -817,22 +839,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, dstate_id_t best_daddy = 0; dstate &currState = info.states[curr_id]; - flat_set hinted; /* states to search for a better daddy */ - addIfEarlier(hinted, 0, curr_id); - addIfEarlier(hinted, info.raw.start_anchored, curr_id); - addIfEarlier(hinted, info.raw.start_floating, curr_id); - - dstate_id_t mydaddy = currState.daddy; - if (mydaddy) { - addIfEarlier(hinted, mydaddy, curr_id); - addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id); - dstate_id_t mygranddaddy = info.states[mydaddy].daddy; - if (mygranddaddy) { - addIfEarlier(hinted, mygranddaddy, curr_id); - addSuccessors(hinted, info.states[mygranddaddy], alphasize, - curr_id); - } - } + const auto hinted = find_daddy_candidates(info, curr_id); for (const dstate_id_t &donor : hinted) { assert(donor < curr_id); From 1538d90a9eb5edf69eb86270342b108e2b984d6b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 18 Apr 2017 16:35:06 +1000 Subject: [PATCH 258/326] mcsheng_compile: factor out find_daddy_candidates --- src/nfa/mcsheng_compile.cpp | 43 +++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/nfa/mcsheng_compile.cpp b/src/nfa/mcsheng_compile.cpp index 2d9658f2..2049fee0 100644 --- a/src/nfa/mcsheng_compile.cpp +++ b/src/nfa/mcsheng_compile.cpp @@ -617,7 +617,7 @@ void fill_in_succ_table_16(NFA *nfa, const dfa_info &info, #define MAX_SHERMAN_LIST_LEN 8 static -void addIfEarlier(set &dest, dstate_id_t candidate, +void addIfEarlier(flat_set &dest, dstate_id_t candidate, dstate_id_t max) { if (candidate < max) { dest.insert(candidate); @@ -625,13 +625,35 @@ void addIfEarlier(set &dest, dstate_id_t candidate, } static -void addSuccessors(set &dest, const dstate &source, +void addSuccessors(flat_set &dest, const dstate &source, u16 alphasize, dstate_id_t curr_id) { for (symbol_t s = 0; s < alphasize; s++) { addIfEarlier(dest, source.next[s], curr_id); } } +/* \brief Returns a set of states to search for a better daddy. */ +static +flat_set find_daddy_candidates(const dfa_info &info, + dstate_id_t curr_id) { + flat_set hinted; + + addIfEarlier(hinted, 0, curr_id); + addIfEarlier(hinted, info.raw.start_anchored, curr_id); + addIfEarlier(hinted, info.raw.start_floating, curr_id); + + // Add existing daddy and his successors, then search back one generation. 
+ const u16 alphasize = info.impl_alpha_size; + dstate_id_t daddy = info.states[curr_id].daddy; + for (u32 level = 0; daddy && level < 2; level++) { + addIfEarlier(hinted, daddy, curr_id); + addSuccessors(hinted, info.states[daddy], alphasize, curr_id); + daddy = info.states[daddy].daddy; + } + + return hinted; +} + #define MAX_SHERMAN_SELF_LOOP 20 static @@ -671,22 +693,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, dstate_id_t best_daddy = 0; dstate &currState = info.states[curr_id]; - set hinted; /* set of states to search for a better daddy */ - addIfEarlier(hinted, 0, curr_id); - addIfEarlier(hinted, info.raw.start_anchored, curr_id); - addIfEarlier(hinted, info.raw.start_floating, curr_id); - - dstate_id_t mydaddy = currState.daddy; - if (mydaddy) { - addIfEarlier(hinted, mydaddy, curr_id); - addSuccessors(hinted, info.states[mydaddy], alphasize, curr_id); - dstate_id_t mygranddaddy = info.states[mydaddy].daddy; - if (mygranddaddy) { - addIfEarlier(hinted, mygranddaddy, curr_id); - addSuccessors(hinted, info.states[mygranddaddy], alphasize, - curr_id); - } - } + flat_set hinted = find_daddy_candidates(info, curr_id); for (const dstate_id_t &donor : hinted) { assert(donor < curr_id); From beac58fcb42123106e7624509a9b8ab187ded090 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Apr 2017 12:44:20 +1000 Subject: [PATCH 259/326] dfa: allow smwr to avoid lengthy daddy recalc --- src/nfa/mcclellancompile.cpp | 19 +++++++++++++------ src/nfa/mcclellancompile.h | 3 ++- src/smallwrite/smallwrite_build.cpp | 19 +++++++++++-------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 044e38c5..29642dde 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -802,9 +802,9 @@ flat_set find_daddy_candidates(const dfa_info &info, #define MAX_SHERMAN_SELF_LOOP 20 static -void find_better_daddy(dfa_info &info, dstate_id_t curr_id, - bool using8bit, bool any_cyclic_near_anchored_state, - const Grey &grey) { +void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, + bool any_cyclic_near_anchored_state, + bool trust_daddy_states, const Grey &grey) { if (!grey.allowShermanStates) { return; } @@ -839,7 +839,12 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, dstate_id_t best_daddy = 0; dstate &currState = info.states[curr_id]; - const auto hinted = find_daddy_candidates(info, curr_id); + flat_set hinted; + if (trust_daddy_states) { + hinted.insert(currState.daddy); + } else { + hinted = find_daddy_candidates(info, curr_id); + } for (const dstate_id_t &donor : hinted) { assert(donor < curr_id); @@ -947,6 +952,7 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, + bool trust_daddy_states, set *accel_states) { u16 total_daddy = 0; dfa_info info(strat); @@ -963,7 +969,7 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, for (u32 i = 0; i < info.size(); i++) { find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state, - cc.grey); + trust_daddy_states, cc.grey); total_daddy += info.extra[i].daddytaken; } @@ -989,9 +995,10 @@ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, bytecode_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, + bool trust_daddy_states, set *accel_states) { mcclellan_build_strat mbs(raw, rm, 
only_accel_init); - return mcclellanCompile_i(raw, mbs, cc, accel_states); + return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states); } size_t mcclellan_build_strat::accelSize(void) const { diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index c204e03c..a176db28 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -71,12 +71,13 @@ private: bytecode_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, + bool trust_daddy_states = false, std::set *accel_states = nullptr); /* used internally by mcclellan/haig/gough compile process */ bytecode_ptr mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, - const CompileContext &cc, + const CompileContext &cc, bool trust_daddy_states = false, std::set *accel_states = nullptr); /** diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index ffd3fe0f..829c72e5 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -692,18 +692,20 @@ bool is_slow(const raw_dfa &rdfa, const set &accel, static bytecode_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, - const ReportManager &rm, bool has_literals, + const ReportManager &rm, bool has_non_literals, set &accel_states) { - // If we determinised literals, then we only need to consider the init + // If we determinised only literals, then we only need to consider the init // states for acceleration. - bool only_accel_init = has_literals; + bool only_accel_init = !has_non_literals; + bool trust_daddy_states = !has_non_literals; bytecode_ptr dfa = nullptr; if (cc.grey.allowSmallWriteSheng) { dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states); } if (!dfa) { - dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init, &accel_states); + dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init, + trust_daddy_states, &accel_states); } return dfa; } @@ -711,14 +713,14 @@ bytecode_ptr getDfa(raw_dfa &rdfa, const CompileContext &cc, static bytecode_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, const CompileContext &cc, const ReportManager &rm, - bool has_literals, u32 *start_offset, + bool has_non_literals, u32 *start_offset, u32 *small_region) { *start_offset = remove_leading_dots(rdfa); // Unleash the McClellan! set accel_states; - auto nfa = getDfa(rdfa, cc, rm, has_literals, accel_states); + auto nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); return nullptr; @@ -737,7 +739,7 @@ bytecode_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, return nullptr; } - nfa = getDfa(rdfa, cc, rm, has_literals, accel_states); + nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); assert(0); /* able to build orig dfa but not the trimmed? 
*/ @@ -768,6 +770,7 @@ unique_ptr makeSmallWriteBuilder(size_t num_patterns, bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase); + const bool has_non_literals = rdfa != nullptr; if (!rdfa && !has_literals) { DEBUG_PRINTF("no smallwrite engine\n"); poisoned = true; @@ -788,7 +791,7 @@ bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { u32 start_offset; u32 small_region; - auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_literals, + auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_non_literals, &start_offset, &small_region); if (!nfa) { DEBUG_PRINTF("some smallwrite outfix could not be prepped\n"); From 3b3f6b739cfef3e88017328fc75367a85533e725 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Apr 2017 13:37:56 +1000 Subject: [PATCH 260/326] smallwrite: use failure map to set dfa daddy state --- src/nfa/mcclellancompile.cpp | 4 +++- src/smallwrite/smallwrite_build.cpp | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 29642dde..e2466000 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -842,6 +842,8 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, flat_set hinted; if (trust_daddy_states) { hinted.insert(currState.daddy); + addIfEarlier(hinted, info.raw.start_floating, curr_id); + addIfEarlier(hinted, info.raw.start_anchored, curr_id); } else { hinted = find_daddy_candidates(info, curr_id); } @@ -896,7 +898,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, - self_loop_width); + self_loop_width); return; } diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 829c72e5..fac8d012 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -443,11 +443,11 @@ bool isSaneTrie(const LitTrie &trie) { * edges and reports. */ static -void buildAutomaton(LitTrie &trie) { +void buildAutomaton(LitTrie &trie, + map &failure_map) { assert(isSaneTrie(trie)); // Find our failure transitions and reports. - map failure_map; vector ordering; ACVisitor ac_vis(trie, failure_map, ordering); boost::breadth_first_search(trie, trie.root, visitor(ac_vis)); @@ -535,7 +535,8 @@ static unique_ptr buildDfa(LitTrie &trie, bool nocase) { DEBUG_PRINTF("trie has %zu states\n", num_vertices(trie)); - buildAutomaton(trie); + map failure_map; + buildAutomaton(trie, failure_map); auto rdfa = make_unique(NFA_OUTFIX); @@ -559,13 +560,19 @@ unique_ptr buildDfa(LitTrie &trie, bool nocase) { DEBUG_PRINTF("state %zu\n", u_state); assert(u_state < rdfa->states.size()); auto &ds = rdfa->states[u_state]; - ds.daddy = root_state; ds.reports = trie[u].reports; - if (!ds.reports.empty()) { DEBUG_PRINTF("reports: %s\n", as_string_list(ds.reports).c_str()); } + // Set daddy state from failure map. + if (u == trie.root) { + ds.daddy = DEAD_STATE; + } else { + assert(contains(failure_map, u)); + ds.daddy = trie[failure_map.at(u)].index + 1; + } + // By default, transition back to the root. fill(ds.next.begin(), ds.next.end(), root_state); // TOP should be a self-loop. 
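
The failure map driving the daddy assignment above is the classic Aho-Corasick
construction: each trie state's failure link points at the state for the
longest proper suffix of its string that is also present in the trie. As a
hedged, self-contained sketch (the TrieNode layout here is invented; Hyperscan
builds the map with the ACVisitor breadth-first search in
smallwrite_build.cpp), the links can be computed like this:

    #include <cstddef>
    #include <map>
    #include <queue>
    #include <vector>

    // Toy trie node for this sketch; index 0 is assumed to be the root.
    struct TrieNode {
        std::map<char, size_t> kids; // outgoing edges by character
        size_t fail = 0;             // failure link (0 == root)
    };

    void computeFailureLinks(std::vector<TrieNode> &trie) {
        std::queue<size_t> q;
        for (const auto &kv : trie[0].kids) {
            trie[kv.second].fail = 0; // depth-1 states fail to the root
            q.push(kv.second);
        }
        // BFS guarantees a state's failure target is computed before the
        // state itself is processed.
        while (!q.empty()) {
            size_t u = q.front();
            q.pop();
            for (const auto &kv : trie[u].kids) {
                const char c = kv.first;
                const size_t v = kv.second;
                // Follow u's failure chain to a state with a c-transition.
                size_t f = trie[u].fail;
                while (f != 0 && !trie[f].kids.count(c)) {
                    f = trie[f].fail;
                }
                auto it = trie[f].kids.find(c);
                trie[v].fail = (it == trie[f].kids.end()) ? 0 : it->second;
                q.push(v);
            }
        }
    }

Setting each DFA state's daddy to its failure target suits the Sherman scheme
used later in the McClellan build: a state and the state for its longest
suffix tend to share most of their transitions, so the encoding of a state's
differences from its daddy stays small.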
From 10f52346ca4d0e5749a844a73942545fe3695381 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Apr 2017 13:56:51 +1000 Subject: [PATCH 261/326] smallwrite: bfs ordering, refine daddy selection --- src/nfa/mcclellancompile.cpp | 14 +++++++++++--- src/smallwrite/smallwrite_build.cpp | 7 +++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index e2466000..e875477b 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -841,9 +841,17 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, flat_set hinted; if (trust_daddy_states) { - hinted.insert(currState.daddy); - addIfEarlier(hinted, info.raw.start_floating, curr_id); - addIfEarlier(hinted, info.raw.start_anchored, curr_id); + // Use the daddy already set for this state so long as it isn't already + // a Sherman state. + if (!info.is_sherman(currState.daddy)) { + hinted.insert(currState.daddy); + } else { + // Fall back to granddaddy, which has already been processed (due + // to BFS ordering) and cannot be a Sherman state. + dstate_id_t granddaddy = info.states[currState.daddy].daddy; + assert(!info.is_sherman(granddaddy)); + hinted.insert(granddaddy); + } } else { hinted = find_daddy_candidates(info, curr_id); } diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index fac8d012..ce3315e8 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -452,6 +452,13 @@ void buildAutomaton(LitTrie &trie, ACVisitor ac_vis(trie, failure_map, ordering); boost::breadth_first_search(trie, trie.root, visitor(ac_vis)); + // Renumber with BFS ordering, which is assumed by other DFA construction + // code (i.e. Sherman state computation). + size_t idx = 0; + for (auto v : ordering) { + trie[v].index = idx++; + } + // Compute missing edges from failure map. for (auto v : ordering) { DEBUG_PRINTF("vertex %zu\n", trie[v].index); From 388c16c550e1ab27a939ca105cdcff3f28947b30 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Apr 2017 14:21:02 +1000 Subject: [PATCH 262/326] smallwrite: construct DFA states in BFS order --- src/smallwrite/smallwrite_build.cpp | 48 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index ce3315e8..10079a98 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -444,21 +444,14 @@ bool isSaneTrie(const LitTrie &trie) { */ static void buildAutomaton(LitTrie &trie, - map &failure_map) { + map &failure_map, + vector &ordering) { assert(isSaneTrie(trie)); // Find our failure transitions and reports. - vector ordering; ACVisitor ac_vis(trie, failure_map, ordering); boost::breadth_first_search(trie, trie.root, visitor(ac_vis)); - // Renumber with BFS ordering, which is assumed by other DFA construction - // code (i.e. Sherman state computation). - size_t idx = 0; - for (auto v : ordering) { - trie[v].index = idx++; - } - // Compute missing edges from failure map. for (auto v : ordering) { DEBUG_PRINTF("vertex %zu\n", trie[v].index); @@ -537,13 +530,35 @@ u16 buildAlphabet(const LitTrie &trie, bool nocase, return i; } +/** + * \brief Calculate state mapping, from vertex in trie to state index in BFS + * ordering. 
+ */ +static +unordered_map +makeStateMap(const LitTrie &trie, const vector &ordering) { + unordered_map state_ids; + state_ids.reserve(num_vertices(trie)); + u32 idx = DEAD_STATE + 1; + state_ids.emplace(trie.root, idx++); + for (auto v : ordering) { + state_ids.emplace(v, idx++); + } + assert(state_ids.size() == num_vertices(trie)); + return state_ids; +} + /** \brief Construct a raw_dfa from a literal trie. */ static unique_ptr buildDfa(LitTrie &trie, bool nocase) { DEBUG_PRINTF("trie has %zu states\n", num_vertices(trie)); + vector ordering; map failure_map; - buildAutomaton(trie, failure_map); + buildAutomaton(trie, failure_map, ordering); + + // Construct DFA states in BFS order. + const auto state_ids = makeStateMap(trie, ordering); auto rdfa = make_unique(NFA_OUTFIX); @@ -553,7 +568,8 @@ unique_ptr buildDfa(LitTrie &trie, bool nocase) { rdfa->alpha_size = buildAlphabet(trie, nocase, alpha, unalpha); // Construct states and transitions. - const u16 root_state = DEAD_STATE + 1; + const u16 root_state = state_ids.at(trie.root); + assert(root_state == DEAD_STATE + 1); rdfa->start_anchored = root_state; rdfa->start_floating = root_state; rdfa->states.resize(num_vertices(trie) + 1, dstate(rdfa->alpha_size)); @@ -563,8 +579,8 @@ unique_ptr buildDfa(LitTrie &trie, bool nocase) { rdfa->states[DEAD_STATE].next.end(), DEAD_STATE); for (auto u : vertices_range(trie)) { - auto u_state = trie[u].index + 1; - DEBUG_PRINTF("state %zu\n", u_state); + auto u_state = state_ids.at(u); + DEBUG_PRINTF("state %u\n", u_state); assert(u_state < rdfa->states.size()); auto &ds = rdfa->states[u_state]; ds.reports = trie[u].reports; @@ -577,7 +593,7 @@ unique_ptr buildDfa(LitTrie &trie, bool nocase) { ds.daddy = DEAD_STATE; } else { assert(contains(failure_map, u)); - ds.daddy = trie[failure_map.at(u)].index + 1; + ds.daddy = state_ids.at(failure_map.at(u)); } // By default, transition back to the root. @@ -590,10 +606,10 @@ unique_ptr buildDfa(LitTrie &trie, bool nocase) { if (v == trie.root) { continue; } - auto v_state = trie[v].index + 1; + auto v_state = state_ids.at(v); assert((u16)trie[v].c < alpha.size()); u16 sym = alpha[trie[v].c]; - DEBUG_PRINTF("edge to %zu on 0x%02x (sym %u)\n", v_state, + DEBUG_PRINTF("edge to %u on 0x%02x (sym %u)\n", v_state, trie[v].c, sym); assert(sym < ds.next.size()); assert(ds.next[sym] == root_state); From 42fca877a75626b2b7d5e1e9ce2a1b50865af190 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Apr 2017 16:13:43 +1000 Subject: [PATCH 263/326] smallwrite: prune trie when rose is high quality --- src/smallwrite/smallwrite_build.cpp | 103 ++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 10079a98..fe113525 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -471,6 +471,97 @@ void buildAutomaton(LitTrie &trie, } } +static +vector findDistFromRoot(const LitTrie &trie) { + vector dist(num_vertices(trie), UINT32_MAX); + dist[trie[trie.root].index] = 0; + + // BFS to find dist from root. + breadth_first_search( + trie, trie.root, + visitor(make_bfs_visitor(record_distances( + make_iterator_property_map(dist.begin(), + get(&LitTrieVertexProps::index, trie)), + boost::on_tree_edge())))); + + return dist; +} + +static +vector findDistToAccept(const LitTrie &trie) { + vector dist(num_vertices(trie), UINT32_MAX); + + // Start with all reporting vertices. 
+ deque q; + for (auto v : vertices_range(trie)) { + if (!trie[v].reports.empty()) { + q.push_back(v); + dist[trie[v].index] = 0; + } + } + + // Custom BFS, since we have a pile of sources. + while (!q.empty()) { + auto v = q.front(); + q.pop_front(); + u32 d = dist[trie[v].index]; + + for (auto u : inv_adjacent_vertices_range(v, trie)) { + auto &u_dist = dist[trie[u].index]; + if (u_dist == UINT32_MAX) { + q.push_back(u); + u_dist = d + 1; + } + } + } + + return dist; +} + +/** + * \brief Prune all vertices from the trie that do not lie on a path from root + * to accept of length <= max_depth. + */ +static +void pruneTrie(LitTrie &trie, u32 max_depth) { + DEBUG_PRINTF("pruning trie to %u\n", max_depth); + + auto dist_from_root = findDistFromRoot(trie); + auto dist_to_accept = findDistToAccept(trie); + + vector dead; + for (auto v : vertices_range(trie)) { + if (v == trie.root) { + continue; + } + auto v_index = trie[v].index; + DEBUG_PRINTF("vertex %zu: from_start=%u, to_accept=%u\n", trie[v].index, + dist_from_root[v_index], dist_to_accept[v_index]); + assert(dist_from_root[v_index] != UINT32_MAX); + assert(dist_to_accept[v_index] != UINT32_MAX); + u32 min_path_len = dist_from_root[v_index] + dist_to_accept[v_index]; + if (min_path_len > max_depth) { + DEBUG_PRINTF("pruning vertex %zu (min path len %u)\n", + trie[v].index, min_path_len); + clear_vertex(v, trie); + dead.push_back(v); + } + } + + if (dead.empty()) { + return; + } + + for (auto v : dead) { + remove_vertex(v, trie); + } + + DEBUG_PRINTF("%zu vertices remain\n", num_vertices(trie)); + + renumber_edges(trie); + renumber_vertices(trie); +} + static vector getAlphabet(const LitTrie &trie, bool nocase) { vector esets = {CharReach::dot()}; @@ -812,6 +903,18 @@ bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { return nullptr; } + // We happen to know that if the rose is high quality, we're going to limit + // depth further. + if (roseQuality) { + u32 max_depth = cc.grey.smallWriteLargestBufferBad; + if (!is_empty(lit_trie)) { + pruneTrie(lit_trie, max_depth); + } + if (!is_empty(lit_trie_nocase)) { + pruneTrie(lit_trie_nocase, max_depth); + } + } + if (!determiniseLiterals()) { DEBUG_PRINTF("some literal could not be made into a smallwrite dfa\n"); return nullptr; From c6f5275accc9154a25fab9c48762527343cf9851 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 12 Apr 2017 11:24:30 +1000 Subject: [PATCH 264/326] mcclellancompile: docs for main compile function --- src/nfa/mcclellancompile.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index a176db28..baf72d9c 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -66,8 +66,20 @@ private: raw_dfa &rdfa; }; -/* accel_states: (optional) on success, is filled with the set of accelerable - * states */ +/** + * \brief Construct an implementation DFA. 
+ * + * \param raw the raw dfa to construct from + * \param cc compile context + * \param rm report manger + * \param only_accel_init if true, only the init states will be examined for + * acceleration opportunities + * \param trust_daddy_states if true, trust the daddy state set in the raw dfa + * rather than conducting a search for a better daddy (for Sherman + * states) + * \param accel_states (optional) success, is filled with the set of + * accelerable states + */ bytecode_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, From d809e73d45a9cae3d9d74347f18a6266a22b9fe0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 19 Apr 2017 15:45:35 +1000 Subject: [PATCH 265/326] smallwrite: cope when everything has been pruned --- src/smallwrite/smallwrite_build.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index fe113525..4acfc713 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -920,6 +920,11 @@ bytecode_ptr SmallWriteBuildImpl::build(u32 roseQuality) { return nullptr; } + if (!rdfa) { + DEBUG_PRINTF("no dfa, pruned everything away\n"); + return nullptr; + } + DEBUG_PRINTF("building rdfa %p\n", rdfa.get()); u32 start_offset; From 0626a30a6a6cfba751795b324916755eaefb1ec0 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 18 Apr 2017 11:48:17 +1000 Subject: [PATCH 266/326] Make the build wrapper less fragile This script was failing for certain paths (dots, whitespace), and using sed to replace parts of the command line was overkill. Do less mangling, and quote command line args. Fixes 01org/hyperscan#51 --- cmake/build_wrapper.sh | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cmake/build_wrapper.sh b/cmake/build_wrapper.sh index 756d70e8..70392229 100755 --- a/cmake/build_wrapper.sh +++ b/cmake/build_wrapper.sh @@ -8,21 +8,18 @@ cleanup () { PREFIX=$1 KEEPSYMS_IN=$2 shift 2 -BUILD=$@ -OUT=$(echo $BUILD | sed 's/.* -o \(.*\.o\).*/\1/') +# $@ contains the actual build command +OUT=$(echo "$@" | sed 's/.* -o \(.*\.o\).*/\1/') trap cleanup INT QUIT EXIT SYMSFILE=$(mktemp --tmpdir ${PREFIX}_rename.syms.XXXXX) KEEPSYMS=$(mktemp --tmpdir keep.syms.XXXXX) -# grab the command without the target obj or src file flags -# we don't just call gcc directly as there may be flags modifying the arch -CC_CMD=$(echo $BUILD | sed 's/ -o .*\.o//;s/ -c //;s/ .[^ ]*\.c//;') -# find me a libc -LIBC_SO=$(${CC_CMD} --print-file-name=libc.so.6) +# find the libc used by gcc +LIBC_SO=$("$@" --print-file-name=libc.so.6) cp ${KEEPSYMS_IN} ${KEEPSYMS} # get all symbols from libc and turn them into patterns nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS} # build the object -${BUILD} +"$@" # rename the symbols in the object nm -f p -g ${OUT} | cut -f1 -d' ' | grep -v -f ${KEEPSYMS} | sed -e "s/\(.*\)/\1\ ${PREFIX}_\1/" >> ${SYMSFILE} if test -s ${SYMSFILE} From cd424bdb45409f93d8ac2013f0ee415e465d16a4 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Fri, 21 Apr 2017 15:46:13 +1000 Subject: [PATCH 267/326] minor clean up of rose_build_bytecode.cpp - consistently name functions creating programs as makeFoo() - replace writeLiteralProgram() with makeFragmentProgram() - make applyFinalSpecialisation() part of writeProgram(bc, prog) - seperate users who want to make a program for a single literal and fragments --- src/rose/rose_build_bytecode.cpp | 108 +++++++++++++++---------------- 1 
file changed, 54 insertions(+), 54 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 1155b50a..620ba3df 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1484,7 +1484,7 @@ void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc, info.queue = qif.get_queue(); exclusive_info.push_back(move(info)); } - updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info, + updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info, no_retrigger_queues); buildInfixContainer(g, bc, exclusive_info); } @@ -2551,6 +2551,8 @@ u32 writeProgram(build_context &bc, RoseProgram &&program) { return 0; } + applyFinalSpecialisation(program); + auto it = bc.program_cache.find(program); if (it != end(bc.program_cache)) { DEBUG_PRINTF("reusing cached program at %u\n", it->second); @@ -4248,7 +4250,6 @@ u32 writeBoundaryProgram(const RoseBuildImpl &build, build_context &bc, for (const auto &id : reports) { makeReport(build, id, has_som, program); } - applyFinalSpecialisation(program); return writeProgram(bc, move(program)); } @@ -4758,10 +4759,10 @@ bool hasDelayedLiteral(const RoseBuildImpl &build, } static -RoseProgram buildLitInitialProgram(const RoseBuildImpl &build, - build_context &bc, ProgramBuild &prog_build, - u32 lit_id, - const vector &lit_edges) { +RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, + build_context &bc, ProgramBuild &prog_build, + u32 lit_id, + const vector &lit_edges) { RoseProgram program; // Check long literal info. @@ -4790,10 +4791,10 @@ RoseProgram buildLitInitialProgram(const RoseBuildImpl &build, } static -RoseProgram buildLiteralProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, u32 lit_id, - const vector &lit_edges, - bool is_anchored_program) { +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, u32 lit_id, + const vector &lit_edges, + bool is_anchored_program) { const auto &g = build.g; DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size()); @@ -4851,13 +4852,32 @@ RoseProgram buildLiteralProgram(const RoseBuildImpl &build, build_context &bc, // Construct initial program up front, as its early checks must be able // to jump to end and terminate processing for this literal. - auto lit_program = - buildLitInitialProgram(build, bc, prog_build, lit_id, lit_edges); + auto lit_program = makeLitInitialProgram(build, bc, prog_build, lit_id, + lit_edges); lit_program.add_before_end(move(program)); return lit_program; } +static +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, u32 lit_id, + const map> &lit_edge_map, + bool is_anchored_program) { + const vector no_edges; + + DEBUG_PRINTF("lit_id=%u\n", lit_id); + const vector *edges_ptr; + if (contains(lit_edge_map, lit_id)) { + edges_ptr = &lit_edge_map.at(lit_id); + } else { + edges_ptr = &no_edges; + } + + return makeLiteralProgram(build, bc, prog_build, lit_id, *edges_ptr, + is_anchored_program); +} + /** * \brief Consumes list of program blocks, checks them for duplicates and then * concatenates them into one program. 
@@ -4887,10 +4907,10 @@ RoseProgram assembleProgramBlocks(vector &&blocks) { } static -u32 writeLiteralProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, const vector &lit_ids, - const map> &lit_edge_map, - bool is_anchored_program) { +RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, + const vector &lit_ids, + const map> &lit_edge_map) { assert(!lit_ids.empty()); // If we have multiple literals and any of them squash groups, we will have @@ -4907,18 +4927,9 @@ u32 writeLiteralProgram(const RoseBuildImpl &build, build_context &bc, vector blocks; - const vector no_edges; - for (const auto &lit_id : lit_ids) { - DEBUG_PRINTF("lit_id=%u\n", lit_id); - const vector *edges_ptr; - if (contains(lit_edge_map, lit_id)) { - edges_ptr = &lit_edge_map.at(lit_id); - } else { - edges_ptr = &no_edges; - } - auto prog = buildLiteralProgram(build, bc, prog_build, lit_id, - *edges_ptr, is_anchored_program); + auto prog = makeLiteralProgram(build, bc, prog_build, lit_id, + lit_edge_map, false); if (needs_clear_work) { RoseProgram clear_block; clear_block.add_before_end(make_unique()); @@ -4927,13 +4938,7 @@ u32 writeLiteralProgram(const RoseBuildImpl &build, build_context &bc, blocks.push_back(move(prog)); } - auto program = assembleProgramBlocks(move(blocks)); - - if (program.empty()) { - return 0; - } - applyFinalSpecialisation(program); - return writeProgram(bc, move(program)); + return assembleProgramBlocks(move(blocks)); } static @@ -4965,10 +4970,6 @@ u32 writeDelayRebuildProgram(const RoseBuildImpl &build, build_context &bc, auto program = assembleProgramBlocks(move(blocks)); - if (program.empty()) { - return 0; - } - applyFinalSpecialisation(program); return writeProgram(bc, move(program)); } @@ -5107,9 +5108,10 @@ void buildLiteralPrograms(const RoseBuildImpl &build, DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", frag.fragment_id, as_string_list(frag.lit_ids).c_str()); - frag.lit_program_offset - = writeLiteralProgram(build, bc, prog_build, frag.lit_ids, - lit_edge_map, false); + auto lit_prog = makeFragmentProgram(build, bc, prog_build, frag.lit_ids, + lit_edge_map); + frag.lit_program_offset = writeProgram(bc, move(lit_prog)); + frag.delay_program_offset = writeDelayRebuildProgram(build, bc, prog_build, frag.lit_ids); } @@ -5137,9 +5139,10 @@ pair writeDelayPrograms(const RoseBuildImpl &build, for (const auto &delayed_lit_id : info.delayed_ids) { DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); - u32 offset = writeLiteralProgram(build, bc, prog_build, - {delayed_lit_id}, lit_edge_map, - false); + auto prog = makeLiteralProgram(build, bc, prog_build, + delayed_lit_id, lit_edge_map, + false); + u32 offset = writeProgram(bc, move(prog)); u32 delay_id; auto it = cache.find(offset); @@ -5197,8 +5200,9 @@ pair writeAnchoredPrograms(const RoseBuildImpl &build, continue; } - u32 offset = writeLiteralProgram(build, bc, prog_build, {lit_id}, - lit_edge_map, true); + auto prog = makeLiteralProgram(build, bc, prog_build, lit_id, + lit_edge_map, true); + u32 offset = writeProgram(bc, move(prog)); DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset); u32 anch_id; @@ -5260,7 +5264,6 @@ pair buildReportPrograms(const RoseBuildImpl &build, const bool has_som = false; makeCatchupMpv(build, bc.needs_mpv_catchup, id, program); makeReport(build, id, has_som, program); - applyFinalSpecialisation(program); u32 offset = writeProgram(bc, move(program)); programs.push_back(offset); build.rm.setProgramOffset(id, 
                                 offset);
@@ -5404,8 +5407,10 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc,
              tie(g[source(b, g)].index, g[target(b, g)].index);
     });

-    program.add_block(buildLiteralProgram(
-        build, bc, prog_build, build.eod_event_literal_id, edge_list, false));
+    auto block = makeLiteralProgram(build, bc, prog_build,
+                                    build.eod_event_literal_id, edge_list,
+                                    false);
+    program.add_block(move(block));
 }

 static
@@ -5453,11 +5458,6 @@ u32 writeEodProgram(const RoseBuildImpl &build, build_context &bc,
     addEodAnchorProgram(build, bc, prog_build, true, program);
     addSuffixesEodProgram(build, program);

-    if (program.empty()) {
-        return 0;
-    }
-
-    applyFinalSpecialisation(program);
     return writeProgram(bc, move(program));
 }

From a810bac8f7dffb960acafa2243319375878bbcb6 Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Tue, 11 Apr 2017 10:50:16 +1000
Subject: [PATCH 268/326] be more selective about generating CLEAR_WORK_DONE
 instructions

---
 src/rose/rose_build_bytecode.cpp | 39 ++++++++++++++------------------
 src/rose/rose_build_program.cpp  |  9 ++++++++
 src/rose/rose_build_program.h    |  3 +++
 3 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 620ba3df..bd7481ab 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -4879,8 +4879,11 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
 }

 /**
- * \brief Consumes list of program blocks, checks them for duplicates and then
- * concatenates them into one program.
+ * \brief Consumes list of program blocks corresponding to different literals,
+ * checks them for duplicates and then concatenates them into one program.
+ *
+ * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is
+ * inserted to prevent the work_done flag being contaminated by early blocks.
  */
 static
 RoseProgram assembleProgramBlocks(vector &&blocks) {
@@ -4899,8 +4902,18 @@ RoseProgram assembleProgramBlocks(vector &&blocks) {

     DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size());

-    for (auto &prog : blocks) {
-        program.add_block(move(prog));
+    for (auto &block : blocks) {
+        /* If we have multiple blocks from different literals and any of them
+         * squash groups, we will have to add a CLEAR_WORK_DONE instruction to
+         * each literal program block to clear the work_done flags so that it's
+         * only set if a state has been switched on for that literal. */
+        if (!program.empty() && reads_work_done_flag(block)) {
+            RoseProgram clear_block;
+            clear_block.add_before_end(make_unique());
+            program.add_block(move(clear_block));
+        }
+
+        program.add_block(move(block));
     }

     return program;
@@ -4913,28 +4926,10 @@ RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc,
                                 const map> &lit_edge_map) {
     assert(!lit_ids.empty());

-    // If we have multiple literals and any of them squash groups, we will have
-    // to add a CLEAR_WORK_DONE instruction to each literal program block to
-    // clear the work_done flags so that it's only set if a state has been
-    // switched on for that literal.
-
-    // Note that we add it to every lit program, as they may be
-    // reordered/uniquified by assembleProgramBlocks() above.
-    const bool needs_clear_work = lit_ids.size() > 1 &&
-                                  any_of_in(lit_ids, [&](u32 lit_id) {
-        return build.literal_info.at(lit_id).squash_group;
-    });
-
     vector blocks;
-
     for (const auto &lit_id : lit_ids) {
         auto prog = makeLiteralProgram(build, bc, prog_build, lit_id,
                                        lit_edge_map, false);
-        if (needs_clear_work) {
-            RoseProgram clear_block;
-            clear_block.add_before_end(make_unique());
-            prog.add_block(move(clear_block));
-        }
         blocks.push_back(move(prog));
     }

diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
index bca867f0..cd9b79c8 100644
--- a/src/rose/rose_build_program.cpp
+++ b/src/rose/rose_build_program.cpp
@@ -681,4 +681,13 @@ bool RoseProgramEquivalence::operator()(const RoseProgram &prog1,
     return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv);
 }

+bool reads_work_done_flag(const RoseProgram &prog) {
+    for (const auto &ri : prog) {
+        if (dynamic_cast(ri.get())) {
+            return true;
+        }
+    }
+    return false;
+}
+
 } // namespace ue2

diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
index 06233231..d0c67382 100644
--- a/src/rose/rose_build_program.h
+++ b/src/rose/rose_build_program.h
@@ -2348,6 +2348,9 @@ public:
     bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
 };

+/** Returns true if the program may read the interpreter's work_done flag */
+bool reads_work_done_flag(const RoseProgram &prog);
+
 } // namespace ue2

 #endif // ROSE_BUILD_PROGRAM_H

From 88fd95e38a5c669d0c67231a920429ed2d353e38 Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Mon, 24 Apr 2017 09:51:58 +1000
Subject: [PATCH 269/326] rose: minor clean up of catchup

- anchored dfas do not mean that catchup is required
- remove needsCatchup from rose bytecode as catchup is based on the
  interpreter
---
 src/rose/program_runtime.h       | 14 +++------
 src/rose/rose_build_bytecode.cpp | 51 ++++++++++++++++----------------
 src/rose/rose_build_dump.cpp     |  1 -
 src/rose/rose_internal.h         |  1 -
 4 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h
index 10a87c8b..090913ae 100644
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -484,7 +484,6 @@ static rose_inline
 hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch,
                        u64a end, ReportID onmatch, s32 offset_adjust,
                        u32 ekey) {
-    assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset);
     DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end);
     updateLastMatchOffset(&scratch->tctxt, end);

@@ -518,13 +517,11 @@ hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t,
 }

 static rose_inline
-void roseHandleSom(UNUSED const struct RoseEngine *t,
-                   struct hs_scratch *scratch, const struct som_operation *sr,
+void roseHandleSom(struct hs_scratch *scratch, const struct som_operation *sr,
                    u64a end) {
     DEBUG_PRINTF("end=%llu, minMatchOffset=%llu\n", end,
                  scratch->tctxt.minMatchOffset);

-    assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset);
     updateLastMatchOffset(&scratch->tctxt, end);
     handleSomInternal(scratch, sr, end);
 }

@@ -533,7 +530,6 @@ static rose_inline
 hwlmcb_rv_t roseReportSom(const struct RoseEngine *t,
                           struct hs_scratch *scratch, u64a start, u64a end,
                           ReportID onmatch, s32 offset_adjust, u32 ekey) {
-    assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset);
     DEBUG_PRINTF("firing som callback onmatch=%u, start=%llu, end=%llu\n",
                  onmatch, start, end);
     updateLastMatchOffset(&scratch->tctxt, end);

@@ -553,13 +549,11 @@ hwlmcb_rv_t roseReportSom(const
struct RoseEngine *t, } static rose_inline -void roseHandleSomSom(UNUSED const struct RoseEngine *t, - struct hs_scratch *scratch, +void roseHandleSomSom(struct hs_scratch *scratch, const struct som_operation *sr, u64a start, u64a end) { DEBUG_PRINTF("start=%llu, end=%llu, minMatchOffset=%llu\n", start, end, scratch->tctxt.minMatchOffset); - assert(!t->needsCatchup || end == scratch->tctxt.minMatchOffset); updateLastMatchOffset(&scratch->tctxt, end); setSomFromSomAware(scratch, sr, start, end); } @@ -2211,14 +2205,14 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, PROGRAM_CASE(REPORT_SOM_INT) { updateSeqPoint(tctxt, end, from_mpv); - roseHandleSom(t, scratch, &ri->som, end); + roseHandleSom(scratch, &ri->som, end); work_done = 1; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_AWARE) { updateSeqPoint(tctxt, end, from_mpv); - roseHandleSomSom(t, scratch, &ri->som, som, end); + roseHandleSomSom(scratch, &ri->som, som, end); work_done = 1; } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index bd7481ab..a53cc534 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -231,9 +231,9 @@ struct build_context : noncopyable { * RoseEngine. */ RoseEngineBlob engine_blob; - /** \brief True if reports need CATCH_UP instructions, to catch up anchored - * matches, suffixes, outfixes etc. */ - bool needs_catchup = false; + /** \brief True if reports need CATCH_UP instructions to catch up suffixes, + * outfixes etc. */ + bool needs_catchup; /** \brief True if this Rose engine has an MPV engine. */ bool needs_mpv_catchup = false; @@ -327,38 +327,39 @@ u32 countRosePrefixes(const vector &roses) { } /** - * \brief True if this Rose engine needs to run a catch up whenever a report is - * generated. + * \brief True if this Rose engine needs to run a catch up whenever a literal + * report is generated. * * Catch up is necessary if there are output-exposed engines (suffixes, - * outfixes) or an anchored table (anchored literals, acyclic DFAs). + * outfixes). */ static -bool needsCatchup(const RoseBuildImpl &build, - const vector &anchored_dfas) { +bool needsCatchup(const RoseBuildImpl &build) { + /* Note: we could be more selective about when we need to generate catch up + * instructions rather than just a boolean yes/no - for instance, if we know + * that a role can only match before the point that an outfix/suffix could + * match, we do not strictly need a catchup instruction. + * + * However, this would add a certain amount of complexity to the + * catchup logic and would likely have limited applicability - how many + * reporting roles have a fixed max offset and how much time is spent on + * catchup for these cases? 
+ */ + if (!build.outfixes.empty()) { + /* TODO: check that they have non-eod reports */ DEBUG_PRINTF("has outfixes\n"); return true; } - if (!anchored_dfas.empty()) { - DEBUG_PRINTF("has anchored dfas\n"); - return true; - } const RoseGraph &g = build.g; for (auto v : vertices_range(g)) { - if (build.root == v) { - continue; - } - if (build.anchored_root == v) { - continue; - } if (g[v].suffix) { + /* TODO: check that they have non-eod reports */ DEBUG_PRINTF("vertex %zu has suffix\n", g[v].index); return true; } - } DEBUG_PRINTF("no need for catch-up on report\n"); @@ -4794,7 +4795,7 @@ static RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, u32 lit_id, const vector &lit_edges, - bool is_anchored_program) { + bool is_anchored_replay_program) { const auto &g = build.g; DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size()); @@ -4844,7 +4845,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, makeGroupSquashInstruction(build, lit_id, root_block); // Literal may be anchored and need to be recorded. - if (!is_anchored_program) { + if (!is_anchored_replay_program) { makeRecordAnchoredInstruction(build, prog_build, lit_id, root_block); } @@ -4863,7 +4864,7 @@ static RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, u32 lit_id, const map> &lit_edge_map, - bool is_anchored_program) { + bool is_anchored_replay_program) { const vector no_edges; DEBUG_PRINTF("lit_id=%u\n", lit_id); @@ -4875,7 +4876,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, } return makeLiteralProgram(build, bc, prog_build, lit_id, *edges_ptr, - is_anchored_program); + is_anchored_replay_program); } /** @@ -5726,7 +5727,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this, anchored_dfas); bc.longLitLengthThreshold = longLitLengthThreshold; - bc.needs_catchup = needsCatchup(*this, anchored_dfas); + bc.needs_catchup = needsCatchup(*this); recordResources(bc.resources, *this, fragments); if (!anchored_dfas.empty()) { bc.resources.has_anchored = true; @@ -5891,8 +5892,6 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.somLocationCount = ssm.numSomSlots(); proto.somLocationFatbitSize = fatbit_size(proto.somLocationCount); - proto.needsCatchup = bc.needs_catchup ? 1 : 0; - proto.runtimeImpl = pickRuntimeImpl(*this, bc.resources, proto.outfixEndQueue); proto.mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index a52830b0..73ed830e 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -2112,7 +2112,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U8(t, canExhaust); DUMP_U8(t, hasSom); DUMP_U8(t, somHorizon); - DUMP_U8(t, needsCatchup); DUMP_U32(t, mode); DUMP_U32(t, historyRequired); DUMP_U32(t, ekeyCount); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 06a9b069..777e7234 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -304,7 +304,6 @@ struct RoseEngine { u8 hasSom; /**< has at least one pattern which tracks SOM. */ u8 somHorizon; /**< width in bytes of SOM offset storage (governed by SOM precision) */ - u8 needsCatchup; /** catch up needs to be run on every report. 
 */
     u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
     u32 historyRequired; /**< max amount of history required for streaming */
     u32 ekeyCount; /**< number of exhaustion keys */

From e24c38a85c499c8af88a7ee3cdeed1ead63d65c8 Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Mon, 24 Apr 2017 10:22:44 +1000
Subject: [PATCH 270/326] rose: minor improvements to avoid unneeded program
 instructions

- strip out lonely check handled instructions
- avoid producing programs for empty ghost roles
---
 src/rose/rose_build_bytecode.cpp | 46 +++++++++++++++++++-------------
 src/rose/rose_build_program.cpp  | 27 +++++++++++++++++++
 src/rose/rose_build_program.h    |  9 +++++--
 3 files changed, 62 insertions(+), 20 deletions(-)

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index a53cc534..0f41da71 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -4163,9 +4163,10 @@ void makeRoleEagerEodReports(const RoseBuildImpl &build, build_context &bc,
     program.add_before_end(move(eod_program));
 }

+/* Makes a program for a role/vertex given a specific pred/in_edge. */
 static
-RoseProgram makeProgram(const RoseBuildImpl &build, build_context &bc,
-                        ProgramBuild &prog_build, const RoseEdge &e) {
+RoseProgram makeRoleProgram(const RoseBuildImpl &build, build_context &bc,
+                            ProgramBuild &prog_build, const RoseEdge &e) {
     const RoseGraph &g = build.g;
     auto v = target(e, g);

@@ -4187,10 +4188,11 @@ RoseProgram makeProgram(const RoseBuildImpl &build, build_context &bc,
         makeRoleCheckBounds(build, v, e, program);
     }

-    // This program may be triggered by different predecessors, with different
-    // offset bounds. We must ensure we put this check/set operation after the
-    // bounds check to deal with this case.
+    // This role program may be triggered by different predecessors, with
+    // different offset bounds. We must ensure we put this check/set operation
+    // after the bounds check to deal with this case.
     if (in_degree(v, g) > 1) {
+        assert(!build.isRootSuccessor(v));
         makeRoleCheckNotHandled(prog_build, v, program);
     }

@@ -4231,6 +4233,12 @@ RoseProgram makeProgram(const RoseBuildImpl &build, build_context &bc,
     makeRoleEagerEodReports(build, bc, v, eod_block);
     effects_block.add_block(move(eod_block));

+    /* a 'ghost role' may do nothing if we know that its groups are already set
+     * - in this case we can avoid producing a program at all. */
+    if (effects_block.empty()) {
+        return {};
+    }
+
     program.add_before_end(move(effects_block));
     return program;
 }

@@ -4414,6 +4422,7 @@ void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block,
                         RoseProgram &program) {
     // Prepend an instruction to check the pred state is on.
     const auto *end_inst = pred_block.end_instruction();
+    assert(!pred_block.empty());
     pred_block.insert(begin(pred_block),
                       make_unique(pred_state, end_inst));
     program.add_block(move(pred_block));

@@ -4434,6 +4443,12 @@ void addPredBlocksAny(map &pred_blocks, u32 num_states,
     sparse_program.add_before_end(move(ri));

     RoseProgram &block = pred_blocks.begin()->second;
+    assert(!block.empty());
+
+    /* we no longer need the check handled instruction as all the pred-role
+     * blocks are being collapsed together */
+    stripCheckHandledInstruction(block);
+
     sparse_program.add_before_end(move(block));
     program.add_block(move(sparse_program));
 }

@@ -4491,15 +4506,6 @@ void addPredBlocksMulti(map &pred_blocks,
 static
 void addPredBlocks(map &pred_blocks, u32 num_states,
                    RoseProgram &program) {
-    // Trim empty blocks, if any exist.
-    for (auto it = pred_blocks.begin(); it != pred_blocks.end();) {
-        if (it->second.empty()) {
-            it = pred_blocks.erase(it);
-        } else {
-            ++it;
-        }
-    }
-
     const size_t num_preds = pred_blocks.size();
     if (num_preds == 0) {
         return;
     }

@@ -4815,8 +4821,10 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
                      g[target(e, g)].index);
         assert(contains(bc.roleStateIndices, u));
         u32 pred_state = bc.roleStateIndices.at(u);
-        pred_blocks[pred_state].add_block(
-            makeProgram(build, bc, prog_build, e));
+        auto role_prog = makeRoleProgram(build, bc, prog_build, e);
+        if (!role_prog.empty()) {
+            pred_blocks[pred_state].add_block(move(role_prog));
+        }
     }

     // Add blocks to deal with non-root edges (triggered by sparse iterator or
@@ -4831,7 +4839,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
         }
         DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
                      g[target(e, g)].index);
-        program.add_block(makeProgram(build, bc, prog_build, e));
+        program.add_block(makeRoleProgram(build, bc, prog_build, e));
     }

     if (lit_id == build.eod_event_literal_id) {
@@ -4872,6 +4880,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
     if (contains(lit_edge_map, lit_id)) {
         edges_ptr = &lit_edge_map.at(lit_id);
     } else {
+        /* literal may happen only in a delay context */
         edges_ptr = &no_edges;
     }

@@ -5205,7 +5214,8 @@ pair writeAnchoredPrograms(const RoseBuildImpl &build,
         auto it = cache.find(offset);
         if (it != end(cache)) {
             anch_id = it->second;
-            DEBUG_PRINTF("reusing anch_id %u for offset %u\n", anch_id, offset);
+            DEBUG_PRINTF("reusing anch_id %u for offset %u\n", anch_id,
+                         offset);
         } else {
             anch_id = verify_u32(programs.size());
             programs.push_back(offset);

diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
index cd9b79c8..a659f22e 100644
--- a/src/rose/rose_build_program.cpp
+++ b/src/rose/rose_build_program.cpp
@@ -639,6 +639,11 @@ OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
     return offset_map;
 }

+RoseProgram::iterator RoseProgram::erase(RoseProgram::iterator first,
+                                         RoseProgram::iterator last) {
+    return prog.erase(first, last);
+}
+
 bytecode_ptr writeProgram(RoseEngineBlob &blob,
                           const RoseProgram &program) {
     u32 total_len = 0;

@@ -681,6 +686,28 @@ bool RoseProgramEquivalence::operator()(const RoseProgram &prog1,
     return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv);
 }

+void stripCheckHandledInstruction(RoseProgram &prog) {
+    for (auto it = prog.begin(); it != prog.end();) {
+        auto ins = dynamic_cast(it->get());
+        if (!ins) {
+            ++it;
+            continue;
+        }
+
+        auto next_it = next(it);
+        assert(next_it != prog.end()); /* there should always be an end ins */
+        auto next_ins = next_it->get();
+
+        /* update all earlier instructions which point to ins to instead point
+         * to the next instruction. Only need to look at earlier as we only ever
+         * jump forward.
+         */
+        RoseProgram::update_targets(prog.begin(), it, ins, next_ins);
+
+        /* remove check handled instruction */
+        it = prog.erase(it, next_it);
+    }
+}
+
 bool reads_work_done_flag(const RoseProgram &prog) {
     for (const auto &ri : prog) {
         if (dynamic_cast(ri.get())) {

diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
index d0c67382..19b9f90a 100644
--- a/src/rose/rose_build_program.h
+++ b/src/rose/rose_build_program.h
@@ -2219,7 +2219,6 @@ public:
         return prog.back().get();
     }

-private:
     static void update_targets(iterator it, iterator it_end,
                                const RoseInstruction *old_target,
                                const RoseInstruction *new_target) {
@@ -2231,7 +2230,6 @@ private:
         }
     }

-public:
     iterator insert(iterator it, std::unique_ptr ri) {
         assert(!prog.empty());
         assert(it != end());
@@ -2267,6 +2265,10 @@ public:
         return it;
     }

+    /* Note: takes iterator rather than const_iterator to support toolchains
+     * with pre-C++11 standard libraries (i.e., gcc-4.8). */
+    iterator erase(iterator first, iterator last);
+
     /**
     * \brief Adds this instruction to the program just before the terminating
     * ROSE_INSTR_END.
@@ -2348,6 +2350,9 @@ public:
     bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
 };

+/* Removes any CHECK_HANDLED instructions from the given program */
+void stripCheckHandledInstruction(RoseProgram &prog);
+
 /** Returns true if the program may read the interpreter's work_done flag */
 bool reads_work_done_flag(const RoseProgram &prog);

 } // namespace ue2

 #endif // ROSE_BUILD_PROGRAM_H

From 1287b70f4b3a41c78b90dd308ded609279650b61 Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Wed, 26 Apr 2017 10:38:55 +1000
Subject: [PATCH 271/326] rose_program: merge RECORD_ANCHORED instruction into
 ANCHORED_DELAY

---
 src/rose/program_runtime.h       |  6 +-
 src/rose/rose_build_bytecode.cpp | 94 ++++++++++++++++----------------
 src/rose/rose_build_dump.cpp     |  5 +-
 src/rose/rose_build_program.cpp  |  8 +--
 src/rose/rose_build_program.h    | 45 +++++----------
 src/rose/rose_program.h          |  8 +--
 6 files changed, 71 insertions(+), 95 deletions(-)

diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h
index 090913ae..dac8345e 100644
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -1892,6 +1892,8 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
                 DEBUG_PRINTF("delay until playback\n");
                 tctxt->groups |= ri->groups;
                 work_done = 1;
+                recordAnchoredLiteralMatch(t, scratch, ri->anch_id, end);
+
                 assert(ri->done_jump); // must progress
                 pc += ri->done_jump;
                 continue;
@@ -2085,8 +2087,8 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
             }
             PROGRAM_NEXT_INSTRUCTION

-            PROGRAM_CASE(RECORD_ANCHORED) {
-                recordAnchoredLiteralMatch(t, scratch, ri->id, end);
+            PROGRAM_CASE(DUMMY_NOP) {
+                assert(0);
             }
             PROGRAM_NEXT_INSTRUCTION

diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 0f41da71..ae352e2e 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -3615,22 +3615,37 @@ void makeRoleCheckLeftfix(const RoseBuildImpl &build,
 }

 static
-void makeRoleAnchoredDelay(const RoseBuildImpl &build,
-                           u32 floatingMinLiteralMatchOffset,
-                           RoseVertex v, RoseProgram &program) {
-    // Only relevant for roles that can be triggered by the anchored table.
-    if (!build.isAnchored(v)) {
+void makeAnchoredLiteralDelay(const RoseBuildImpl &build,
+                              const ProgramBuild &prog_build, u32 lit_id,
+                              RoseProgram &program) {
+    // Only relevant for literals in the anchored table.
+    const rose_literal_id &lit = build.literals.right.at(lit_id);
+    if (lit.table != ROSE_ANCHORED) {
         return;
     }

-    // If this match cannot occur after floatingMinLiteralMatchOffset, we do
-    // not need this check.
-    if (build.g[v].max_offset <= floatingMinLiteralMatchOffset) {
+    // If this literal match cannot occur after floatingMinLiteralMatchOffset,
+    // we do not need this check.
+    bool all_too_early = true;
+    rose_group groups = 0;
+
+    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
+    for (RoseVertex v : lit_vertices) {
+        if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) {
+            all_too_early = false;
+        }
+        groups |= build.g[v].groups;
+    }
+
+    if (all_too_early) {
         return;
     }

+    assert(contains(prog_build.anchored_programs, lit_id));
+    u32 anch_id = prog_build.anchored_programs.at(lit_id);
+
     const auto *end_inst = program.end_instruction();
-    auto ri = make_unique(build.g[v].groups, end_inst);
+    auto ri = make_unique(groups, anch_id, end_inst);
     program.add_before_end(move(ri));
 }

@@ -4175,9 +4190,6 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build, build_context &bc,
     // First, add program instructions that enforce preconditions without
     // effects.

-    makeRoleAnchoredDelay(build, prog_build.floatingMinLiteralMatchOffset, v,
-                          program);
-
     if (onlyAtEod(build, v)) {
         DEBUG_PRINTF("only at eod\n");
         const auto *end_inst = program.end_instruction();

@@ -4626,21 +4638,6 @@ u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
     return max_offset;
 }

-static
-void makeRecordAnchoredInstruction(const RoseBuildImpl &build,
-                                   ProgramBuild &prog_build, u32 lit_id,
-                                   RoseProgram &program) {
-    if (build.literals.right.at(lit_id).table != ROSE_ANCHORED) {
-        return;
-    }
-    if (!contains(prog_build.anchored_programs, lit_id)) {
-        return;
-    }
-    auto anch_id = prog_build.anchored_programs.at(lit_id);
-    DEBUG_PRINTF("adding RECORD_ANCHORED for anch_id=%u\n", anch_id);
-    program.add_before_end(make_unique(anch_id));
-}
-
 static
 u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
     const auto &lit_vertices = build.literal_info.at(lit_id).vertices;

@@ -4768,8 +4765,8 @@ bool hasDelayedLiteral(const RoseBuildImpl &build,
 static
 RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
                                   build_context &bc, ProgramBuild &prog_build,
-                                  u32 lit_id,
-                                  const vector &lit_edges) {
+                                  u32 lit_id, const vector &lit_edges,
+                                  bool is_anchored_replay_program) {
     RoseProgram program;

     // Check long literal info.
@@ -4794,6 +4791,11 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
                               prog_build.floatingMinLiteralMatchOffset,
                               program);

+    /* Check if we are able to deliver matches from the anchored table now */
+    if (!is_anchored_replay_program) {
+        makeAnchoredLiteralDelay(build, prog_build, lit_id, program);
+    }
+
     return program;
 }

@@ -4806,7 +4808,13 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,

     DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size());

-    RoseProgram program;
+    // Construct initial program up front, as its early checks must be able
+    // to jump to end and terminate processing for this literal.
+    auto lit_program = makeLitInitialProgram(build, bc, prog_build, lit_id,
+                                             lit_edges,
+                                             is_anchored_replay_program);
+
+    RoseProgram role_programs;

     // Predecessor state id -> program block.
     map pred_blocks;
@@ -4829,7 +4837,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,

     // Add blocks to deal with non-root edges (triggered by sparse iterator or
     // mmbit_isset checks).
-    addPredBlocks(pred_blocks, bc.roleStateIndices.size(), program);
+    addPredBlocks(pred_blocks, bc.roleStateIndices.size(), role_programs);

     // Add blocks to handle root roles.
     for (const auto &e : lit_edges) {
         const auto &u = source(e, g);
         if (!build.isAnyStart(u)) {
             continue;
         }
         DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
                      g[target(e, g)].index);
-        program.add_block(makeRoleProgram(build, bc, prog_build, e));
+        role_programs.add_block(makeRoleProgram(build, bc, prog_build, e));
     }

     if (lit_id == build.eod_event_literal_id) {
+        /* Note: does not require the lit initial program */
         assert(build.eod_event_literal_id != MO_INVALID_IDX);
-        return program;
+        return role_programs;
     }

-    RoseProgram root_block;
+    /* Instructions to run even if a role program bails out */
+    RoseProgram unconditional_block;

     // Literal may squash groups.
-    makeGroupSquashInstruction(build, lit_id, root_block);
+    makeGroupSquashInstruction(build, lit_id, unconditional_block);

-    // Literal may be anchored and need to be recorded.
-    if (!is_anchored_replay_program) {
-        makeRecordAnchoredInstruction(build, prog_build, lit_id, root_block);
-    }
-
-    program.add_block(move(root_block));
-
-    // Construct initial program up front, as its early checks must be able
-    // to jump to end and terminate processing for this literal.
-    auto lit_program = makeLitInitialProgram(build, bc, prog_build, lit_id,
-                                             lit_edges);
-    lit_program.add_before_end(move(program));
+    role_programs.add_block(move(unconditional_block));
+    lit_program.add_before_end(move(role_programs));

     return lit_program;
 }

diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp
index 73ed830e..dfbbe116 100644
--- a/src/rose/rose_build_dump.cpp
+++ b/src/rose/rose_build_dump.cpp
@@ -890,6 +890,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             PROGRAM_CASE(ANCHORED_DELAY) {
                 os << "    groups 0x" << std::hex << ri->groups << std::dec
                    << endl;
+                os << "    anch_id " << ri->anch_id << "\n";
                 os << "    done_jump " << offset + ri->done_jump << endl;
             }
             PROGRAM_NEXT_INSTRUCTION
@@ -1097,9 +1098,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION

-            PROGRAM_CASE(RECORD_ANCHORED) {
-                os << "    id " << ri->id << endl;
-            }
+            PROGRAM_CASE(DUMMY_NOP) {}
             PROGRAM_NEXT_INSTRUCTION

             PROGRAM_CASE(CATCH_UP) {}

diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
index a659f22e..5cf06200 100644
--- a/src/rose/rose_build_program.cpp
+++ b/src/rose/rose_build_program.cpp
@@ -72,6 +72,7 @@ void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob,
     RoseInstrBase::write(dest, blob, offset_map);
     auto *inst = static_cast(dest);
     inst->groups = groups;
+    inst->anch_id = anch_id;
     inst->done_jump = calc_jump(offset_map, this, target);
 }

@@ -248,13 +249,6 @@ void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob,
     inst->index = index;
 }

-void RoseInstrRecordAnchored::write(void *dest, RoseEngineBlob &blob,
-                                    const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast(dest);
-    inst->id = id;
-}
-
 void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob,
                                const OffsetMap &offset_map) const {
     RoseInstrBase::write(dest, blob, offset_map);

diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
index 19b9f90a..9c74d488 100644
--- a/src/rose/rose_build_program.h
+++ b/src/rose/rose_build_program.h
@@ -216,18 +216,20 @@ class RoseInstrAnchoredDelay
                              RoseInstrAnchoredDelay> {
 public:
     rose_group groups;
+    u32 anch_id;
     const RoseInstruction *target;

-    RoseInstrAnchoredDelay(rose_group groups_in,
+    RoseInstrAnchoredDelay(rose_group groups_in, u32 anch_id_in,
                            const RoseInstruction *target_in)
-        : groups(groups_in), target(target_in) {}
+        : groups(groups_in), anch_id(anch_id_in), target(target_in) {}

     bool operator==(const RoseInstrAnchoredDelay &ri) const {
-        return groups == ri.groups && target == ri.target;
+        return groups == ri.groups && anch_id == ri.anch_id
+            && target == ri.target;
     }

     size_t hash() const override {
-        return hash_all(static_cast(opcode), groups);
+        return hash_all(static_cast(opcode), groups, anch_id);
     }

     void write(void *dest, RoseEngineBlob &blob,
@@ -235,8 +237,8 @@ public:
     bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets,
                   const OffsetMap &other_offsets) const {
-        return groups == ri.groups &&
-               offsets.at(target) == other_offsets.at(ri.target);
+        return groups == ri.groups && anch_id == ri.anch_id
+            && offsets.at(target) == other_offsets.at(ri.target);
     }
 };

@@ -844,32 +846,6 @@ public:
     }
 };

-class RoseInstrRecordAnchored
-    : public RoseInstrBaseNoTargets {
-public:
-    u32 id;
-
-    explicit RoseInstrRecordAnchored(u32 id_in) : id(id_in) {}
-
-    bool operator==(const RoseInstrRecordAnchored &ri) const {
-        return id == ri.id;
-    }
-
-    size_t hash() const override {
-        return hash_all(static_cast(opcode), id);
-    }
-
-    void write(void *dest, RoseEngineBlob &blob,
-               const OffsetMap &offset_map) const override;
-
-    bool equiv_to(const RoseInstrRecordAnchored &ri, const OffsetMap &,
-                  const OffsetMap &) const {
-        return id == ri.id;
-    }
-};
-
 class RoseInstrCatchUp
     : public RoseInstrBaseTrivial {
@@ -2281,6 +2257,8 @@ public:
     /**
      * \brief Adds this block to the program just before the terminating
      * ROSE_INSTR_END.
+     *
+     * Any existing instruction that was jumping to end continues to do so.
      */
     void add_before_end(RoseProgram &&block) {
         assert(!prog.empty());
@@ -2295,6 +2273,9 @@ public:

     /**
      * \brief Append this program block, replacing our current ROSE_INSTR_END.
+     *
+     * Any existing instruction that was jumping to end now leads to the newly
+     * added block.
      */
     void add_block(RoseProgram &&block) {
         assert(!prog.empty());

diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h
index ebda679a..cdfe96ac 100644
--- a/src/rose/rose_program.h
+++ b/src/rose/rose_program.h
@@ -62,7 +62,7 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_INFIX,     //!< Infix engine must be in accept state.
     ROSE_INSTR_CHECK_PREFIX,    //!< Prefix engine must be in accept state.
     ROSE_INSTR_PUSH_DELAYED,    //!< Push delayed literal matches.
-    ROSE_INSTR_RECORD_ANCHORED, //!< Record an anchored literal match.
+    ROSE_INSTR_DUMMY_NOP,       //!< NOP. Should not exist in build programs.
     ROSE_INSTR_CATCH_UP,        //!< Catch up engines, anchored matches.
     ROSE_INSTR_CATCH_UP_MPV,    //!< Catch up the MPV.
     ROSE_INSTR_SOM_ADJUST,      //!< Set SOM from a distance to EOM.
@@ -188,7 +188,8 @@ struct ROSE_STRUCT_END {
 struct ROSE_STRUCT_ANCHORED_DELAY {
     u8 code; //!< From enum RoseInstructionCode.
     rose_group groups; //!< Bitmask.
-    u32 done_jump; //!< Jump forward this many bytes if successful.
+    u32 anch_id; //!< Program to restart after the delay.
+    u32 done_jump; //!< Jump forward this many bytes if we have to delay.
 };

 struct ROSE_STRUCT_CHECK_LIT_EARLY {
@@ -327,9 +328,8 @@ struct ROSE_STRUCT_PUSH_DELAYED {
     u32 index; // Delay literal index (relative to first delay lit).
 };

-struct ROSE_STRUCT_RECORD_ANCHORED {
+struct ROSE_STRUCT_DUMMY_NOP {
     u8 code; //!< From enum RoseInstructionCode.
- u32 id; //!< Literal ID. }; struct ROSE_STRUCT_CATCH_UP { From 1287b70f4b3a41c78b90dd308ded609279650b61 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 26 Apr 2017 10:38:55 +1000 Subject: [PATCH 272/326] split out instruction details to own files --- CMakeLists.txt | 2 + src/rose/rose_build_bytecode.cpp | 1 + src/rose/rose_build_instructions.cpp | 616 +++++++ src/rose/rose_build_instructions.h | 2143 +++++++++++++++++++++++++ src/rose/rose_build_program.cpp | 700 ++------ src/rose/rose_build_program.h | 2218 +------------------------- 6 files changed, 2896 insertions(+), 2784 deletions(-) create mode 100644 src/rose/rose_build_instructions.cpp create mode 100644 src/rose/rose_build_instructions.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 34405097..f03969ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -955,6 +955,8 @@ SET (hs_SRCS src/rose/rose_build_impl.h src/rose/rose_build_infix.cpp src/rose/rose_build_infix.h + src/rose/rose_build_instructions.cpp + src/rose/rose_build_instructions.h src/rose/rose_build_lit_accel.cpp src/rose/rose_build_lit_accel.h src/rose/rose_build_long_lit.cpp diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index ae352e2e..94927558 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -38,6 +38,7 @@ #include "rose_build_exclusive.h" #include "rose_build_groups.h" #include "rose_build_infix.h" +#include "rose_build_instructions.h" #include "rose_build_long_lit.h" #include "rose_build_lookaround.h" #include "rose_build_matchers.h" diff --git a/src/rose/rose_build_instructions.cpp b/src/rose/rose_build_instructions.cpp new file mode 100644 index 00000000..f39fbe98 --- /dev/null +++ b/src/rose/rose_build_instructions.cpp @@ -0,0 +1,616 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "rose_build_instructions.h" + +#include "rose_build_engine_blob.h" +#include "util/multibit_build.h" +#include "util/verify_types.h" + +#include + +using namespace std; + +namespace ue2 { +/* Destructors to avoid weak vtables. */ + +RoseInstruction::~RoseInstruction() = default; +RoseInstrCatchUp::~RoseInstrCatchUp() = default; +RoseInstrCatchUpMpv::~RoseInstrCatchUpMpv() = default; +RoseInstrSomZero::~RoseInstrSomZero() = default; +RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default; +RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; +RoseInstrEnd::~RoseInstrEnd() = default; +RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default; + +using OffsetMap = RoseInstruction::OffsetMap; + +static +u32 calc_jump(const OffsetMap &offset_map, const RoseInstruction *from, + const RoseInstruction *to) { + DEBUG_PRINTF("computing relative jump from %p to %p\n", from, to); + assert(from && contains(offset_map, from)); + assert(to && contains(offset_map, to)); + + u32 from_offset = offset_map.at(from); + u32 to_offset = offset_map.at(to); + DEBUG_PRINTF("offsets: %u -> %u\n", from_offset, to_offset); + assert(from_offset <= to_offset); + + return to_offset - from_offset; +} + +void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; + inst->anch_id = anch_id; + inst->done_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->min_offset = min_offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrCheckOnlyEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckBounds::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->min_bound = min_bound; + inst->max_bound = max_bound; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->key = key; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->offset = offset; + inst->reach_index = reach_index; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->look_index = look_index; + inst->reach_index = reach_index; + inst->count = count; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void 
RoseInstrCheckMask::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(and_mask), end(and_mask), inst->and_mask); + copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->negation = negation; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti16x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(nib_mask), end(nib_mask), inst->nib_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti16x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi), + inst->bucket_select_mask_hi); + copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo), + inst->bucket_select_mask_lo); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void 
RoseInstrCheckPrefix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->delay = delay; + inst->index = index; +} + +void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->distance = distance; +} + +void RoseInstrSomLeftfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; +} + +void RoseInstrSomFromReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrTriggerInfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->cancel = cancel; + inst->queue = queue; + inst->event = event; +} + +void RoseInstrTriggerSuffix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->event = event; +} + +void RoseInstrDedupe::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrDedupeSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrReportChain::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->event = event; + inst->top_squash_distance = top_squash_distance; +} + +void RoseInstrReportSomInt::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrReportSomAware::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + 
inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrReportSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportSomExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrDedupeAndReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrFinalReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrCheckExhausted::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->ekey = ekey; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMinLength::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->end_adj = end_adj; + inst->min_length = min_length; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrSetState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; +} + +void RoseInstrSetGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrSquashGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrCheckState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Resolve and write the multibit sparse iterator and the jump table. 
+ vector keys; + vector jump_offsets; + for (const auto &jump : jump_table) { + keys.push_back(jump.first); + assert(contains(offset_map, jump.second)); + jump_offsets.push_back(offset_map.at(jump.second)); + } + + auto iter = mmbBuildSparseIterator(keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add_iterator(iter); + inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end()); + + // Store offsets for corresponding SPARSE_ITER_NEXT operations. + is_written = true; + iter_offset = inst->iter_offset; + jump_table_offset = inst->jump_table; +} + +void RoseInstrSparseIterNext::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->state = state; + inst->fail_jump = calc_jump(offset_map, this, target); + + // Use the same sparse iterator and jump table as the SPARSE_ITER_BEGIN + // instruction. + assert(begin); + assert(contains(offset_map, begin)); + assert(begin->is_written); + inst->iter_offset = begin->iter_offset; + inst->jump_table = begin->jump_table_offset; +} + +void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Write the multibit sparse iterator. + auto iter = mmbBuildSparseIterator(keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add_iterator(iter); +} + +void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->iter_offset = iter_offset; +} + +void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = 
static_cast(dest); + inst->look_index = look_index; + inst->reach_index = reach_index; + inst->count = count; + inst->last_start = last_start; + copy(begin(start_mask), end(start_mask), inst->start_mask); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti16x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(nib_mask), end(nib_mask), inst->nib_mask); + copy(begin(bucket_select_mask), begin(bucket_select_mask) + 16, + inst->bucket_select_mask); + copy(begin(data_select_mask), begin(data_select_mask) + 16, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti32x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); + copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); + copy(begin(bucket_select_mask), begin(bucket_select_mask) + 32, + inst->bucket_select_mask); + copy(begin(data_select_mask), begin(data_select_mask) + 32, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti32x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask_hi), begin(bucket_select_mask_hi) + 32, + inst->bucket_select_mask_hi); + copy(begin(bucket_select_mask_lo), begin(bucket_select_mask_lo) + 32, + inst->bucket_select_mask_lo); + copy(begin(data_select_mask), begin(data_select_mask) + 32, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); + copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + copy(begin(data_select_mask), end(data_select_mask), + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +} diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h new file mode 100644 index 00000000..06d146a5 --- /dev/null +++ b/src/rose/rose_build_instructions.h @@ -0,0 +1,2143 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary 
forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Concrete classes for interpreter instructions.
+ *
+ * Note: this header should only be included in files which need to deal with
+ * the details of actual instructions. It is expected that most will only
+ * require access to the RoseInstruction API exposed in rose_build_program.h
+ */
+
+#ifndef ROSE_BUILD_INSTRUCTIONS_H
+#define ROSE_BUILD_INSTRUCTIONS_H
+
+#include "rose_build_program.h"
+#include "util/verify_types.h"
+
+namespace ue2 {
+
+/**
+ * \brief Abstract base class representing a single Rose instruction.
+ */
+class RoseInstruction {
+public:
+    virtual ~RoseInstruction();
+
+    /** \brief Opcode used for the instruction in the bytecode. */
+    virtual RoseInstructionCode code() const = 0;
+
+    /**
+     * \brief Simple hash used for program equivalence.
+     *
+     * Note that pointers (jumps, for example) should not be used when
+     * calculating the hash: they will be converted to instruction offsets when
+     * compared later.
+     */
+    virtual size_t hash() const = 0;
+
+    /** \brief Length of the bytecode instruction in bytes. */
+    virtual size_t byte_length() const = 0;
+
+    using OffsetMap = unordered_map<const RoseInstruction *, u32>;
+
+    /**
+     * \brief Writes a concrete implementation of this instruction.
+     *
+     * Other data that this instruction depends on is written directly into the
+     * blob, while the instruction structure itself (of size given by
+     * the byte_length() function) is written to dest.
+     */
+    virtual void write(void *dest, RoseEngineBlob &blob,
+                       const OffsetMap &offset_map) const = 0;
+
+    /**
+     * \brief Update a target pointer.
+     *
+     * If this instruction contains any reference to the old target, replace it
+     * with the new one.
+     */
+    virtual void update_target(const RoseInstruction *old_target,
+                               const RoseInstruction *new_target) = 0;
+
+    /**
+     * \brief True if these instructions are equivalent within their own
+     * programs.
+     *
+     * Checks that any pointers to other instructions point to the same
+     * offsets.
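+     *
+     * A sketch of the intended use, with illustrative names (two programs
+     * holding instructions a and b, with offset maps offsets_a and
+     * offsets_b):
+     *
+     *     // True only if a and b are the same concrete instruction type,
+     *     // their data members match, and every jump target resolves to
+     *     // the same offset within its own program.
+     *     bool same = a.equiv(b, offsets_a, offsets_b);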
+     */
+    bool equiv(const RoseInstruction &other, const OffsetMap &offsets,
+               const OffsetMap &other_offsets) const {
+        return equiv_impl(other, offsets, other_offsets);
+    }
+
+private:
+    virtual bool equiv_impl(const RoseInstruction &other,
+                            const OffsetMap &offsets,
+                            const OffsetMap &other_offsets) const = 0;
+};
+
+/**
+ * \brief Templated implementation class to handle boring boilerplate code.
+ */
+template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
+class RoseInstrBase : public RoseInstruction {
+protected:
+    static constexpr RoseInstructionCode opcode = Opcode;
+    using impl_type = ImplType;
+
+public:
+    RoseInstructionCode code() const override { return opcode; }
+
+    size_t byte_length() const override {
+        return sizeof(impl_type);
+    }
+
+    /**
+     * Note: this implementation simply zeroes the destination region and
+     * writes in the correct opcode. This is sufficient for trivial
+     * instructions, but instructions with data members will want to override
+     * it.
+     */
+    void write(void *dest, RoseEngineBlob &,
+               const RoseInstruction::OffsetMap &) const override {
+        assert(dest != nullptr);
+        assert(ISALIGNED_N(dest, ROSE_INSTR_MIN_ALIGN));
+
+        impl_type *inst = static_cast<impl_type *>(dest);
+        memset(inst, 0, sizeof(impl_type));
+        inst->code = verify_u8(opcode);
+    }
+
+private:
+    bool equiv_impl(const RoseInstruction &other, const OffsetMap &offsets,
+                    const OffsetMap &other_offsets) const override {
+        const auto *ri_that = dynamic_cast<const RoseInstrType *>(&other);
+        if (!ri_that) {
+            return false;
+        }
+        const auto *ri_this = dynamic_cast<const RoseInstrType *>(this);
+        assert(ri_this);
+        return ri_this->equiv_to(*ri_that, offsets, other_offsets);
+    }
+};
+
+/**
+ * \brief Refinement of RoseInstrBase to use for instructions that have
+ * just a single target member, called "target".
+ */
+template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
+class RoseInstrBaseOneTarget
+    : public RoseInstrBase<Opcode, ImplType, RoseInstrType> {
+public:
+    void update_target(const RoseInstruction *old_target,
+                       const RoseInstruction *new_target) override {
+        RoseInstrType *ri = dynamic_cast<RoseInstrType *>(this);
+        assert(ri);
+        if (ri->target == old_target) {
+            ri->target = new_target;
+        }
+    }
+};
+
+/**
+ * \brief Refinement of RoseInstrBase to use for instructions that have no
+ * targets.
+ */
+template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
+class RoseInstrBaseNoTargets
+    : public RoseInstrBase<Opcode, ImplType, RoseInstrType> {
+public:
+    void update_target(const RoseInstruction *,
+                       const RoseInstruction *) override {}
+};
+
+/**
+ * \brief Refinement of RoseInstrBaseNoTargets to use for instructions that
+ * have no members at all, just an opcode.
+ */
+template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType>
+class RoseInstrBaseTrivial
+    : public RoseInstrBaseNoTargets<Opcode, ImplType, RoseInstrType> {
+public:
+    virtual bool operator==(const RoseInstrType &) const { return true; }
+
+    size_t hash() const override {
+        return boost::hash_value(static_cast<u32>(Opcode));
+    }
+
+    bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &,
+                  const RoseInstruction::OffsetMap &) const {
+        return true;
+    }
+};
+
+////
+//// Concrete implementation classes start here.
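+//// Each class binds a ROSE_INSTR_* opcode to its bytecode layout struct
+//// (the ImplType parameter above) and holds the members that its write()
+//// implementation serialises.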
+//// + +class RoseInstrAnchoredDelay + : public RoseInstrBaseOneTarget { +public: + rose_group groups; + u32 anch_id; + const RoseInstruction *target; + + RoseInstrAnchoredDelay(rose_group groups_in, u32 anch_id_in, + const RoseInstruction *target_in) + : groups(groups_in), anch_id(anch_id_in), target(target_in) {} + + bool operator==(const RoseInstrAnchoredDelay &ri) const { + return groups == ri.groups && anch_id == ri.anch_id + && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), groups, anch_id); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return groups == ri.groups && anch_id == ri.anch_id + && offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLitEarly + : public RoseInstrBaseOneTarget { +public: + u32 min_offset; + const RoseInstruction *target; + + RoseInstrCheckLitEarly(u32 min_offset_in, const RoseInstruction *target_in) + : min_offset(min_offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckLitEarly &ri) const { + return min_offset == ri.min_offset && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), min_offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return min_offset == ri.min_offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrCheckGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrCheckGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), groups); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrCheckOnlyEod + : public RoseInstrBaseOneTarget { +public: + const RoseInstruction *target; + + explicit RoseInstrCheckOnlyEod(const RoseInstruction *target_in) + : target(target_in) {} + + bool operator==(const RoseInstrCheckOnlyEod &ri) const { + return target == ri.target; + } + + size_t hash() const override { + return boost::hash_value(static_cast(opcode)); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckOnlyEod &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckBounds + : public RoseInstrBaseOneTarget { +public: + u64a min_bound; + u64a max_bound; + const RoseInstruction *target; + + RoseInstrCheckBounds(u64a min, u64a max, const RoseInstruction *target_in) + : min_bound(min), max_bound(max), target(target_in) {} + + bool operator==(const RoseInstrCheckBounds &ri) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), min_bound, max_bound); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap 
&offset_map) const override; + + bool equiv_to(const RoseInstrCheckBounds &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckNotHandled + : public RoseInstrBaseOneTarget { +public: + u32 key; + const RoseInstruction *target; + + RoseInstrCheckNotHandled(u32 key_in, const RoseInstruction *target_in) + : key(key_in), target(target_in) {} + + bool operator==(const RoseInstrCheckNotHandled &ri) const { + return key == ri.key && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), key); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckNotHandled &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return key == ri.key && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckSingleLookaround + : public RoseInstrBaseOneTarget { +public: + s8 offset; + u32 reach_index; + const RoseInstruction *target; + + RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in, + const RoseInstruction *target_in) + : offset(offset_in), reach_index(reach_index_in), target(target_in) {} + + bool operator==(const RoseInstrCheckSingleLookaround &ri) const { + return offset == ri.offset && reach_index == ri.reach_index && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), offset, reach_index); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckSingleLookaround &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return offset == ri.offset && reach_index == ri.reach_index && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLookaround + : public RoseInstrBaseOneTarget { +public: + u32 look_index; + u32 reach_index; + u32 count; + const RoseInstruction *target; + + RoseInstrCheckLookaround(u32 look_index_in, u32 reach_index_in, + u32 count_in, const RoseInstruction *target_in) + : look_index(look_index_in), reach_index(reach_index_in), + count(count_in), target(target_in) {} + + bool operator==(const RoseInstrCheckLookaround &ri) const { + return look_index == ri.look_index && reach_index == ri.reach_index && + count == ri.count && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), look_index, reach_index, + count); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return look_index == ri.look_index && reach_index == ri.reach_index && + count == ri.count && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMask + : public RoseInstrBaseOneTarget { +public: + u64a and_mask; + u64a cmp_mask; + u64a neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckMask(u64a and_mask_in, u64a cmp_mask_in, u64a neg_mask_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(and_mask_in), cmp_mask(cmp_mask_in), neg_mask(neg_mask_in), + offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMask &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + 
neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, + offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMask &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMask32 + : public RoseInstrBaseOneTarget { +public: + std::array and_mask; + std::array cmp_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckMask32(std::array and_mask_in, + std::array cmp_mask_in, u32 neg_mask_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMask32 &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, + offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMask32 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckByte + : public RoseInstrBaseOneTarget { +public: + u8 and_mask; + u8 cmp_mask; + u8 negation; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckByte(u8 and_mask_in, u8 cmp_mask_in, u8 negation_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(and_mask_in), cmp_mask(cmp_mask_in), negation(negation_in), + offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckByte &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + negation == ri.negation && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), and_mask, cmp_mask, negation, + offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckByte &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + negation == ri.negation && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti16x8 + : public RoseInstrBaseOneTarget { +public: + std::array nib_mask; + std::array bucket_select_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti16x8(std::array nib_mask_in, + std::array bucket_select_mask_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : nib_mask(std::move(nib_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti16x8 &ri) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == 
ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), nib_mask, + bucket_select_mask, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti16x8 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti32x8 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti32x8(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti32x8 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti32x8 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti16x16 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti16x16(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti16x16 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti16x16 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti32x16 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + 
std::array lo_mask; + std::array bucket_select_mask_hi; + std::array bucket_select_mask_lo; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti32x16(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_hi_in, + std::array bucket_select_mask_lo_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), + bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti32x16 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask_hi, bucket_select_mask_lo, + neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti32x16 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckInfix + : public RoseInstrBaseOneTarget { +public: + u32 queue; + u32 lag; + ReportID report; + const RoseInstruction *target; + + RoseInstrCheckInfix(u32 queue_in, u32 lag_in, ReportID report_in, + const RoseInstruction *target_in) + : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} + + bool operator==(const RoseInstrCheckInfix &ri) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), queue, lag, report); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckInfix &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckPrefix + : public RoseInstrBaseOneTarget { +public: + u32 queue; + u32 lag; + ReportID report; + const RoseInstruction *target; + + RoseInstrCheckPrefix(u32 queue_in, u32 lag_in, ReportID report_in, + const RoseInstruction *target_in) + : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} + + bool operator==(const RoseInstrCheckPrefix &ri) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), queue, lag, report); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckPrefix &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrPushDelayed + : public RoseInstrBaseNoTargets { +public: 
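+    // delay is the number of bytes by which the match is deferred; index
+    // identifies the delayed literal to push.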
+ u8 delay; + u32 index; + + RoseInstrPushDelayed(u8 delay_in, u32 index_in) + : delay(delay_in), index(index_in) {} + + bool operator==(const RoseInstrPushDelayed &ri) const { + return delay == ri.delay && index == ri.index; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), delay, index); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrPushDelayed &ri, const OffsetMap &, + const OffsetMap &) const { + return delay == ri.delay && index == ri.index; + } +}; + +class RoseInstrCatchUp + : public RoseInstrBaseTrivial { +public: + ~RoseInstrCatchUp() override; +}; + +class RoseInstrCatchUpMpv + : public RoseInstrBaseTrivial { +public: + ~RoseInstrCatchUpMpv() override; +}; + +class RoseInstrSomAdjust + : public RoseInstrBaseNoTargets { +public: + u32 distance; + + explicit RoseInstrSomAdjust(u32 distance_in) : distance(distance_in) {} + + bool operator==(const RoseInstrSomAdjust &ri) const { + return distance == ri.distance; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), distance); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomAdjust &ri, const OffsetMap &, + const OffsetMap &) const { + return distance == ri.distance; + } +}; + +class RoseInstrSomLeftfix + : public RoseInstrBaseNoTargets { +public: + u32 queue; + u32 lag; + + RoseInstrSomLeftfix(u32 queue_in, u32 lag_in) + : queue(queue_in), lag(lag_in) {} + + bool operator==(const RoseInstrSomLeftfix &ri) const { + return queue == ri.queue && lag == ri.lag; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), queue, lag); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomLeftfix &ri, const OffsetMap &, + const OffsetMap &) const { + return queue == ri.queue && lag == ri.lag; + } +}; + +class RoseInstrSomFromReport + : public RoseInstrBaseNoTargets { +public: + som_operation som; + + RoseInstrSomFromReport() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrSomFromReport &ri) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), som.type, som.onmatch); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomFromReport &ri, const OffsetMap &, + const OffsetMap &) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrSomZero + : public RoseInstrBaseTrivial { +public: + ~RoseInstrSomZero() override; +}; + +class RoseInstrTriggerInfix + : public RoseInstrBaseNoTargets { +public: + u8 cancel; + u32 queue; + u32 event; + + RoseInstrTriggerInfix(u8 cancel_in, u32 queue_in, u32 event_in) + : cancel(cancel_in), queue(queue_in), event(event_in) {} + + bool operator==(const RoseInstrTriggerInfix &ri) const { + return cancel == ri.cancel && queue == ri.queue && event == ri.event; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), cancel, queue, event); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrTriggerInfix &ri, const OffsetMap &, + const OffsetMap &) const { + return cancel == ri.cancel && queue == ri.queue && event == ri.event; + } +}; + +class RoseInstrTriggerSuffix + : 
public RoseInstrBaseNoTargets { +public: + u32 queue; + u32 event; + + RoseInstrTriggerSuffix(u32 queue_in, u32 event_in) + : queue(queue_in), event(event_in) {} + + bool operator==(const RoseInstrTriggerSuffix &ri) const { + return queue == ri.queue && event == ri.event; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), queue, event); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrTriggerSuffix &ri, const OffsetMap &, + const OffsetMap &) const { + return queue == ri.queue && event == ri.event; + } +}; + +class RoseInstrDedupe + : public RoseInstrBaseOneTarget { +public: + u8 quash_som; + u32 dkey; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupe(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupe &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), quash_som, dkey, + offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrDedupe &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrDedupeSom + : public RoseInstrBaseOneTarget { +public: + u8 quash_som; + u32 dkey; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupeSom(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupeSom &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), quash_som, dkey, + offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrDedupeSom &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrReportChain + : public RoseInstrBaseNoTargets { +public: + u32 event; + u64a top_squash_distance; + + RoseInstrReportChain(u32 event_in, u32 top_squash_distance_in) + : event(event_in), top_squash_distance(top_squash_distance_in) {} + + bool operator==(const RoseInstrReportChain &ri) const { + return event == ri.event && + top_squash_distance == ri.top_squash_distance; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), event, top_squash_distance); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportChain &ri, const OffsetMap &, + const OffsetMap &) const { + return event == ri.event && + top_squash_distance == ri.top_squash_distance; + } +}; + +class RoseInstrReportSomInt + : public RoseInstrBaseNoTargets { +public: + som_operation som; + + 
RoseInstrReportSomInt() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrReportSomInt &ri) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), som.type, som.onmatch); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomInt &ri, const OffsetMap &, + const OffsetMap &) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrReportSomAware + : public RoseInstrBaseNoTargets { +public: + som_operation som; + + RoseInstrReportSomAware() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrReportSomAware &ri) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), som.type, som.onmatch); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomAware &ri, const OffsetMap &, + const OffsetMap &) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrReport + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrReport(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrReport &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), onmatch, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReport &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrReportExhaust + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + u32 ekey; + + RoseInstrReportExhaust(ReportID onmatch_in, s32 offset_adjust_in, + u32 ekey_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} + + bool operator==(const RoseInstrReportExhaust &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), onmatch, offset_adjust, ekey); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportExhaust &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } +}; + +class RoseInstrReportSom + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrReportSom(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrReportSom &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), onmatch, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSom &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class 
RoseInstrReportSomExhaust + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + u32 ekey; + + RoseInstrReportSomExhaust(ReportID onmatch_in, s32 offset_adjust_in, + u32 ekey_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} + + bool operator==(const RoseInstrReportSomExhaust &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), onmatch, offset_adjust, ekey); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomExhaust &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } +}; + +class RoseInstrDedupeAndReport + : public RoseInstrBaseOneTarget { +public: + u8 quash_som; + u32 dkey; + ReportID onmatch; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupeAndReport(u8 quash_som_in, u32 dkey_in, ReportID onmatch_in, + s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), onmatch(onmatch_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupeAndReport &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), quash_som, dkey, onmatch, + offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrDedupeAndReport &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrFinalReport + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrFinalReport(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrFinalReport &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), onmatch, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrFinalReport &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrCheckExhausted + : public RoseInstrBaseOneTarget { +public: + u32 ekey; + const RoseInstruction *target; + + RoseInstrCheckExhausted(u32 ekey_in, const RoseInstruction *target_in) + : ekey(ekey_in), target(target_in) {} + + bool operator==(const RoseInstrCheckExhausted &ri) const { + return ekey == ri.ekey && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), ekey); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckExhausted &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return ekey == ri.ekey && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class 
RoseInstrCheckMinLength + : public RoseInstrBaseOneTarget { +public: + s32 end_adj; + u64a min_length; + const RoseInstruction *target; + + RoseInstrCheckMinLength(s32 end_adj_in, u64a min_length_in, + const RoseInstruction *target_in) + : end_adj(end_adj_in), min_length(min_length_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMinLength &ri) const { + return end_adj == ri.end_adj && min_length == ri.min_length && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), end_adj, min_length); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMinLength &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return end_adj == ri.end_adj && min_length == ri.min_length && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSetState + : public RoseInstrBaseNoTargets { +public: + u32 index; + + explicit RoseInstrSetState(u32 index_in) : index(index_in) {} + + bool operator==(const RoseInstrSetState &ri) const { + return index == ri.index; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), index); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetState &ri, const OffsetMap &, + const OffsetMap &) const { + return index == ri.index; + } +}; + +class RoseInstrSetGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrSetGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrSetGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), groups); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrSquashGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrSquashGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrSquashGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), groups); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSquashGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrCheckState + : public RoseInstrBaseOneTarget { +public: + u32 index; + const RoseInstruction *target; + + RoseInstrCheckState(u32 index_in, const RoseInstruction *target_in) + : index(index_in), target(target_in) {} + + bool operator==(const RoseInstrCheckState &ri) const { + return index == ri.index && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), index); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckState &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return index == ri.index && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSparseIterBegin + : public RoseInstrBase { +public: + u32 num_keys; // total number of multibit keys + std::vector> jump_table; + const RoseInstruction *target; + + 
RoseInstrSparseIterBegin(u32 num_keys_in, + const RoseInstruction *target_in) + : num_keys(num_keys_in), target(target_in) {} + + bool operator==(const RoseInstrSparseIterBegin &ri) const { + return num_keys == ri.num_keys && jump_table == ri.jump_table && + target == ri.target; + } + + size_t hash() const override { + size_t v = hash_all(static_cast(opcode), num_keys); + for (const u32 &key : jump_table | boost::adaptors::map_keys) { + boost::hash_combine(v, key); + } + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + if (target == old_target) { + target = new_target; + } + for (auto &jump : jump_table) { + if (jump.second == old_target) { + jump.second = new_target; + } + } + } + + bool equiv_to(const RoseInstrSparseIterBegin &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + if (iter_offset != ri.iter_offset || + offsets.at(target) != other_offsets.at(ri.target)) { + return false; + } + if (jump_table.size() != ri.jump_table.size()) { + return false; + } + auto it1 = jump_table.begin(), it2 = ri.jump_table.begin(); + for (; it1 != jump_table.end(); ++it1, ++it2) { + if (it1->first != it2->first) { + return false; + } + if (offsets.at(it1->second) != other_offsets.at(it2->second)) { + return false; + } + } + return true; + } + +private: + friend class RoseInstrSparseIterNext; + + // These variables allow us to use the same multibit iterator and jump + // table in subsequent SPARSE_ITER_NEXT write() operations. + mutable bool is_written = false; + mutable u32 iter_offset = 0; + mutable u32 jump_table_offset = 0; +}; + +class RoseInstrSparseIterNext + : public RoseInstrBase { +public: + u32 state; + const RoseInstrSparseIterBegin *begin; + const RoseInstruction *target; + + RoseInstrSparseIterNext(u32 state_in, + const RoseInstrSparseIterBegin *begin_in, + const RoseInstruction *target_in) + : state(state_in), begin(begin_in), target(target_in) {} + + bool operator==(const RoseInstrSparseIterNext &ri) const { + return state == ri.state && begin == ri.begin && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), state); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + if (target == old_target) { + target = new_target; + } + if (begin == old_target) { + assert(new_target->code() == ROSE_INSTR_SPARSE_ITER_BEGIN); + begin = static_cast(new_target); + } + } + + bool equiv_to(const RoseInstrSparseIterNext &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return state == ri.state && + offsets.at(begin) == other_offsets.at(ri.begin) && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSparseIterAny + : public RoseInstrBaseOneTarget { +public: + u32 num_keys; // total number of multibit keys + std::vector keys; + const RoseInstruction *target; + + RoseInstrSparseIterAny(u32 num_keys_in, std::vector keys_in, + const RoseInstruction *target_in) + : num_keys(num_keys_in), keys(std::move(keys_in)), target(target_in) {} + + bool operator==(const RoseInstrSparseIterAny &ri) const { + return num_keys == ri.num_keys && keys == ri.keys && + target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), num_keys, keys); + } + + 
void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSparseIterAny &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return num_keys == ri.num_keys && keys == ri.keys && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrEnginesEod + : public RoseInstrBaseNoTargets { +public: + u32 iter_offset; + + explicit RoseInstrEnginesEod(u32 iter_in) : iter_offset(iter_in) {} + + bool operator==(const RoseInstrEnginesEod &ri) const { + return iter_offset == ri.iter_offset; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), iter_offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrEnginesEod &ri, const OffsetMap &, + const OffsetMap &) const { + return iter_offset == ri.iter_offset; + } +}; + +class RoseInstrSuffixesEod + : public RoseInstrBaseTrivial { +public: + ~RoseInstrSuffixesEod() override; +}; + +class RoseInstrMatcherEod : public RoseInstrBaseTrivial { +public: + ~RoseInstrMatcherEod() override; +}; + +class RoseInstrCheckLongLit + : public RoseInstrBaseOneTarget { +public: + std::string literal; + const RoseInstruction *target; + + RoseInstrCheckLongLit(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckLongLit &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLongLitNocase + : public RoseInstrBaseOneTarget { +public: + std::string literal; + const RoseInstruction *target; + + RoseInstrCheckLongLitNocase(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) { + upperString(literal); + } + + bool operator==(const RoseInstrCheckLongLitNocase &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLongLitNocase &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMedLit + : public RoseInstrBaseOneTarget { +public: + std::string literal; + const RoseInstruction *target; + + explicit RoseInstrCheckMedLit(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckMedLit &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMedLit &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == 
other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMedLitNocase + : public RoseInstrBaseOneTarget { +public: + std::string literal; + const RoseInstruction *target; + + explicit RoseInstrCheckMedLitNocase(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) { + upperString(literal); + } + + bool operator==(const RoseInstrCheckMedLitNocase &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMedLitNocase &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrClearWorkDone + : public RoseInstrBaseTrivial { +public: + ~RoseInstrClearWorkDone() override; +}; + +class RoseInstrMultipathLookaround + : public RoseInstrBaseOneTarget { +public: + u32 look_index; + u32 reach_index; + u32 count; + s32 last_start; + std::array start_mask; + const RoseInstruction *target; + + RoseInstrMultipathLookaround(u32 look_index_in, u32 reach_index_in, + u32 count_in, s32 last_start_in, + std::array start_mask_in, + const RoseInstruction *target_in) + : look_index(look_index_in), reach_index(reach_index_in), + count(count_in), last_start(last_start_in), + start_mask(std::move(start_mask_in)), target(target_in) {} + + bool operator==(const RoseInstrMultipathLookaround &ri) const { + return look_index == ri.look_index && reach_index == ri.reach_index && + count == ri.count && last_start == ri.last_start && + start_mask == ri.start_mask && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), look_index, reach_index, + count, last_start, start_mask); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrMultipathLookaround &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return look_index == ri.look_index && reach_index == ri.reach_index && + count == ri.count && last_start == ri.last_start && + start_mask == ri.start_mask && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti16x8 + : public RoseInstrBaseOneTarget { +public: + std::array nib_mask; + std::array bucket_select_mask; + std::array data_select_mask; + u16 hi_bits_mask; + u16 lo_bits_mask; + u16 neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti16x8(std::array nib_mask_in, + std::array bucket_select_mask_in, + std::array data_select_mask_in, + u16 hi_bits_mask_in, u16 lo_bits_mask_in, + u16 neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : nib_mask(std::move(nib_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti16x8 &ri) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + 
neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), nib_mask, + bucket_select_mask, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti16x8 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti32x8 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask; + std::array data_select_mask; + u32 hi_bits_mask; + u32 lo_bits_mask; + u32 neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti32x8(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_in, + std::array data_select_mask_in, + u32 hi_bits_mask_in, u32 lo_bits_mask_in, + u32 neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti32x8 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti32x8 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti32x16 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask_hi; + std::array bucket_select_mask_lo; + std::array data_select_mask; + u32 hi_bits_mask; + u32 lo_bits_mask; + u32 neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti32x16(std::array hi_mask_in, + std::array lo_mask_in, + std::array 
bucket_select_mask_hi_in, + std::array bucket_select_mask_lo_in, + std::array data_select_mask_in, + u32 hi_bits_mask_in, u32 lo_bits_mask_in, + u32 neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), + bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti32x16 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask_hi, bucket_select_mask_lo, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti32x16 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti64 + : public RoseInstrBaseOneTarget { +public: + std::array hi_mask; + std::array lo_mask; + std::array bucket_select_mask; + std::array data_select_mask; + u64a hi_bits_mask; + u64a lo_bits_mask; + u64a neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti64(std::array hi_mask_in, + std::array lo_mask_in, + std::array bucket_select_mask_in, + std::array data_select_mask_in, + u64a hi_bits_mask_in, u64a lo_bits_mask_in, + u64a neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti64 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(static_cast(opcode), hi_mask, lo_mask, + bucket_select_mask, data_select_mask, 
hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti64 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrEnd + : public RoseInstrBaseTrivial { +public: + ~RoseInstrEnd() override; +}; + +} +#endif diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 5cf06200..c319eed2 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -26,11 +26,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "rose_build_engine_blob.h" #include "rose_build_program.h" -#include "util/container.h" -#include "util/multibit_build.h" -#include "util/verify_types.h" + +#include "rose_build_instructions.h" #include #include @@ -39,584 +37,8 @@ using namespace std; namespace ue2 { -/* Destructors to avoid weak vtables. */ - -RoseInstruction::~RoseInstruction() = default; -RoseInstrCatchUp::~RoseInstrCatchUp() = default; -RoseInstrCatchUpMpv::~RoseInstrCatchUpMpv() = default; -RoseInstrSomZero::~RoseInstrSomZero() = default; -RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default; -RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; -RoseInstrEnd::~RoseInstrEnd() = default; -RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default; - using OffsetMap = RoseInstruction::OffsetMap; -static -u32 calc_jump(const OffsetMap &offset_map, const RoseInstruction *from, - const RoseInstruction *to) { - DEBUG_PRINTF("computing relative jump from %p to %p\n", from, to); - assert(from && contains(offset_map, from)); - assert(to && contains(offset_map, to)); - - u32 from_offset = offset_map.at(from); - u32 to_offset = offset_map.at(to); - DEBUG_PRINTF("offsets: %u -> %u\n", from_offset, to_offset); - assert(from_offset <= to_offset); - - return to_offset - from_offset; -} - -void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->groups = groups; - inst->anch_id = anch_id; - inst->done_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->min_offset = min_offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->groups = groups; -} - -void RoseInstrCheckOnlyEod::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckBounds::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = 
static_cast(dest); - inst->min_bound = min_bound; - inst->max_bound = max_bound; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->key = key; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->offset = offset; - inst->reach_index = reach_index; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->look_index = look_index; - inst->reach_index = reach_index; - inst->count = count; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMask::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->and_mask = and_mask; - inst->cmp_mask = cmp_mask; - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - copy(begin(and_mask), end(and_mask), inst->and_mask); - copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask); - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->and_mask = and_mask; - inst->cmp_mask = cmp_mask; - inst->negation = negation; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckShufti16x8::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - copy(begin(nib_mask), end(nib_mask), inst->nib_mask); - copy(begin(bucket_select_mask), end(bucket_select_mask), - inst->bucket_select_mask); - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckShufti32x8::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - copy(begin(hi_mask), end(hi_mask), inst->hi_mask); - copy(begin(lo_mask), end(lo_mask), inst->lo_mask); - copy(begin(bucket_select_mask), end(bucket_select_mask), - inst->bucket_select_mask); - - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckShufti16x16::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - copy(begin(hi_mask), end(hi_mask), inst->hi_mask); - copy(begin(lo_mask), end(lo_mask), inst->lo_mask); - copy(begin(bucket_select_mask), end(bucket_select_mask), - 
inst->bucket_select_mask); - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - copy(begin(hi_mask), end(hi_mask), inst->hi_mask); - copy(begin(lo_mask), end(lo_mask), inst->lo_mask); - copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi), - inst->bucket_select_mask_hi); - copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo), - inst->bucket_select_mask_lo); - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->queue = queue; - inst->lag = lag; - inst->report = report; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckPrefix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->queue = queue; - inst->lag = lag; - inst->report = report; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->delay = delay; - inst->index = index; -} - -void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->distance = distance; -} - -void RoseInstrSomLeftfix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->queue = queue; - inst->lag = lag; -} - -void RoseInstrSomFromReport::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->som = som; -} - -void RoseInstrTriggerInfix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->cancel = cancel; - inst->queue = queue; - inst->event = event; -} - -void RoseInstrTriggerSuffix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->queue = queue; - inst->event = event; -} - -void RoseInstrDedupe::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->quash_som = quash_som; - inst->dkey = dkey; - inst->offset_adjust = offset_adjust; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrDedupeSom::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->quash_som = quash_som; - inst->dkey = dkey; - inst->offset_adjust = offset_adjust; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrReportChain::write(void *dest, RoseEngineBlob &blob, - 
const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->event = event; - inst->top_squash_distance = top_squash_distance; -} - -void RoseInstrReportSomInt::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->som = som; -} - -void RoseInstrReportSomAware::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->som = som; -} - -void RoseInstrReport::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; -} - -void RoseInstrReportExhaust::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; - inst->ekey = ekey; -} - -void RoseInstrReportSom::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; -} - -void RoseInstrReportSomExhaust::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; - inst->ekey = ekey; -} - -void RoseInstrDedupeAndReport::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->quash_som = quash_som; - inst->dkey = dkey; - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrFinalReport::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; -} - -void RoseInstrCheckExhausted::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->ekey = ekey; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMinLength::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->end_adj = end_adj; - inst->min_length = min_length; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrSetState::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->index = index; -} - -void RoseInstrSetGroups::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->groups = groups; -} - -void RoseInstrSquashGroups::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - 
inst->groups = groups;
-}
-
-void RoseInstrCheckState::write(void *dest, RoseEngineBlob &blob,
-                                const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_CHECK_STATE *>(dest);
-    inst->index = index;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-}
-
-void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob,
-                                     const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SPARSE_ITER_BEGIN *>(dest);
-    inst->fail_jump = calc_jump(offset_map, this, target);
-
-    // Resolve and write the multibit sparse iterator and the jump table.
-    vector<u32> keys;
-    vector<u32> jump_offsets;
-    for (const auto &jump : jump_table) {
-        keys.push_back(jump.first);
-        assert(contains(offset_map, jump.second));
-        jump_offsets.push_back(offset_map.at(jump.second));
-    }
-
-    auto iter = mmbBuildSparseIterator(keys, num_keys);
-    assert(!iter.empty());
-    inst->iter_offset = blob.add_iterator(iter);
-    inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end());
-
-    // Store offsets for corresponding SPARSE_ITER_NEXT operations.
-    is_written = true;
-    iter_offset = inst->iter_offset;
-    jump_table_offset = inst->jump_table;
-}
-
-void RoseInstrSparseIterNext::write(void *dest, RoseEngineBlob &blob,
-                                    const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SPARSE_ITER_NEXT *>(dest);
-    inst->state = state;
-    inst->fail_jump = calc_jump(offset_map, this, target);
-
-    // Use the same sparse iterator and jump table as the SPARSE_ITER_BEGIN
-    // instruction.
-    assert(begin);
-    assert(contains(offset_map, begin));
-    assert(begin->is_written);
-    inst->iter_offset = begin->iter_offset;
-    inst->jump_table = begin->jump_table_offset;
-}
-
-void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob,
-                                   const OffsetMap &offset_map) const {
-    RoseInstrBase::write(dest, blob, offset_map);
-    auto *inst = static_cast<ROSE_STRUCT_SPARSE_ITER_ANY *>(dest);
-    inst->fail_jump = calc_jump(offset_map, this, target);
-
-    // Write the multibit sparse iterator.
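The SPARSE_ITER_BEGIN writer above does the interesting work once: it splits its jump table into a key array (fed to the multibit sparse iterator builder) and a parallel array of resolved program offsets, writes both into the engine blob, and caches the resulting blob offsets so that every paired SPARSE_ITER_NEXT can simply reuse them. A minimal sketch of that key/offset split, assuming a sorted std::map keyed by multibit state index; splitJumpTable and the plain unsigned types are illustrative stand-ins, not names from the patch.

    #include <map>
    #include <vector>

    // Split a key -> program-offset jump table into the two parallel
    // arrays that get serialized: the keys drive the sparse iterator,
    // the offsets become the bytecode jump table.
    static void splitJumpTable(const std::map<unsigned, unsigned> &jump_table,
                               std::vector<unsigned> &keys,
                               std::vector<unsigned> &jump_offsets) {
        for (const auto &jump : jump_table) {
            keys.push_back(jump.first);          // multibit state index
            jump_offsets.push_back(jump.second); // where that state jumps
        }
    }

Because std::map iterates in ascending key order, both arrays come out sorted by key, matching the ascending order in which a sparse-iterator walk visits set states.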
- auto iter = mmbBuildSparseIterator(keys, num_keys); - assert(!iter.empty()); - inst->iter_offset = blob.add_iterator(iter); -} - -void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->iter_offset = iter_offset; -} - -void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - assert(!literal.empty()); - inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); - inst->lit_length = verify_u32(literal.size()); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - assert(!literal.empty()); - inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); - inst->lit_length = verify_u32(literal.size()); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMedLit::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - assert(!literal.empty()); - inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); - inst->lit_length = verify_u32(literal.size()); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - assert(!literal.empty()); - inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); - inst->lit_length = verify_u32(literal.size()); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - inst->look_index = look_index; - inst->reach_index = reach_index; - inst->count = count; - inst->last_start = last_start; - copy(begin(start_mask), end(start_mask), inst->start_mask); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMultipathShufti16x8::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) - const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - copy(begin(nib_mask), end(nib_mask), inst->nib_mask); - copy(begin(bucket_select_mask), begin(bucket_select_mask) + 16, - inst->bucket_select_mask); - copy(begin(data_select_mask), begin(data_select_mask) + 16, - inst->data_select_mask); - inst->hi_bits_mask = hi_bits_mask; - inst->lo_bits_mask = lo_bits_mask; - inst->neg_mask = neg_mask; - inst->base_offset = base_offset; - inst->last_start = last_start; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMultipathShufti32x8::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) - const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); - copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); - copy(begin(bucket_select_mask), begin(bucket_select_mask) + 32, - inst->bucket_select_mask); - copy(begin(data_select_mask), begin(data_select_mask) + 
32, - inst->data_select_mask); - inst->hi_bits_mask = hi_bits_mask; - inst->lo_bits_mask = lo_bits_mask; - inst->neg_mask = neg_mask; - inst->base_offset = base_offset; - inst->last_start = last_start; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMultipathShufti32x16::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - copy(begin(hi_mask), end(hi_mask), inst->hi_mask); - copy(begin(lo_mask), end(lo_mask), inst->lo_mask); - copy(begin(bucket_select_mask_hi), begin(bucket_select_mask_hi) + 32, - inst->bucket_select_mask_hi); - copy(begin(bucket_select_mask_lo), begin(bucket_select_mask_lo) + 32, - inst->bucket_select_mask_lo); - copy(begin(data_select_mask), begin(data_select_mask) + 32, - inst->data_select_mask); - inst->hi_bits_mask = hi_bits_mask; - inst->lo_bits_mask = lo_bits_mask; - inst->neg_mask = neg_mask; - inst->base_offset = base_offset; - inst->last_start = last_start; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast(dest); - copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); - copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); - copy(begin(bucket_select_mask), end(bucket_select_mask), - inst->bucket_select_mask); - copy(begin(data_select_mask), end(data_select_mask), - inst->data_select_mask); - inst->hi_bits_mask = hi_bits_mask; - inst->lo_bits_mask = lo_bits_mask; - inst->neg_mask = neg_mask; - inst->base_offset = base_offset; - inst->last_start = last_start; - inst->fail_jump = calc_jump(offset_map, this, target); -} - static OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { OffsetMap offset_map; @@ -633,9 +55,114 @@ OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { return offset_map; } +RoseProgram::RoseProgram() { + prog.push_back(make_unique()); +} + +RoseProgram::~RoseProgram() = default; + +RoseProgram::RoseProgram(RoseProgram &&) = default; +RoseProgram &RoseProgram::operator=(RoseProgram &&) = default; + +bool RoseProgram::empty() const { + assert(!prog.empty()); + assert(prog.back()->code() == ROSE_INSTR_END); + // Empty if we only have one element, the END instruction. 
+    return next(prog.begin()) == prog.end();
+}
+
+const RoseInstruction *RoseProgram::end_instruction() const {
+    assert(!prog.empty());
+    assert(prog.back()->code() == ROSE_INSTR_END);
+
+    return prog.back().get();
+}
+
+void RoseProgram::update_targets(RoseProgram::iterator it,
+                                 RoseProgram::iterator it_end,
+                                 const RoseInstruction *old_target,
+                                 const RoseInstruction *new_target) {
+    assert(old_target && new_target && old_target != new_target);
+    for (; it != it_end; ++it) {
+        unique_ptr<RoseInstruction> &ri = *it;
+        assert(ri);
+        ri->update_target(old_target, new_target);
+    }
+}
+
+RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it,
+                                          unique_ptr<RoseInstruction> ri) {
+    assert(!prog.empty());
+    assert(it != end());
+    assert(prog.back()->code() == ROSE_INSTR_END);
+
+    return prog.insert(it, move(ri));
+}
+
+RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it,
+                                          RoseProgram &&block) {
+    assert(!prog.empty());
+    assert(it != end());
+    assert(prog.back()->code() == ROSE_INSTR_END);
+
+    if (block.empty()) {
+        return it;
+    }
+
+    const RoseInstruction *end_ptr = block.end_instruction();
+    assert(end_ptr->code() == ROSE_INSTR_END);
+    block.prog.pop_back();
+
+    const RoseInstruction *new_target = it->get();
+    update_targets(block.prog.begin(), block.prog.end(), end_ptr, new_target);
+
+    // Workaround: container insert() for ranges doesn't return an iterator
+    // in the version of the STL distributed with gcc 4.8.
+    auto dist = distance(prog.begin(), it);
+    prog.insert(it, make_move_iterator(block.prog.begin()),
+                make_move_iterator(block.prog.end()));
+    it = prog.begin();
+    advance(it, dist);
+    return it;
+}
+
 RoseProgram::iterator RoseProgram::erase(RoseProgram::iterator first,
-                                         RoseProgram::iterator last) {
-    return prog.erase(first, last);
+                                         RoseProgram::iterator last) {
+    return prog.erase(first, last);
+}
+
+void RoseProgram::add_before_end(std::unique_ptr<RoseInstruction> ri) {
+    assert(!prog.empty());
+    insert(std::prev(prog.end()), std::move(ri));
+}
+
+void RoseProgram::add_before_end(RoseProgram &&block) {
+    assert(!prog.empty());
+    assert(prog.back()->code() == ROSE_INSTR_END);
+
+    if (block.empty()) {
+        return;
+    }
+
+    insert(prev(prog.end()), move(block));
+}
+
+void RoseProgram::add_block(RoseProgram &&block) {
+    assert(!prog.empty());
+    assert(prog.back()->code() == ROSE_INSTR_END);
+
+    if (block.empty()) {
+        return;
+    }
+
+    // Replace pointers to the current END with pointers to the first
+    // instruction in the new sequence.
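One detail in RoseProgram::insert() above merits unpacking: C++11 changed the ranged overload of std::vector::insert() to return an iterator, but the libstdc++ shipped with gcc 4.8 still provides the older void-returning signature, so the code records the insertion point as an index and reconstitutes the iterator afterwards. A self-contained sketch of the same pattern on a plain std::vector; insert_range is an illustrative name, not part of the patch.

    #include <iterator>
    #include <vector>

    // Insert [first, last) before pos and return an iterator to the first
    // inserted element, without relying on insert()'s return value.
    template <typename T, typename InputIt>
    typename std::vector<T>::iterator
    insert_range(std::vector<T> &vec, typename std::vector<T>::iterator pos,
                 InputIt first, InputIt last) {
        auto dist = std::distance(vec.begin(), pos); // index survives reallocation
        vec.insert(pos, first, last);                // may invalidate pos itself
        auto it = vec.begin();
        std::advance(it, dist);                      // rebuild a valid iterator
        return it;
    }

The same index-then-advance dance works for any sequence container whose iterators can be invalidated by insertion; here it keeps the code building against the gcc 4.8 toolchain the comment mentions.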
+ const RoseInstruction *end_ptr = end_instruction(); + prog.pop_back(); + update_targets(prog.begin(), prog.end(), end_ptr, + block.prog.front().get()); + prog.insert(prog.end(), make_move_iterator(block.prog.begin()), + make_move_iterator(block.prog.end())); } bytecode_ptr writeProgram(RoseEngineBlob &blob, @@ -657,6 +184,15 @@ bytecode_ptr writeProgram(RoseEngineBlob &blob, return bytecode; } +size_t RoseProgramHash::operator()(const RoseProgram &program) const { + size_t v = 0; + for (const auto &ri : program) { + assert(ri); + boost::hash_combine(v, ri->hash()); + } + return v; +} + bool RoseProgramEquivalence::operator()(const RoseProgram &prog1, const RoseProgram &prog2) const { if (prog1.size() != prog2.size()) { diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 9c74d488..c25aab61 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -31,2121 +31,19 @@ #include "rose_build_impl.h" #include "rose_program.h" -#include "som/som_operation.h" #include "util/bytecode_ptr.h" -#include "util/container.h" #include "util/hash.h" #include "util/make_unique.h" #include "util/ue2_containers.h" -#include "util/ue2string.h" -#include -#include #include -#include #include namespace ue2 { class RoseEngineBlob; - -/** - * \brief Abstract base class representing a single Rose instruction. - */ -class RoseInstruction { -public: - virtual ~RoseInstruction(); - - /** \brief Opcode used for the instruction in the bytecode. */ - virtual RoseInstructionCode code() const = 0; - - /** - * \brief Simple hash used for program equivalence. - * - * Note that pointers (jumps, for example) should not be used when - * calculating the hash: they will be converted to instruction offsets when - * compared later. - */ - virtual size_t hash() const = 0; - - /** \brief Length of the bytecode instruction in bytes. */ - virtual size_t byte_length() const = 0; - - using OffsetMap = unordered_map; - - /** - * \brief Writes a concrete implementation of this instruction. - * - * Other data that this instruction depends on is written directly into the - * blob, while the instruction structure itself (of size given by - * the byte_length() function) is written to dest. - */ - virtual void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const = 0; - - /** - * \brief Update a target pointer. - * - * If this instruction contains any reference to the old target, replace it - * with the new one. - */ - virtual void update_target(const RoseInstruction *old_target, - const RoseInstruction *new_target) = 0; - - /** - * \brief True if these instructions are equivalent within their own - * programs. - * - * Checks that any pointers to other instructions point to the same - * offsets. - */ - bool equiv(const RoseInstruction &other, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return equiv_impl(other, offsets, other_offsets); - } - -private: - virtual bool equiv_impl(const RoseInstruction &other, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const = 0; -}; - -/** - * \brief Templated implementation class to handle boring boilerplate code. 
- */ -template -class RoseInstrBase : public RoseInstruction { -protected: - static constexpr RoseInstructionCode opcode = Opcode; - using impl_type = ImplType; - -public: - RoseInstructionCode code() const override { return opcode; } - - size_t byte_length() const override { - return sizeof(impl_type); - } - - /** - * Note: this implementation simply zeroes the destination region and - * writes in the correct opcode. This is sufficient for trivial - * instructions, but instructions with data members will want to override - * it. - */ - void write(void *dest, RoseEngineBlob &, - const RoseInstruction::OffsetMap &) const override { - assert(dest != nullptr); - assert(ISALIGNED_N(dest, ROSE_INSTR_MIN_ALIGN)); - - impl_type *inst = static_cast(dest); - memset(inst, 0, sizeof(impl_type)); - inst->code = verify_u8(opcode); - } - -private: - bool equiv_impl(const RoseInstruction &other, const OffsetMap &offsets, - const OffsetMap &other_offsets) const override { - const auto *ri_that = dynamic_cast(&other); - if (!ri_that) { - return false; - } - const auto *ri_this = dynamic_cast(this); - assert(ri_this); - return ri_this->equiv_to(*ri_that, offsets, other_offsets); - } -}; - -/** - * \brief Refinement of RoseInstrBase to use for instructions that have - * just a single target member, called "target". - */ -template -class RoseInstrBaseOneTarget - : public RoseInstrBase { -public: - void update_target(const RoseInstruction *old_target, - const RoseInstruction *new_target) override { - RoseInstrType *ri = dynamic_cast(this); - assert(ri); - if (ri->target == old_target) { - ri->target = new_target; - } - } -}; - -/** - * \brief Refinement of RoseInstrBase to use for instructions that have no - * targets. - */ -template -class RoseInstrBaseNoTargets - : public RoseInstrBase { -public: - void update_target(const RoseInstruction *, - const RoseInstruction *) override {} -}; - -/** - * \brief Refinement of RoseInstrBaseNoTargets to use for instructions that - * have no members at all, just an opcode. - */ -template -class RoseInstrBaseTrivial - : public RoseInstrBaseNoTargets { -public: - virtual bool operator==(const RoseInstrType &) const { return true; } - - size_t hash() const override { - return boost::hash_value(static_cast(Opcode)); - } - - bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &, - const RoseInstruction::OffsetMap &) const { - return true; - } -}; - -//// -//// Concrete implementation classes start here. 
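The removed doc comments above describe the two-level equivalence scheme that every concrete instruction class below follows: hash() is a cheap bucketing function that must leave jump targets out (raw pointers differ between structurally identical programs), while equiv_to() does the real comparison by resolving each side's target through its own offset map; at write time the same maps feed calc_jump(), which stores offsets.at(to) - offsets.at(from) so jumps in the bytecode are position independent. A condensed sketch of the pattern with hypothetical names; the real classes express it through hash_all() and the OffsetMap typedef.

    #include <boost/functional/hash.hpp>
    #include <cstddef>
    #include <unordered_map>

    struct Node; // stands in for RoseInstruction
    using OffsetMap = std::unordered_map<const Node *, unsigned>;

    struct JumpInstr {
        unsigned opcode;
        unsigned value;
        const Node *target;

        // The target pointer stays out of the hash: it only becomes
        // meaningful once converted to a program offset.
        std::size_t hash() const {
            std::size_t v = 0;
            boost::hash_combine(v, opcode);
            boost::hash_combine(v, value);
            return v;
        }

        // Real equivalence resolves each target through its own program's
        // offset map, so two structurally identical programs compare equal
        // even though their instruction pointers differ.
        bool equiv_to(const JumpInstr &o, const OffsetMap &ours,
                      const OffsetMap &theirs) const {
            return opcode == o.opcode && value == o.value &&
                   ours.at(target) == theirs.at(o.target);
        }
    };

This is also what lets RoseProgramHash and RoseProgramEquivalence, defined earlier in this patch, deduplicate whole programs through an unordered container: equal hashes place candidate programs in the same bucket, and the offset-based comparison then confirms genuine equivalence.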
-//// - -class RoseInstrAnchoredDelay - : public RoseInstrBaseOneTarget { -public: - rose_group groups; - u32 anch_id; - const RoseInstruction *target; - - RoseInstrAnchoredDelay(rose_group groups_in, u32 anch_id_in, - const RoseInstruction *target_in) - : groups(groups_in), anch_id(anch_id_in), target(target_in) {} - - bool operator==(const RoseInstrAnchoredDelay &ri) const { - return groups == ri.groups && anch_id == ri.anch_id - && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), groups, anch_id); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return groups == ri.groups && anch_id == ri.anch_id - && offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckLitEarly - : public RoseInstrBaseOneTarget { -public: - u32 min_offset; - const RoseInstruction *target; - - RoseInstrCheckLitEarly(u32 min_offset_in, const RoseInstruction *target_in) - : min_offset(min_offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckLitEarly &ri) const { - return min_offset == ri.min_offset && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), min_offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return min_offset == ri.min_offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckGroups - : public RoseInstrBaseNoTargets { -public: - rose_group groups; - - explicit RoseInstrCheckGroups(rose_group groups_in) : groups(groups_in) {} - - bool operator==(const RoseInstrCheckGroups &ri) const { - return groups == ri.groups; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), groups); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckGroups &ri, const OffsetMap &, - const OffsetMap &) const { - return groups == ri.groups; - } -}; - -class RoseInstrCheckOnlyEod - : public RoseInstrBaseOneTarget { -public: - const RoseInstruction *target; - - explicit RoseInstrCheckOnlyEod(const RoseInstruction *target_in) - : target(target_in) {} - - bool operator==(const RoseInstrCheckOnlyEod &ri) const { - return target == ri.target; - } - - size_t hash() const override { - return boost::hash_value(static_cast(opcode)); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckOnlyEod &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckBounds - : public RoseInstrBaseOneTarget { -public: - u64a min_bound; - u64a max_bound; - const RoseInstruction *target; - - RoseInstrCheckBounds(u64a min, u64a max, const RoseInstruction *target_in) - : min_bound(min), max_bound(max), target(target_in) {} - - bool operator==(const RoseInstrCheckBounds &ri) const { - return min_bound == ri.min_bound && max_bound == ri.max_bound && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), min_bound, max_bound); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap 
&offset_map) const override; - - bool equiv_to(const RoseInstrCheckBounds &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return min_bound == ri.min_bound && max_bound == ri.max_bound && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckNotHandled - : public RoseInstrBaseOneTarget { -public: - u32 key; - const RoseInstruction *target; - - RoseInstrCheckNotHandled(u32 key_in, const RoseInstruction *target_in) - : key(key_in), target(target_in) {} - - bool operator==(const RoseInstrCheckNotHandled &ri) const { - return key == ri.key && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), key); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckNotHandled &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return key == ri.key && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckSingleLookaround - : public RoseInstrBaseOneTarget { -public: - s8 offset; - u32 reach_index; - const RoseInstruction *target; - - RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in, - const RoseInstruction *target_in) - : offset(offset_in), reach_index(reach_index_in), target(target_in) {} - - bool operator==(const RoseInstrCheckSingleLookaround &ri) const { - return offset == ri.offset && reach_index == ri.reach_index && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), offset, reach_index); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckSingleLookaround &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return offset == ri.offset && reach_index == ri.reach_index && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckLookaround - : public RoseInstrBaseOneTarget { -public: - u32 look_index; - u32 reach_index; - u32 count; - const RoseInstruction *target; - - RoseInstrCheckLookaround(u32 look_index_in, u32 reach_index_in, - u32 count_in, const RoseInstruction *target_in) - : look_index(look_index_in), reach_index(reach_index_in), - count(count_in), target(target_in) {} - - bool operator==(const RoseInstrCheckLookaround &ri) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), look_index, reach_index, - count); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMask - : public RoseInstrBaseOneTarget { -public: - u64a and_mask; - u64a cmp_mask; - u64a neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckMask(u64a and_mask_in, u64a cmp_mask_in, u64a neg_mask_in, - s32 offset_in, const RoseInstruction *target_in) - : and_mask(and_mask_in), cmp_mask(cmp_mask_in), neg_mask(neg_mask_in), - offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMask &ri) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - 
neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, - offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMask &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMask32 - : public RoseInstrBaseOneTarget { -public: - std::array and_mask; - std::array cmp_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckMask32(std::array and_mask_in, - std::array cmp_mask_in, u32 neg_mask_in, - s32 offset_in, const RoseInstruction *target_in) - : and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMask32 &ri) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), and_mask, cmp_mask, neg_mask, - offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMask32 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckByte - : public RoseInstrBaseOneTarget { -public: - u8 and_mask; - u8 cmp_mask; - u8 negation; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckByte(u8 and_mask_in, u8 cmp_mask_in, u8 negation_in, - s32 offset_in, const RoseInstruction *target_in) - : and_mask(and_mask_in), cmp_mask(cmp_mask_in), negation(negation_in), - offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckByte &ri) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - negation == ri.negation && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), and_mask, cmp_mask, negation, - offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckByte &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - negation == ri.negation && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti16x8 - : public RoseInstrBaseOneTarget { -public: - std::array nib_mask; - std::array bucket_select_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti16x8(std::array nib_mask_in, - std::array bucket_select_mask_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : nib_mask(std::move(nib_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti16x8 &ri) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == 
ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), nib_mask, - bucket_select_mask, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti16x8 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti32x8 - : public RoseInstrBaseOneTarget { -public: - std::array hi_mask; - std::array lo_mask; - std::array bucket_select_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti32x8(std::array hi_mask_in, - std::array lo_mask_in, - std::array bucket_select_mask_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti32x8 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti32x8 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti16x16 - : public RoseInstrBaseOneTarget { -public: - std::array hi_mask; - std::array lo_mask; - std::array bucket_select_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti16x16(std::array hi_mask_in, - std::array lo_mask_in, - std::array bucket_select_mask_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti16x16 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti16x16 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti32x16 - : public RoseInstrBaseOneTarget { -public: - std::array hi_mask; - 
std::array lo_mask; - std::array bucket_select_mask_hi; - std::array bucket_select_mask_lo; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti32x16(std::array hi_mask_in, - std::array lo_mask_in, - std::array bucket_select_mask_hi_in, - std::array bucket_select_mask_lo_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), - bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti32x16 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask_hi, bucket_select_mask_lo, - neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti32x16 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckInfix - : public RoseInstrBaseOneTarget { -public: - u32 queue; - u32 lag; - ReportID report; - const RoseInstruction *target; - - RoseInstrCheckInfix(u32 queue_in, u32 lag_in, ReportID report_in, - const RoseInstruction *target_in) - : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} - - bool operator==(const RoseInstrCheckInfix &ri) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), queue, lag, report); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckInfix &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckPrefix - : public RoseInstrBaseOneTarget { -public: - u32 queue; - u32 lag; - ReportID report; - const RoseInstruction *target; - - RoseInstrCheckPrefix(u32 queue_in, u32 lag_in, ReportID report_in, - const RoseInstruction *target_in) - : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} - - bool operator==(const RoseInstrCheckPrefix &ri) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), queue, lag, report); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckPrefix &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrPushDelayed - : public RoseInstrBaseNoTargets { -public: 
- u8 delay; - u32 index; - - RoseInstrPushDelayed(u8 delay_in, u32 index_in) - : delay(delay_in), index(index_in) {} - - bool operator==(const RoseInstrPushDelayed &ri) const { - return delay == ri.delay && index == ri.index; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), delay, index); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrPushDelayed &ri, const OffsetMap &, - const OffsetMap &) const { - return delay == ri.delay && index == ri.index; - } -}; - -class RoseInstrCatchUp - : public RoseInstrBaseTrivial { -public: - ~RoseInstrCatchUp() override; -}; - -class RoseInstrCatchUpMpv - : public RoseInstrBaseTrivial { -public: - ~RoseInstrCatchUpMpv() override; -}; - -class RoseInstrSomAdjust - : public RoseInstrBaseNoTargets { -public: - u32 distance; - - explicit RoseInstrSomAdjust(u32 distance_in) : distance(distance_in) {} - - bool operator==(const RoseInstrSomAdjust &ri) const { - return distance == ri.distance; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), distance); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSomAdjust &ri, const OffsetMap &, - const OffsetMap &) const { - return distance == ri.distance; - } -}; - -class RoseInstrSomLeftfix - : public RoseInstrBaseNoTargets { -public: - u32 queue; - u32 lag; - - RoseInstrSomLeftfix(u32 queue_in, u32 lag_in) - : queue(queue_in), lag(lag_in) {} - - bool operator==(const RoseInstrSomLeftfix &ri) const { - return queue == ri.queue && lag == ri.lag; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), queue, lag); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSomLeftfix &ri, const OffsetMap &, - const OffsetMap &) const { - return queue == ri.queue && lag == ri.lag; - } -}; - -class RoseInstrSomFromReport - : public RoseInstrBaseNoTargets { -public: - som_operation som; - - RoseInstrSomFromReport() { - std::memset(&som, 0, sizeof(som)); - } - - bool operator==(const RoseInstrSomFromReport &ri) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), som.type, som.onmatch); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSomFromReport &ri, const OffsetMap &, - const OffsetMap &) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } -}; - -class RoseInstrSomZero - : public RoseInstrBaseTrivial { -public: - ~RoseInstrSomZero() override; -}; - -class RoseInstrTriggerInfix - : public RoseInstrBaseNoTargets { -public: - u8 cancel; - u32 queue; - u32 event; - - RoseInstrTriggerInfix(u8 cancel_in, u32 queue_in, u32 event_in) - : cancel(cancel_in), queue(queue_in), event(event_in) {} - - bool operator==(const RoseInstrTriggerInfix &ri) const { - return cancel == ri.cancel && queue == ri.queue && event == ri.event; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), cancel, queue, event); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrTriggerInfix &ri, const OffsetMap &, - const OffsetMap &) const { - return cancel == ri.cancel && queue == ri.queue && event == ri.event; - } -}; - -class RoseInstrTriggerSuffix - : 
public RoseInstrBaseNoTargets { -public: - u32 queue; - u32 event; - - RoseInstrTriggerSuffix(u32 queue_in, u32 event_in) - : queue(queue_in), event(event_in) {} - - bool operator==(const RoseInstrTriggerSuffix &ri) const { - return queue == ri.queue && event == ri.event; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), queue, event); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrTriggerSuffix &ri, const OffsetMap &, - const OffsetMap &) const { - return queue == ri.queue && event == ri.event; - } -}; - -class RoseInstrDedupe - : public RoseInstrBaseOneTarget { -public: - u8 quash_som; - u32 dkey; - s32 offset_adjust; - const RoseInstruction *target; - - RoseInstrDedupe(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, - const RoseInstruction *target_in) - : quash_som(quash_som_in), dkey(dkey_in), - offset_adjust(offset_adjust_in), target(target_in) {} - - bool operator==(const RoseInstrDedupe &ri) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), quash_som, dkey, - offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrDedupe &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrDedupeSom - : public RoseInstrBaseOneTarget { -public: - u8 quash_som; - u32 dkey; - s32 offset_adjust; - const RoseInstruction *target; - - RoseInstrDedupeSom(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, - const RoseInstruction *target_in) - : quash_som(quash_som_in), dkey(dkey_in), - offset_adjust(offset_adjust_in), target(target_in) {} - - bool operator==(const RoseInstrDedupeSom &ri) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), quash_som, dkey, - offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrDedupeSom &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrReportChain - : public RoseInstrBaseNoTargets { -public: - u32 event; - u64a top_squash_distance; - - RoseInstrReportChain(u32 event_in, u32 top_squash_distance_in) - : event(event_in), top_squash_distance(top_squash_distance_in) {} - - bool operator==(const RoseInstrReportChain &ri) const { - return event == ri.event && - top_squash_distance == ri.top_squash_distance; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), event, top_squash_distance); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportChain &ri, const OffsetMap &, - const OffsetMap &) const { - return event == ri.event && - top_squash_distance == ri.top_squash_distance; - } -}; - -class RoseInstrReportSomInt - : public RoseInstrBaseNoTargets { -public: - som_operation som; - - 
RoseInstrReportSomInt() { - std::memset(&som, 0, sizeof(som)); - } - - bool operator==(const RoseInstrReportSomInt &ri) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), som.type, som.onmatch); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSomInt &ri, const OffsetMap &, - const OffsetMap &) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } -}; - -class RoseInstrReportSomAware - : public RoseInstrBaseNoTargets { -public: - som_operation som; - - RoseInstrReportSomAware() { - std::memset(&som, 0, sizeof(som)); - } - - bool operator==(const RoseInstrReportSomAware &ri) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), som.type, som.onmatch); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSomAware &ri, const OffsetMap &, - const OffsetMap &) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } -}; - -class RoseInstrReport - : public RoseInstrBaseNoTargets { -public: - ReportID onmatch; - s32 offset_adjust; - - RoseInstrReport(ReportID onmatch_in, s32 offset_adjust_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} - - bool operator==(const RoseInstrReport &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReport &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } -}; - -class RoseInstrReportExhaust - : public RoseInstrBaseNoTargets { -public: - ReportID onmatch; - s32 offset_adjust; - u32 ekey; - - RoseInstrReportExhaust(ReportID onmatch_in, s32 offset_adjust_in, - u32 ekey_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} - - bool operator==(const RoseInstrReportExhaust &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust, ekey); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportExhaust &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } -}; - -class RoseInstrReportSom - : public RoseInstrBaseNoTargets { -public: - ReportID onmatch; - s32 offset_adjust; - - RoseInstrReportSom(ReportID onmatch_in, s32 offset_adjust_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} - - bool operator==(const RoseInstrReportSom &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSom &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } -}; - -class 
RoseInstrReportSomExhaust - : public RoseInstrBaseNoTargets { -public: - ReportID onmatch; - s32 offset_adjust; - u32 ekey; - - RoseInstrReportSomExhaust(ReportID onmatch_in, s32 offset_adjust_in, - u32 ekey_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} - - bool operator==(const RoseInstrReportSomExhaust &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust, ekey); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSomExhaust &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } -}; - -class RoseInstrDedupeAndReport - : public RoseInstrBaseOneTarget { -public: - u8 quash_som; - u32 dkey; - ReportID onmatch; - s32 offset_adjust; - const RoseInstruction *target; - - RoseInstrDedupeAndReport(u8 quash_som_in, u32 dkey_in, ReportID onmatch_in, - s32 offset_adjust_in, - const RoseInstruction *target_in) - : quash_som(quash_som_in), dkey(dkey_in), onmatch(onmatch_in), - offset_adjust(offset_adjust_in), target(target_in) {} - - bool operator==(const RoseInstrDedupeAndReport &ri) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), quash_som, dkey, onmatch, - offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrDedupeAndReport &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrFinalReport - : public RoseInstrBaseNoTargets { -public: - ReportID onmatch; - s32 offset_adjust; - - RoseInstrFinalReport(ReportID onmatch_in, s32 offset_adjust_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} - - bool operator==(const RoseInstrFinalReport &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), onmatch, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrFinalReport &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } -}; - -class RoseInstrCheckExhausted - : public RoseInstrBaseOneTarget { -public: - u32 ekey; - const RoseInstruction *target; - - RoseInstrCheckExhausted(u32 ekey_in, const RoseInstruction *target_in) - : ekey(ekey_in), target(target_in) {} - - bool operator==(const RoseInstrCheckExhausted &ri) const { - return ekey == ri.ekey && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), ekey); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckExhausted &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return ekey == ri.ekey && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class 
RoseInstrCheckMinLength - : public RoseInstrBaseOneTarget { -public: - s32 end_adj; - u64a min_length; - const RoseInstruction *target; - - RoseInstrCheckMinLength(s32 end_adj_in, u64a min_length_in, - const RoseInstruction *target_in) - : end_adj(end_adj_in), min_length(min_length_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMinLength &ri) const { - return end_adj == ri.end_adj && min_length == ri.min_length && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), end_adj, min_length); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMinLength &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return end_adj == ri.end_adj && min_length == ri.min_length && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrSetState - : public RoseInstrBaseNoTargets { -public: - u32 index; - - explicit RoseInstrSetState(u32 index_in) : index(index_in) {} - - bool operator==(const RoseInstrSetState &ri) const { - return index == ri.index; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), index); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSetState &ri, const OffsetMap &, - const OffsetMap &) const { - return index == ri.index; - } -}; - -class RoseInstrSetGroups - : public RoseInstrBaseNoTargets { -public: - rose_group groups; - - explicit RoseInstrSetGroups(rose_group groups_in) : groups(groups_in) {} - - bool operator==(const RoseInstrSetGroups &ri) const { - return groups == ri.groups; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), groups); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSetGroups &ri, const OffsetMap &, - const OffsetMap &) const { - return groups == ri.groups; - } -}; - -class RoseInstrSquashGroups - : public RoseInstrBaseNoTargets { -public: - rose_group groups; - - explicit RoseInstrSquashGroups(rose_group groups_in) : groups(groups_in) {} - - bool operator==(const RoseInstrSquashGroups &ri) const { - return groups == ri.groups; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), groups); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSquashGroups &ri, const OffsetMap &, - const OffsetMap &) const { - return groups == ri.groups; - } -}; - -class RoseInstrCheckState - : public RoseInstrBaseOneTarget { -public: - u32 index; - const RoseInstruction *target; - - RoseInstrCheckState(u32 index_in, const RoseInstruction *target_in) - : index(index_in), target(target_in) {} - - bool operator==(const RoseInstrCheckState &ri) const { - return index == ri.index && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), index); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckState &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return index == ri.index && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrSparseIterBegin - : public RoseInstrBase { -public: - u32 num_keys; // total number of multibit keys - std::vector> jump_table; - const RoseInstruction *target; - - 
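Unlike the single-target instructions above, the sparse-iterator begin instruction fans out through a jump table of (multibit key, target) pairs. The targets are raw pointers into the owning instruction list, so any pass that splices programs together has to rewrite them; that is what the `update_target` override below does for both the fall-through target and every jump-table entry. A self-contained sketch of that retargeting step, with `Instr` as a hypothetical stand-in for `RoseInstruction`:

    #include <utility>
    #include <vector>

    struct Instr {};  // stand-in for RoseInstruction

    // Rewrite every edge that pointed at `from` so it points at `to`,
    // covering both the fall-through target and the jump table.
    void retarget(const Instr *&target,
                  std::vector<std::pair<unsigned, const Instr *>> &jump_table,
                  const Instr *from, const Instr *to) {
        if (target == from) {
            target = to;
        }
        for (auto &jump : jump_table) {
            if (jump.second == from) {
                jump.second = to;
            }
        }
    }
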
RoseInstrSparseIterBegin(u32 num_keys_in, - const RoseInstruction *target_in) - : num_keys(num_keys_in), target(target_in) {} - - bool operator==(const RoseInstrSparseIterBegin &ri) const { - return num_keys == ri.num_keys && jump_table == ri.jump_table && - target == ri.target; - } - - size_t hash() const override { - size_t v = hash_all(static_cast(opcode), num_keys); - for (const u32 &key : jump_table | boost::adaptors::map_keys) { - boost::hash_combine(v, key); - } - return v; - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - void update_target(const RoseInstruction *old_target, - const RoseInstruction *new_target) override { - if (target == old_target) { - target = new_target; - } - for (auto &jump : jump_table) { - if (jump.second == old_target) { - jump.second = new_target; - } - } - } - - bool equiv_to(const RoseInstrSparseIterBegin &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - if (iter_offset != ri.iter_offset || - offsets.at(target) != other_offsets.at(ri.target)) { - return false; - } - if (jump_table.size() != ri.jump_table.size()) { - return false; - } - auto it1 = jump_table.begin(), it2 = ri.jump_table.begin(); - for (; it1 != jump_table.end(); ++it1, ++it2) { - if (it1->first != it2->first) { - return false; - } - if (offsets.at(it1->second) != other_offsets.at(it2->second)) { - return false; - } - } - return true; - } - -private: - friend class RoseInstrSparseIterNext; - - // These variables allow us to use the same multibit iterator and jump - // table in subsequent SPARSE_ITER_NEXT write() operations. - mutable bool is_written = false; - mutable u32 iter_offset = 0; - mutable u32 jump_table_offset = 0; -}; - -class RoseInstrSparseIterNext - : public RoseInstrBase { -public: - u32 state; - const RoseInstrSparseIterBegin *begin; - const RoseInstruction *target; - - RoseInstrSparseIterNext(u32 state_in, - const RoseInstrSparseIterBegin *begin_in, - const RoseInstruction *target_in) - : state(state_in), begin(begin_in), target(target_in) {} - - bool operator==(const RoseInstrSparseIterNext &ri) const { - return state == ri.state && begin == ri.begin && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), state); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - void update_target(const RoseInstruction *old_target, - const RoseInstruction *new_target) override { - if (target == old_target) { - target = new_target; - } - if (begin == old_target) { - assert(new_target->code() == ROSE_INSTR_SPARSE_ITER_BEGIN); - begin = static_cast(new_target); - } - } - - bool equiv_to(const RoseInstrSparseIterNext &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return state == ri.state && - offsets.at(begin) == other_offsets.at(ri.begin) && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrSparseIterAny - : public RoseInstrBaseOneTarget { -public: - u32 num_keys; // total number of multibit keys - std::vector keys; - const RoseInstruction *target; - - RoseInstrSparseIterAny(u32 num_keys_in, std::vector keys_in, - const RoseInstruction *target_in) - : num_keys(num_keys_in), keys(std::move(keys_in)), target(target_in) {} - - bool operator==(const RoseInstrSparseIterAny &ri) const { - return num_keys == ri.num_keys && keys == ri.keys && - target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), num_keys, keys); - } - - 
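Each instruction's `hash()` folds its opcode and operands into a single value with `hash_all`, a ue2 utility in the spirit of `boost::hash_combine`; `RoseProgramHash` later combines these per-instruction hashes to deduplicate whole programs. A rough, illustrative equivalent of such a variadic fold — assuming, for the sketch, that every field type has a `std::hash` specialisation:

    #include <cstddef>
    #include <functional>

    // Fold one value into a running seed (same recipe as boost::hash_combine).
    template <typename T>
    void hash_combine(std::size_t &seed, const T &value) {
        seed ^= std::hash<T>()(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }

    // Variadic fold over any number of fields, e.g. hash_all(opcode, num_keys).
    template <typename... Args>
    std::size_t hash_all(const Args &... args) {
        std::size_t seed = 0;
        int dummy[] = {0, (hash_combine(seed, args), 0)...};
        (void)dummy;
        return seed;
    }
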
void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSparseIterAny &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return num_keys == ri.num_keys && keys == ri.keys && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrEnginesEod - : public RoseInstrBaseNoTargets { -public: - u32 iter_offset; - - explicit RoseInstrEnginesEod(u32 iter_in) : iter_offset(iter_in) {} - - bool operator==(const RoseInstrEnginesEod &ri) const { - return iter_offset == ri.iter_offset; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), iter_offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrEnginesEod &ri, const OffsetMap &, - const OffsetMap &) const { - return iter_offset == ri.iter_offset; - } -}; - -class RoseInstrSuffixesEod - : public RoseInstrBaseTrivial { -public: - ~RoseInstrSuffixesEod() override; -}; - -class RoseInstrMatcherEod : public RoseInstrBaseTrivial { -public: - ~RoseInstrMatcherEod() override; -}; - -class RoseInstrCheckLongLit - : public RoseInstrBaseOneTarget { -public: - std::string literal; - const RoseInstruction *target; - - RoseInstrCheckLongLit(std::string literal_in, - const RoseInstruction *target_in) - : literal(std::move(literal_in)), target(target_in) {} - - bool operator==(const RoseInstrCheckLongLit &ri) const { - return literal == ri.literal && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), literal); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return literal == ri.literal && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckLongLitNocase - : public RoseInstrBaseOneTarget { -public: - std::string literal; - const RoseInstruction *target; - - RoseInstrCheckLongLitNocase(std::string literal_in, - const RoseInstruction *target_in) - : literal(std::move(literal_in)), target(target_in) { - upperString(literal); - } - - bool operator==(const RoseInstrCheckLongLitNocase &ri) const { - return literal == ri.literal && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), literal); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLongLitNocase &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return literal == ri.literal && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMedLit - : public RoseInstrBaseOneTarget { -public: - std::string literal; - const RoseInstruction *target; - - explicit RoseInstrCheckMedLit(std::string literal_in, - const RoseInstruction *target_in) - : literal(std::move(literal_in)), target(target_in) {} - - bool operator==(const RoseInstrCheckMedLit &ri) const { - return literal == ri.literal && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), literal); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMedLit &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return literal == ri.literal && - offsets.at(target) == 
other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMedLitNocase - : public RoseInstrBaseOneTarget { -public: - std::string literal; - const RoseInstruction *target; - - explicit RoseInstrCheckMedLitNocase(std::string literal_in, - const RoseInstruction *target_in) - : literal(std::move(literal_in)), target(target_in) { - upperString(literal); - } - - bool operator==(const RoseInstrCheckMedLitNocase &ri) const { - return literal == ri.literal && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), literal); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMedLitNocase &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return literal == ri.literal && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrClearWorkDone - : public RoseInstrBaseTrivial { -public: - ~RoseInstrClearWorkDone() override; -}; - -class RoseInstrMultipathLookaround - : public RoseInstrBaseOneTarget { -public: - u32 look_index; - u32 reach_index; - u32 count; - s32 last_start; - std::array start_mask; - const RoseInstruction *target; - - RoseInstrMultipathLookaround(u32 look_index_in, u32 reach_index_in, - u32 count_in, s32 last_start_in, - std::array start_mask_in, - const RoseInstruction *target_in) - : look_index(look_index_in), reach_index(reach_index_in), - count(count_in), last_start(last_start_in), - start_mask(std::move(start_mask_in)), target(target_in) {} - - bool operator==(const RoseInstrMultipathLookaround &ri) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && last_start == ri.last_start && - start_mask == ri.start_mask && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), look_index, reach_index, - count, last_start, start_mask); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrMultipathLookaround &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && last_start == ri.last_start && - start_mask == ri.start_mask && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMultipathShufti16x8 - : public RoseInstrBaseOneTarget { -public: - std::array nib_mask; - std::array bucket_select_mask; - std::array data_select_mask; - u16 hi_bits_mask; - u16 lo_bits_mask; - u16 neg_mask; - s32 base_offset; - s32 last_start; - const RoseInstruction *target; - - RoseInstrCheckMultipathShufti16x8(std::array nib_mask_in, - std::array bucket_select_mask_in, - std::array data_select_mask_in, - u16 hi_bits_mask_in, u16 lo_bits_mask_in, - u16 neg_mask_in, s32 base_offset_in, - s32 last_start_in, - const RoseInstruction *target_in) - : nib_mask(std::move(nib_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - data_select_mask(std::move(data_select_mask_in)), - hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), - neg_mask(neg_mask_in), base_offset(base_offset_in), - last_start(last_start_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMultipathShufti16x8 &ri) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && - 
neg_mask == ri.neg_mask && base_offset == ri.base_offset && - last_start == ri.last_start && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), nib_mask, - bucket_select_mask, data_select_mask, hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMultipathShufti16x8 &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && - base_offset == ri.base_offset && last_start == ri.last_start && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMultipathShufti32x8 - : public RoseInstrBaseOneTarget { -public: - std::array hi_mask; - std::array lo_mask; - std::array bucket_select_mask; - std::array data_select_mask; - u32 hi_bits_mask; - u32 lo_bits_mask; - u32 neg_mask; - s32 base_offset; - s32 last_start; - const RoseInstruction *target; - - RoseInstrCheckMultipathShufti32x8(std::array hi_mask_in, - std::array lo_mask_in, - std::array bucket_select_mask_in, - std::array data_select_mask_in, - u32 hi_bits_mask_in, u32 lo_bits_mask_in, - u32 neg_mask_in, s32 base_offset_in, - s32 last_start_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - data_select_mask(std::move(data_select_mask_in)), - hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), - neg_mask(neg_mask_in), base_offset(base_offset_in), - last_start(last_start_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMultipathShufti32x8 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && - neg_mask == ri.neg_mask && base_offset == ri.base_offset && - last_start == ri.last_start && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, data_select_mask, hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMultipathShufti32x8 &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && - base_offset == ri.base_offset && last_start == ri.last_start && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMultipathShufti32x16 - : public RoseInstrBaseOneTarget { -public: - std::array hi_mask; - std::array lo_mask; - std::array bucket_select_mask_hi; - std::array bucket_select_mask_lo; - std::array data_select_mask; - u32 hi_bits_mask; - u32 lo_bits_mask; - u32 neg_mask; - s32 base_offset; - s32 last_start; - const RoseInstruction *target; - - RoseInstrCheckMultipathShufti32x16(std::array hi_mask_in, - std::array lo_mask_in, - std::array 
bucket_select_mask_hi_in, - std::array bucket_select_mask_lo_in, - std::array data_select_mask_in, - u32 hi_bits_mask_in, u32 lo_bits_mask_in, - u32 neg_mask_in, s32 base_offset_in, - s32 last_start_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), - bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), - data_select_mask(std::move(data_select_mask_in)), - hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), - neg_mask(neg_mask_in), base_offset(base_offset_in), - last_start(last_start_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMultipathShufti32x16 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && - neg_mask == ri.neg_mask && base_offset == ri.base_offset && - last_start == ri.last_start && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask_hi, bucket_select_mask_lo, - data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, - base_offset, last_start); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMultipathShufti32x16 &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && - base_offset == ri.base_offset && last_start == ri.last_start && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMultipathShufti64 - : public RoseInstrBaseOneTarget { -public: - std::array hi_mask; - std::array lo_mask; - std::array bucket_select_mask; - std::array data_select_mask; - u64a hi_bits_mask; - u64a lo_bits_mask; - u64a neg_mask; - s32 base_offset; - s32 last_start; - const RoseInstruction *target; - - RoseInstrCheckMultipathShufti64(std::array hi_mask_in, - std::array lo_mask_in, - std::array bucket_select_mask_in, - std::array data_select_mask_in, - u64a hi_bits_mask_in, u64a lo_bits_mask_in, - u64a neg_mask_in, s32 base_offset_in, - s32 last_start_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - data_select_mask(std::move(data_select_mask_in)), - hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), - neg_mask(neg_mask_in), base_offset(base_offset_in), - last_start(last_start_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMultipathShufti64 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && - neg_mask == ri.neg_mask && base_offset == ri.base_offset && - last_start == ri.last_start && target == ri.target; - } - - size_t hash() const override { - return hash_all(static_cast(opcode), hi_mask, lo_mask, - bucket_select_mask, data_select_mask, 
hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMultipathShufti64 &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && - base_offset == ri.base_offset && last_start == ri.last_start && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrEnd - : public RoseInstrBaseTrivial { -public: - ~RoseInstrEnd() override; -}; +class RoseInstruction; /** * \brief Container for a list of program instructions. @@ -2155,16 +53,14 @@ private: std::vector> prog; public: - RoseProgram() { - prog.push_back(make_unique()); - } + RoseProgram(); + ~RoseProgram(); + RoseProgram(const RoseProgram &) = delete; + RoseProgram(RoseProgram &&); + RoseProgram &operator=(const RoseProgram &) = delete; + RoseProgram &operator=(RoseProgram &&); - bool empty() const { - assert(!prog.empty()); - assert(prog.back()->code() == ROSE_INSTR_END); - // Empty if we only have one element, the END instruction. - return std::next(prog.begin()) == prog.end(); - } + bool empty() const; size_t size() const { return prog.size(); } @@ -2188,58 +84,15 @@ public: const_reverse_iterator rend() const { return prog.rend(); } /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */ - const RoseInstruction *end_instruction() const { - assert(!prog.empty()); - assert(prog.back()->code() == ROSE_INSTR_END); - - return prog.back().get(); - } + const RoseInstruction *end_instruction() const; static void update_targets(iterator it, iterator it_end, const RoseInstruction *old_target, - const RoseInstruction *new_target) { - assert(old_target && new_target && old_target != new_target); - for (; it != it_end; ++it) { - std::unique_ptr &ri = *it; - assert(ri); - ri->update_target(old_target, new_target); - } - } + const RoseInstruction *new_target); - iterator insert(iterator it, std::unique_ptr ri) { - assert(!prog.empty()); - assert(it != end()); - assert(prog.back()->code() == ROSE_INSTR_END); + iterator insert(iterator it, std::unique_ptr ri); - return prog.insert(it, std::move(ri)); - } - - iterator insert(iterator it, RoseProgram &&block) { - assert(!prog.empty()); - assert(it != end()); - assert(prog.back()->code() == ROSE_INSTR_END); - - if (block.empty()) { - return it; - } - - const RoseInstruction *end_ptr = block.end_instruction(); - assert(end_ptr->code() == ROSE_INSTR_END); - block.prog.pop_back(); - - const RoseInstruction *new_target = it->get(); - update_targets(block.prog.begin(), block.prog.end(), end_ptr, - new_target); - - // Workaround: container insert() for ranges doesn't return an iterator - // in the version of the STL distributed with gcc 4.8. - auto dist = distance(prog.begin(), it); - prog.insert(it, std::make_move_iterator(block.prog.begin()), - std::make_move_iterator(block.prog.end())); - it = prog.begin(); - std::advance(it, dist); - return it; - } + iterator insert(iterator it, RoseProgram &&block); /* Note: takes iterator rather than const_iterator to support toolchains * with pre-C++11 standard libraries (i.e., gcc-4.8). */ @@ -2249,10 +102,7 @@ public: * \brief Adds this instruction to the program just before the terminating * ROSE_INSTR_END. 
*/ - void add_before_end(std::unique_ptr ri) { - assert(!prog.empty()); - insert(std::prev(prog.end()), std::move(ri)); - } + void add_before_end(std::unique_ptr ri); /** * \brief Adds this block to the program just before the terminating @@ -2260,40 +110,14 @@ public: * * Any existing instruction that was jumping to end continues to do so. */ - void add_before_end(RoseProgram &&block) { - assert(!prog.empty()); - assert(prog.back()->code() == ROSE_INSTR_END); - - if (block.empty()) { - return; - } - - insert(std::prev(prog.end()), std::move(block)); - } - + void add_before_end(RoseProgram &&block); /** * \brief Append this program block, replacing our current ROSE_INSTR_END. * * Any existing instruction that was jumping to end, now leads to the newly * added block. */ - void add_block(RoseProgram &&block) { - assert(!prog.empty()); - assert(prog.back()->code() == ROSE_INSTR_END); - - if (block.empty()) { - return; - } - - // Replace pointers to the current END with pointers to the first - // instruction in the new sequence. - const RoseInstruction *end_ptr = end_instruction(); - prog.pop_back(); - update_targets(prog.begin(), prog.end(), end_ptr, - block.prog.front().get()); - prog.insert(prog.end(), std::make_move_iterator(block.prog.begin()), - std::make_move_iterator(block.prog.end())); - } + void add_block(RoseProgram &&block); /** * \brief Replace the instruction pointed to by the given iterator. @@ -2301,13 +125,10 @@ public: template void replace(Iter it, std::unique_ptr ri) { assert(!prog.empty()); - assert(prog.back()->code() == ROSE_INSTR_END); const RoseInstruction *old_ptr = it->get(); *it = move(ri); update_targets(prog.begin(), prog.end(), old_ptr, it->get()); - - assert(prog.back()->code() == ROSE_INSTR_END); } }; @@ -2316,14 +137,7 @@ bytecode_ptr writeProgram(RoseEngineBlob &blob, class RoseProgramHash { public: - size_t operator()(const RoseProgram &program) const { - size_t v = 0; - for (const auto &ri : program) { - assert(ri); - boost::hash_combine(v, ri->hash()); - } - return v; - } + size_t operator()(const RoseProgram &program) const; }; class RoseProgramEquivalence { From 1878b9a857a4353327278a3ee50e5bfb27ec3e76 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 24 Apr 2017 14:40:47 +1000 Subject: [PATCH 273/326] report_manager: use unordered externalIdMap --- src/util/report_manager.cpp | 5 +++-- src/util/report_manager.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp index 9c72da07..6f6bd0e8 100644 --- a/src/util/report_manager.cpp +++ b/src/util/report_manager.cpp @@ -174,8 +174,9 @@ u32 ReportManager::getDkey(const Report &r) const { void ReportManager::registerExtReport(ReportID id, const external_report_info &ext) { - if (contains(externalIdMap, id)) { - const external_report_info &eri = externalIdMap.at(id); + auto it = externalIdMap.find(id); + if (it != externalIdMap.end()) { + const external_report_info &eri = it->second; if (eri.highlander != ext.highlander) { /* we have a problem */ ostringstream out; diff --git a/src/util/report_manager.h b/src/util/report_manager.h index f76aff22..3fcad330 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -38,6 +38,7 @@ #include "util/compile_error.h" #include "util/noncopyable.h" #include "util/report.h" +#include "util/ue2_containers.h" #include #include @@ -140,7 +141,7 @@ private: /** \brief Mapping from external match ids to information about that * id. 
     */
-    std::map<ReportID, external_report_info> externalIdMap;
+    unordered_map<ReportID, external_report_info> externalIdMap;
 
     /** \brief Mapping from expression index to exhaustion key. */
     std::map<u32, u32> toExhaustibleKeyMap;

From 9258592d0be4c28753e2a5746abe63f28370a7ce Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 24 Apr 2017 14:58:20 +1000
Subject: [PATCH 274/326] report_manager: more use of unordered_map

---
 src/util/report.h           | 24 +++++++++++++++++++++---
 src/util/report_manager.cpp |  2 +-
 src/util/report_manager.h   |  8 ++++----
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/src/util/report.h b/src/util/report.h
index 24ecca9d..a8e233ff 100644
--- a/src/util/report.h
+++ b/src/util/report.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -34,9 +34,10 @@
 #ifndef UTIL_REPORT_H
 #define UTIL_REPORT_H
 
-#include "util/exhaust.h" // for INVALID_EKEY
-#include "order_check.h"
 #include "ue2common.h"
+#include "util/exhaust.h" // for INVALID_EKEY
+#include "util/hash.h"
+#include "util/order_check.h"
 
 #include <cassert>
 
@@ -195,6 +196,23 @@ bool operator<(const Report &a, const Report &b) {
     return false;
 }
 
+inline
+bool operator==(const Report &a, const Report &b) {
+    return a.type == b.type && a.quashSom == b.quashSom &&
+           a.minOffset == b.minOffset && a.maxOffset == b.maxOffset &&
+           a.minLength == b.minLength && a.ekey == b.ekey &&
+           a.offsetAdjust == b.offsetAdjust && a.onmatch == b.onmatch &&
+           a.revNfaIndex == b.revNfaIndex && a.somDistance == b.somDistance &&
+           a.topSquashDistance == b.topSquashDistance;
+}
+
+inline
+size_t hash_value(const Report &r) {
+    return hash_all(r.type, r.quashSom, r.minOffset, r.maxOffset, r.minLength,
+                    r.ekey, r.offsetAdjust, r.onmatch, r.revNfaIndex,
+                    r.somDistance, r.topSquashDistance);
+}
+
 static inline
 Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey) {
     Report ir(EXTERNAL_CALLBACK, report);
diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp
index 6f6bd0e8..a846eb25 100644
--- a/src/util/report_manager.cpp
+++ b/src/util/report_manager.cpp
@@ -67,7 +67,7 @@ u32 ReportManager::getInternalId(const Report &ir) {
 
     u32 size = reportIds.size();
     reportIds.push_back(ir);
-    reportIdToInternalMap[ir] = size;
+    reportIdToInternalMap.emplace(ir, size);
     DEBUG_PRINTF("new report %u\n", size);
     return size;
 }
diff --git a/src/util/report_manager.h b/src/util/report_manager.h
index 3fcad330..95e14a2c 100644
--- a/src/util/report_manager.h
+++ b/src/util/report_manager.h
@@ -130,14 +130,14 @@ private:
     std::vector<Report> reportIds;
 
     /** \brief Mapping from Report to ID (inverse of \ref reportIds
-     * vector). */
-    std::map<Report, u32> reportIdToInternalMap;
+     * vector). */
+    unordered_map<Report, u32> reportIdToInternalMap;
 
     /** \brief Mapping from ReportID to dedupe key. */
-    std::map<ReportID, u32> reportIdToDedupeKey;
+    unordered_map<ReportID, u32> reportIdToDedupeKey;
 
     /** \brief Mapping from ReportID to Rose program offset in bytecode. */
-    std::map<ReportID, u32> reportIdToProgramOffset;
+    unordered_map<ReportID, u32> reportIdToProgramOffset;
 
     /** \brief Mapping from external match ids to information about that
      * id.
     */

From 82838f5728ea4f2194bb6dee327c429bfdd8bdc5 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 26 Apr 2017 11:21:19 +1000
Subject: [PATCH 275/326] rose_build: move dedupe analysis into own file

---
 CMakeLists.txt                 |   1 +
 src/rose/rose_build_dedupe.cpp | 388 +++++++++++++++++++++++++++++++++
 src/rose/rose_build_misc.cpp   | 348 -----------------------------
 3 files changed, 389 insertions(+), 348 deletions(-)
 create mode 100644 src/rose/rose_build_dedupe.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f03969ec..650bcf20 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -947,6 +947,7 @@ SET (hs_SRCS
     src/rose/rose_build_compile.cpp
     src/rose/rose_build_convert.cpp
     src/rose/rose_build_convert.h
+    src/rose/rose_build_dedupe.cpp
     src/rose/rose_build_engine_blob.h
     src/rose/rose_build_exclusive.cpp
     src/rose/rose_build_exclusive.h
diff --git a/src/rose/rose_build_dedupe.cpp b/src/rose/rose_build_dedupe.cpp
new file mode 100644
index 00000000..dbff7aa7
--- /dev/null
+++ b/src/rose/rose_build_dedupe.cpp
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_impl.h"
+#include "nfa/castlecompile.h"
+#include "nfagraph/ng_repeat.h"
+#include "util/compile_context.h"
+#include "util/boundary_reports.h"
+#include "util/make_unique.h"
+#include "util/report_manager.h"
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool requiresDedupe(const NGHolder &h, const ue2::flat_set<ReportID> &reports,
+                    const Grey &grey) {
+    /* TODO: tighten */
+    NFAVertex seen_vert = NGHolder::null_vertex();
+
+    for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+        if (has_intersection(h[v].reports, reports)) {
+            if (seen_vert != NGHolder::null_vertex()) {
+                return true;
+            }
+            seen_vert = v;
+        }
+    }
+
+    for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
+        if (has_intersection(h[v].reports, reports)) {
+            if (seen_vert != NGHolder::null_vertex()) {
+                return true;
+            }
+            seen_vert = v;
+        }
+    }
+
+    if (seen_vert) {
+        /* if the reporting vertex is part of a terminal repeat, the
+         * construction process may reform the graph splitting it into two
+         * vertices (pos, cyclic) and hence require dedupe */
+        vector<GraphRepeatInfo> repeats;
+        findRepeats(h, grey.minExtBoundedRepeatSize, &repeats);
+        for (const auto &repeat : repeats) {
+            if (find(repeat.vertices.begin(), repeat.vertices.end(),
+                     seen_vert) != repeat.vertices.end()) {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+class RoseDedupeAuxImpl : public RoseDedupeAux {
+public:
+    explicit RoseDedupeAuxImpl(const RoseBuildImpl &build_in);
+    bool requiresDedupeSupport(
+        const ue2::flat_set<ReportID> &reports) const override;
+
+private:
+    bool hasSafeMultiReports(const ue2::flat_set<ReportID> &reports) const;
+
+    const RoseBuildImpl &build;
+    map<ReportID, set<RoseVertex>> vert_map; //!< ordinary literals
+    map<ReportID, set<RoseVertex>> sb_vert_map; //!< small block literals
+    map<ReportID, set<suffix_id>> suffix_map;
+    map<ReportID, set<const OutfixInfo *>> outfix_map;
+    map<ReportID, set<const raw_puff *>> puff_map;
+
+    unordered_set<ReportID> live_reports; //!< all live internal reports.
+};
+
+unique_ptr<RoseDedupeAux> RoseBuildImpl::generateDedupeAux() const {
+    return ue2::make_unique<RoseDedupeAuxImpl>(*this);
+}
+
+RoseDedupeAux::~RoseDedupeAux() = default;
+
+RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in)
+    : build(build_in) {
+    const RoseGraph &g = build.g;
+
+    set<suffix_id> suffixes;
+
+    for (auto v : vertices_range(g)) {
+        insert(&live_reports, g[v].reports);
+
+        // Literals in the small block table are "shadow" copies of literals in
+        // the other tables that do not run in the same runtime invocation.
+        // Dedupe key assignment will be taken care of by the real literals.
+        if (build.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) {
+            for (const auto &report_id : g[v].reports) {
+                sb_vert_map[report_id].insert(v);
+            }
+        } else {
+            for (const auto &report_id : g[v].reports) {
+                vert_map[report_id].insert(v);
+            }
+        }
+
+        // Several vertices may share a suffix, so we collect the set of
+        // suffixes first to avoid repeating work.
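The code that follows finishes the two-pass structure this comment describes: unique suffixes are gathered into a set during the vertex walk, and their reports are expanded afterwards, so a suffix engine shared by many vertices is processed once rather than once per vertex. The pattern in isolation, as a small self-contained example (plain integer IDs stand in for `suffix_id`, and the multiplication stands in for the real `all_reports()` lookup):

    #include <map>
    #include <set>
    #include <vector>

    int main() {
        // Many "vertices" may reference the same shared engine ID.
        std::vector<int> vertex_suffix = {3, 7, 3, 3, 7};

        // Pass 1: collect the unique engines.
        std::set<int> uniq(vertex_suffix.begin(), vertex_suffix.end());

        // Pass 2: do the expensive per-engine work exactly once each.
        std::map<int, int> reports_for_engine;
        for (int engine : uniq) {
            reports_for_engine[engine] = engine * 10; // stand-in for all_reports()
        }
        return reports_for_engine.size() == 2 ? 0 : 1;
    }
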
+ if (g[v].suffix) { + suffixes.insert(g[v].suffix); + } + } + + for (const auto &suffix : suffixes) { + for (const auto &report_id : all_reports(suffix)) { + suffix_map[report_id].insert(suffix); + live_reports.insert(report_id); + } + } + + for (const auto &outfix : build.outfixes) { + for (const auto &report_id : all_reports(outfix)) { + outfix_map[report_id].insert(&outfix); + live_reports.insert(report_id); + } + } + + if (build.mpv_outfix) { + auto *mpv = build.mpv_outfix->mpv(); + for (const auto &puff : mpv->puffettes) { + puff_map[puff.report].insert(&puff); + live_reports.insert(puff.report); + } + for (const auto &puff : mpv->triggered_puffettes) { + puff_map[puff.report].insert(&puff); + live_reports.insert(puff.report); + } + } + + // Collect live reports from boundary reports. + insert(&live_reports, build.boundary.report_at_0); + insert(&live_reports, build.boundary.report_at_0_eod); + insert(&live_reports, build.boundary.report_at_eod); + + DEBUG_PRINTF("%zu of %zu reports are live\n", live_reports.size(), + build.rm.numReports()); +} + +static +vector makePath(const rose_literal_id &lit) { + vector path(begin(lit.s), end(lit.s)); + for (u32 i = 0; i < lit.delay; i++) { + path.push_back(CharReach::dot()); + } + return path; +} + +/** + * \brief True if one of the given literals overlaps with the suffix of + * another, meaning that they could arrive at the same offset. + */ +static +bool literalsCouldRace(const rose_literal_id &lit1, + const rose_literal_id &lit2) { + DEBUG_PRINTF("compare %s (delay %u) and %s (delay %u)\n", + dumpString(lit1.s).c_str(), lit1.delay, + dumpString(lit2.s).c_str(), lit2.delay); + + // Add dots on the end of each literal for delay. + const auto v1 = makePath(lit1); + const auto v2 = makePath(lit2); + + // See if the smaller path is a suffix of the larger path. + const auto *smaller = v1.size() < v2.size() ? &v1 : &v2; + const auto *bigger = v1.size() < v2.size() ? &v2 : &v1; + auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(), + overlaps); + return r.first == smaller->rend(); +} + +bool RoseDedupeAuxImpl::hasSafeMultiReports( + const flat_set &reports) const { + if (reports.size() <= 1) { + return true; + } + + /* We have more than one ReportID corresponding to the external ID that is + * presented to the user. These may differ in offset adjustment, bounds + * checks, etc. */ + + /* TODO: work out if these differences will actually cause problems */ + + /* One common case where we know we don't have a problem is if there are + * precisely two reports, one for the main Rose path and one for the + * "small block matcher" path. */ + if (reports.size() == 2) { + ReportID id1 = *reports.begin(); + ReportID id2 = *reports.rbegin(); + + bool has_verts_1 = contains(vert_map, id1); + bool has_verts_2 = contains(vert_map, id2); + bool has_sb_verts_1 = contains(sb_vert_map, id1); + bool has_sb_verts_2 = contains(sb_vert_map, id2); + + if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) { + DEBUG_PRINTF("two reports, one full and one small block: ok\n"); + return true; + } + } + + DEBUG_PRINTF("more than one report\n"); + return false; +} + +bool RoseDedupeAuxImpl::requiresDedupeSupport( + const flat_set &reports_in) const { + /* TODO: this could be expanded to check for offset or character + constraints */ + + // We don't want to consider dead reports (tracked by ReportManager but no + // longer used) for the purposes of assigning dupe keys. 
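`literalsCouldRace` above reduces the question "can these two literals finish at the same offset?" to a suffix test: walk both character-class paths from their back ends with `std::mismatch` and the `overlaps` predicate, and the literals race if the shorter path is exhausted first. The same reversed-range idiom on plain strings, as a simplified sketch (exact character equality instead of `CharReach` overlap, and no delay padding):

    #include <algorithm>
    #include <cassert>
    #include <string>

    // True if the shorter string is a suffix of the longer one, i.e. both
    // literals could be completed by a match ending at the same offset.
    static bool couldRace(const std::string &a, const std::string &b) {
        const std::string &small = a.size() < b.size() ? a : b;
        const std::string &big = a.size() < b.size() ? b : a;
        auto r = std::mismatch(small.rbegin(), small.rend(), big.rbegin());
        return r.first == small.rend();
    }

    int main() {
        assert(couldRace("handshake", "shake")); // "shake" ends where "handshake" ends
        assert(!couldRace("shark", "shake"));
        return 0;
    }
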
+ flat_set reports; + for (auto id : reports_in) { + if (contains(live_reports, id)) { + reports.insert(id); + } + } + + DEBUG_PRINTF("live reports: %s\n", as_string_list(reports).c_str()); + + const RoseGraph &g = build.g; + + bool has_suffix = false; + bool has_outfix = false; + + if (!hasSafeMultiReports(reports)) { + DEBUG_PRINTF("multiple reports not safe\n"); + return true; + } + + set roles; + set suffixes; + set outfixes; + set puffettes; + for (ReportID r : reports) { + if (contains(vert_map, r)) { + insert(&roles, vert_map.at(r)); + } + if (contains(suffix_map, r)) { + insert(&suffixes, suffix_map.at(r)); + } + + if (contains(outfix_map, r)) { + insert(&outfixes, outfix_map.at(r)); + } + + if (contains(puff_map, r)) { + insert(&puffettes, puff_map.at(r)); + } + } + + /* roles */ + + map lits; // Literal ID -> count of occurrences. + + const bool has_role = !roles.empty(); + for (auto v : roles) { + for (const auto &lit : g[v].literals) { + lits[lit]++; + } + if (g[v].eod_accept) { + // Literals plugged into this EOD accept must be taken into account + // as well. + for (auto u : inv_adjacent_vertices_range(v, g)) { + for (const auto &lit : g[u].literals) { + lits[lit]++; + } + } + } + } + + /* literals */ + + for (const auto &m : lits) { + if (m.second > 1) { + DEBUG_PRINTF("lit %u used by >1 reporting roles\n", m.first); + return true; + } + } + + for (auto it = begin(lits); it != end(lits); ++it) { + const auto &lit1 = build.literals.right.at(it->first); + for (auto jt = next(it); jt != end(lits); ++jt) { + const auto &lit2 = build.literals.right.at(jt->first); + if (literalsCouldRace(lit1, lit2)) { + DEBUG_PRINTF("literals could race\n"); + return true; + } + } + } + + /* suffixes */ + + for (const auto &suffix : suffixes) { + if (has_suffix || has_role) { + return true; /* scope for badness */ + } + + has_suffix = true; + + /* some lesser suffix engines (nfas, haig, castle) can raise multiple + * matches for a report id at the same offset if there are multiple + * report states live. 
*/ + if (suffix.haig()) { + return true; + } + if (suffix.graph() && + requiresDedupe(*suffix.graph(), reports, build.cc.grey)) { + return true; + } + if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) { + return true; + } + } + + /* outfixes */ + + for (const auto &outfix_ptr : outfixes) { + assert(outfix_ptr); + const OutfixInfo &out = *outfix_ptr; + + if (has_outfix || has_role || has_suffix) { + return true; + } + has_outfix = true; + + if (out.haig()) { + return true; /* haig may report matches with different SOM at the + same offset */ + } + + if (out.holder() && + requiresDedupe(*out.holder(), reports, build.cc.grey)) { + return true; + } + } + + /* mpv */ + for (UNUSED const auto &puff : puffettes) { + if (has_outfix || has_role || has_suffix) { + return true; + } + has_outfix = true; + } + + /* boundary */ + if (has_intersection(build.boundary.report_at_eod, reports)) { + if (has_outfix || has_role || has_suffix) { + return true; + } + } + + return false; +} + +} // namespace ue2 diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 1e353a58..44044cb9 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -529,354 +529,6 @@ u32 RoseBuildImpl::getNewLiteralId() { return id; } -static -bool requiresDedupe(const NGHolder &h, const ue2::flat_set &reports, - const Grey &grey) { - /* TODO: tighten */ - NFAVertex seen_vert = NGHolder::null_vertex(); - - for (auto v : inv_adjacent_vertices_range(h.accept, h)) { - if (has_intersection(h[v].reports, reports)) { - if (seen_vert != NGHolder::null_vertex()) { - return true; - } - seen_vert = v; - } - } - - for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { - if (has_intersection(h[v].reports, reports)) { - if (seen_vert != NGHolder::null_vertex()) { - return true; - } - seen_vert = v; - } - } - - if (seen_vert) { - /* if the reporting vertex is part of of a terminal repeat, the - * construction process may reform the graph splitting it into two - * vertices (pos, cyclic) and hence require dedupe */ - vector repeats; - findRepeats(h, grey.minExtBoundedRepeatSize, &repeats); - for (const auto &repeat : repeats) { - if (find(repeat.vertices.begin(), repeat.vertices.end(), - seen_vert) != repeat.vertices.end()) { - return true; - } - } - } - - return false; -} - -class RoseDedupeAuxImpl : public RoseDedupeAux { -public: - explicit RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in); - bool requiresDedupeSupport( - const ue2::flat_set &reports) const override; - -private: - bool hasSafeMultiReports(const ue2::flat_set &reports) const; - - const RoseBuildImpl &tbi; - map> vert_map; //!< ordinary literals - map> sb_vert_map; //!< small block literals - map> suffix_map; - map> outfix_map; - map> puff_map; - - unordered_set live_reports; //!< all live internal reports. -}; - -unique_ptr RoseBuildImpl::generateDedupeAux() const { - return ue2::make_unique(*this); -} - -RoseDedupeAux::~RoseDedupeAux() { -} - -RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &tbi_in) - : tbi(tbi_in) { - const RoseGraph &g = tbi.g; - - set suffixes; - - for (auto v : vertices_range(g)) { - insert(&live_reports, g[v].reports); - - // Literals in the small block table are "shadow" copies of literals in - // the other tables that do not run in the same runtime invocation. - // Dedupe key assignment will be taken care of by the real literals. 
- if (tbi.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) { - for (const auto &report_id : g[v].reports) { - sb_vert_map[report_id].insert(v); - } - } else { - for (const auto &report_id : g[v].reports) { - vert_map[report_id].insert(v); - } - } - - // Several vertices may share a suffix, so we collect the set of - // suffixes first to avoid repeating work. - if (g[v].suffix) { - suffixes.insert(g[v].suffix); - } - } - - for (const auto &suffix : suffixes) { - for (const auto &report_id : all_reports(suffix)) { - suffix_map[report_id].insert(suffix); - live_reports.insert(report_id); - } - } - - for (const auto &outfix : tbi.outfixes) { - for (const auto &report_id : all_reports(outfix)) { - outfix_map[report_id].insert(&outfix); - live_reports.insert(report_id); - } - } - - if (tbi.mpv_outfix) { - auto *mpv = tbi.mpv_outfix->mpv(); - for (const auto &puff : mpv->puffettes) { - puff_map[puff.report].insert(&puff); - live_reports.insert(puff.report); - } - for (const auto &puff : mpv->triggered_puffettes) { - puff_map[puff.report].insert(&puff); - live_reports.insert(puff.report); - } - } - - // Collect live reports from boundary reports. - insert(&live_reports, tbi.boundary.report_at_0); - insert(&live_reports, tbi.boundary.report_at_0_eod); - insert(&live_reports, tbi.boundary.report_at_eod); - - DEBUG_PRINTF("%zu of %zu reports are live\n", live_reports.size(), - tbi.rm.numReports()); -} - -static -vector makePath(const rose_literal_id &lit) { - vector path(begin(lit.s), end(lit.s)); - for (u32 i = 0; i < lit.delay; i++) { - path.push_back(CharReach::dot()); - } - return path; -} - -/** - * \brief True if one of the given literals overlaps with the suffix of - * another, meaning that they could arrive at the same offset. - */ -static -bool literalsCouldRace(const rose_literal_id &lit1, - const rose_literal_id &lit2) { - DEBUG_PRINTF("compare %s (delay %u) and %s (delay %u)\n", - dumpString(lit1.s).c_str(), lit1.delay, - dumpString(lit2.s).c_str(), lit2.delay); - - // Add dots on the end of each literal for delay. - const auto v1 = makePath(lit1); - const auto v2 = makePath(lit2); - - // See if the smaller path is a suffix of the larger path. - const auto *smaller = v1.size() < v2.size() ? &v1 : &v2; - const auto *bigger = v1.size() < v2.size() ? &v2 : &v1; - auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(), - overlaps); - return r.first == smaller->rend(); -} - -bool RoseDedupeAuxImpl::hasSafeMultiReports( - const flat_set &reports) const { - if (reports.size() <= 1) { - return true; - } - - /* We have more than one ReportID corresponding to the external ID that is - * presented to the user. These may differ in offset adjustment, bounds - * checks, etc. */ - - /* TODO: work out if these differences will actually cause problems */ - - /* One common case where we know we don't have a problem is if there are - * precisely two reports, one for the main Rose path and one for the - * "small block matcher" path. 
*/ - if (reports.size() == 2) { - ReportID id1 = *reports.begin(); - ReportID id2 = *reports.rbegin(); - - bool has_verts_1 = contains(vert_map, id1); - bool has_verts_2 = contains(vert_map, id2); - bool has_sb_verts_1 = contains(sb_vert_map, id1); - bool has_sb_verts_2 = contains(sb_vert_map, id2); - - if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) { - DEBUG_PRINTF("two reports, one full and one small block: ok\n"); - return true; - } - } - - DEBUG_PRINTF("more than one report\n"); - return false; -} - -bool RoseDedupeAuxImpl::requiresDedupeSupport( - const flat_set &reports_in) const { - /* TODO: this could be expanded to check for offset or character - constraints */ - - // We don't want to consider dead reports (tracked by ReportManager but no - // longer used) for the purposes of assigning dupe keys. - flat_set reports; - for (auto id : reports_in) { - if (contains(live_reports, id)) { - reports.insert(id); - } - } - - DEBUG_PRINTF("live reports: %s\n", as_string_list(reports).c_str()); - - const RoseGraph &g = tbi.g; - - bool has_suffix = false; - bool has_outfix = false; - - if (!hasSafeMultiReports(reports)) { - DEBUG_PRINTF("multiple reports not safe\n"); - return true; - } - - set roles; - set suffixes; - set outfixes; - set puffettes; - for (ReportID r : reports) { - if (contains(vert_map, r)) { - insert(&roles, vert_map.at(r)); - } - if (contains(suffix_map, r)) { - insert(&suffixes, suffix_map.at(r)); - } - - if (contains(outfix_map, r)) { - insert(&outfixes, outfix_map.at(r)); - } - - if (contains(puff_map, r)) { - insert(&puffettes, puff_map.at(r)); - } - } - - /* roles */ - - map lits; // Literal ID -> count of occurrences. - - const bool has_role = !roles.empty(); - for (auto v : roles) { - for (const auto &lit : g[v].literals) { - lits[lit]++; - } - if (g[v].eod_accept) { - // Literals plugged into this EOD accept must be taken into account - // as well. - for (auto u : inv_adjacent_vertices_range(v, g)) { - for (const auto &lit : g[u].literals) { - lits[lit]++; - } - } - } - } - - /* literals */ - - for (const auto &m : lits) { - if (m.second > 1) { - DEBUG_PRINTF("lit %u used by >1 reporting roles\n", m.first); - return true; - } - } - - for (auto it = begin(lits); it != end(lits); ++it) { - const auto &lit1 = tbi.literals.right.at(it->first); - for (auto jt = next(it); jt != end(lits); ++jt) { - const auto &lit2 = tbi.literals.right.at(jt->first); - if (literalsCouldRace(lit1, lit2)) { - DEBUG_PRINTF("literals could race\n"); - return true; - } - } - } - - /* suffixes */ - - for (const auto &suffix : suffixes) { - if (has_suffix || has_role) { - return true; /* scope for badness */ - } - - has_suffix = true; - - /* some lesser suffix engines (nfas, haig, castle) can raise multiple - * matches for a report id at the same offset if there are multiple - * report states live. 
*/ - if (suffix.haig()) { - return true; - } - if (suffix.graph() && - requiresDedupe(*suffix.graph(), reports, tbi.cc.grey)) { - return true; - } - if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) { - return true; - } - } - - /* outfixes */ - - for (const auto &outfix_ptr : outfixes) { - assert(outfix_ptr); - const OutfixInfo &out = *outfix_ptr; - - if (has_outfix || has_role || has_suffix) { - return true; - } - has_outfix = true; - - if (out.haig()) { - return true; /* haig may report matches with different SOM at the - same offset */ - } - - if (out.holder() && - requiresDedupe(*out.holder(), reports, tbi.cc.grey)) { - return true; - } - } - - /* mpv */ - for (UNUSED const auto &puff : puffettes) { - if (has_outfix || has_role || has_suffix) { - return true; - } - has_outfix = true; - } - - /* boundary */ - if (has_intersection(tbi.boundary.report_at_eod, reports)) { - if (has_outfix || has_role || has_suffix) { - return true; - } - } - - return false; -} - bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) { ORDER_CHECK(minBound); ORDER_CHECK(maxBound); From bb29aeb2986797688687f05e11e16d623ccfa00b Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Wed, 26 Apr 2017 13:45:31 +1000 Subject: [PATCH 276/326] rose: shift program construction functions to rose_build_program --- CMakeLists.txt | 1 + src/rose/rose_build_bytecode.cpp | 2549 ++---------------------------- src/rose/rose_build_impl.h | 5 + src/rose/rose_build_misc.cpp | 53 + src/rose/rose_build_program.cpp | 2150 +++++++++++++++++++++++++ src/rose/rose_build_program.h | 160 +- src/rose/rose_build_resources.h | 57 + 7 files changed, 2567 insertions(+), 2408 deletions(-) create mode 100644 src/rose/rose_build_resources.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 650bcf20..bc42c659 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -971,6 +971,7 @@ SET (hs_SRCS src/rose/rose_build_misc.cpp src/rose/rose_build_program.cpp src/rose/rose_build_program.h + src/rose/rose_build_resources.h src/rose/rose_build_role_aliasing.cpp src/rose/rose_build_scatter.cpp src/rose/rose_build_scatter.h diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 94927558..636af0a6 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -38,11 +38,11 @@ #include "rose_build_exclusive.h" #include "rose_build_groups.h" #include "rose_build_infix.h" -#include "rose_build_instructions.h" #include "rose_build_long_lit.h" #include "rose_build_lookaround.h" #include "rose_build_matchers.h" #include "rose_build_program.h" +#include "rose_build_resources.h" #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" @@ -82,7 +82,6 @@ #include "util/compile_context.h" #include "util/compile_error.h" #include "util/container.h" -#include "util/dump_charclass.h" #include "util/fatbit_build.h" #include "util/graph_range.h" #include "util/make_unique.h" @@ -133,56 +132,6 @@ namespace ue2 { namespace /* anon */ { -static constexpr u32 INVALID_QUEUE = ~0U; - -struct left_build_info { - // Constructor for an engine implementation. - left_build_info(u32 q, u32 l, u32 t, rose_group sm, - const std::vector &stops, u32 max_ql, u8 cm_count, - const CharReach &cm_cr) - : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops), - max_queuelen(max_ql), countingMiracleCount(cm_count), - countingMiracleReach(cm_cr) {} - - // Constructor for a lookaround implementation. 
- explicit left_build_info(const vector> &looks) - : has_lookaround(true), lookaround(looks) {} - - u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */ - u32 lag = 0; - u32 transient = 0; - rose_group squash_mask = ~rose_group{0}; - vector stopAlphabet; - u32 max_queuelen = 0; - u8 countingMiracleCount = 0; - CharReach countingMiracleReach; - u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ - /* leftfix can be completely implemented with lookaround */ - bool has_lookaround = false; - vector> lookaround; // alternative implementation to the NFA -}; - -/** - * \brief Structure tracking which resources are used by this Rose instance at - * runtime. - * - * We use this to control how much initialisation we need to do at the - * beginning of a stream/block at runtime. - */ -struct RoseResources { - bool has_outfixes = false; - bool has_suffixes = false; - bool has_leftfixes = false; - bool has_literals = false; - bool has_states = false; - bool checks_groups = false; - bool has_lit_delay = false; - bool has_lit_check = false; // long literal support - bool has_anchored = false; - bool has_floating = false; - bool has_eod = false; -}; - struct build_context : noncopyable { /** \brief information about engines to the left of a vertex */ map leftfix_info; @@ -190,27 +139,15 @@ struct build_context : noncopyable { /** \brief mapping from suffix to queue index. */ map suffixes; + /** \brief engine info by queue. */ + map engine_info_by_queue; + /** \brief Simple cache of programs written to engine blob, used for * deduplication. */ ue2::unordered_map program_cache; - /** \brief LookEntry list cache, so that we can reuse the look index and - * reach index for the same lookaround. */ - ue2::unordered_map>, - pair> lookaround_cache; - - /** \brief Lookaround table for Rose roles. */ - vector>> lookaround; - - /** \brief Lookaround look table size. */ - size_t lookTableSize = 0; - - /** \brief Lookaround reach table size. - * since single path lookaround and multi-path lookaround have different - * bitvectors range (32 and 256), we need to maintain both look table size - * and reach table size. */ - size_t reachTableSize = 0; + lookaround_info lookarounds; /** \brief State indices, for those roles that have them. * Each vertex present has a unique state index in the range @@ -225,17 +162,10 @@ struct build_context : noncopyable { * that need hash table support. */ vector longLiterals; - /** \brief Long literal length threshold, used in streaming mode. */ - size_t longLitLengthThreshold = 0; - /** \brief Contents of the Rose bytecode immediately following the * RoseEngine. */ RoseEngineBlob engine_blob; - /** \brief True if reports need CATCH_UP instructions to catch up suffixes, - * outfixes etc. */ - bool needs_catchup; - /** \brief True if this Rose engine has an MPV engine. */ bool needs_mpv_catchup = false; @@ -243,34 +173,6 @@ struct build_context : noncopyable { RoseResources resources; }; -/** \brief Data only used during construction of various programs (literal, - * anchored, delay, etc). */ -struct ProgramBuild : noncopyable { - explicit ProgramBuild(u32 fMinLitOffset) - : floatingMinLiteralMatchOffset(fMinLitOffset) { - } - - /** \brief Minimum offset of a match from the floating table. */ - const u32 floatingMinLiteralMatchOffset; - - /** \brief Mapping from vertex to key, for vertices with a - * CHECK_NOT_HANDLED instruction. */ - ue2::unordered_map handledKeys; - - /** \brief Mapping from Rose literal ID to anchored program index. 
*/ - map anchored_programs; - - /** \brief Mapping from Rose literal ID to delayed program index. */ - map delay_programs; - - /** \brief Mapping from every vertex to the groups that must be on for that - * vertex to be reached. */ - ue2::unordered_map vertex_group_map; - - /** \brief Global bitmap of groups that can be squashed. */ - rose_group squashable_groups = 0; -}; - /** \brief subengine info including built engine and * corresponding triggering rose vertices */ struct ExclusiveSubengine { @@ -291,18 +193,7 @@ struct ExclusiveInfo : noncopyable { } static -const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { - assert(contains(bc.engineOffsets, qi)); - u32 nfa_offset = bc.engineOffsets.at(qi); - assert(nfa_offset >= bc.engine_blob.base_offset); - const NFA *n = (const NFA *)(bc.engine_blob.data() + nfa_offset - - bc.engine_blob.base_offset); - assert(n->queueIndex == qi); - return n; -} - -static -const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { +void add_nfa_to_blob(build_context &bc, NFA &nfa) { u32 qi = nfa.queueIndex; u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length); DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi, @@ -310,10 +201,6 @@ const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { assert(!contains(bc.engineOffsets, qi)); bc.engineOffsets.emplace(qi, nfa_offset); - - const NFA *n = get_nfa_from_blob(bc, qi); - assert(memcmp(&nfa, n, nfa.length) == 0); - return n; } static @@ -401,8 +288,8 @@ bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { } if (cc.streaming && resources.has_lit_check) { - DEBUG_PRINTF("has long literals in streaming mode, which needs " - "long literal table support\n"); + DEBUG_PRINTF("has long literals in streaming mode, which needs long " + "literal table support\n"); return false; } @@ -719,8 +606,7 @@ buildRepeatEngine(const CastleProto &proto, static bytecode_ptr getDfa(raw_dfa &rdfa, bool is_transient, - const CompileContext &cc, - const ReportManager &rm) { + const CompileContext &cc, const ReportManager &rm) { // Unleash the Sheng!! auto dfa = shengCompile(rdfa, cc, rm, false); if (!dfa && !is_transient) { @@ -1155,6 +1041,31 @@ left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei, return leftfix; } +static +void enforceEngineSizeLimit(const NFA *n, const Grey &grey) { + const size_t nfa_size = n->length; + // Global limit. + if (nfa_size > grey.limitEngineSize) { + throw ResourceLimitError(); + } + + // Type-specific limit checks follow. 
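+    // (Each engine class also has its own, typically tighter, budget in
+    // the Grey box: limitDFASize, limitNFASize and limitLBRSize. An engine
+    // must fit under both the global and the per-type limit.)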
+ + if (isDfaType(n->type)) { + if (nfa_size > grey.limitDFASize) { + throw ResourceLimitError(); + } + } else if (isNfaType(n->type)) { + if (nfa_size > grey.limitNFASize) { + throw ResourceLimitError(); + } + } else if (isLbrType(n->type)) { + if (nfa_size > grey.limitLBRSize) { + throw ResourceLimitError(); + } + } +} + static bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, const map > &infixTriggers, @@ -1193,6 +1104,9 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, setLeftNfaProperties(*nfa, leftfix); nfa->queueIndex = qi; + enforceEngineSizeLimit(nfa.get(), cc.grey); + bc.engine_info_by_queue.emplace(nfa->queueIndex, + engine_info(nfa.get(), is_transient)); if (!prefix && !leftfix.haig() && leftfix.graph() && nfaStuckOn(*leftfix.graph())) { @@ -1290,12 +1204,10 @@ void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo, for (const auto &n : tamaInfo.subengines) { for (const auto &v : subengines[i].vertices) { if (is_suffix) { - tamaProto.add(n, g[v].index, g[v].suffix.top, - out_top_remap); + tamaProto.add(n, g[v].index, g[v].suffix.top, out_top_remap); } else { for (const auto &e : in_edges_range(v, g)) { - tamaProto.add(n, g[v].index, g[e].rose_top, - out_top_remap); + tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap); } } } @@ -1308,32 +1220,34 @@ shared_ptr constructContainerEngine(const RoseGraph &g, build_context &bc, const ExclusiveInfo &info, const u32 queue, - const bool is_suffix) { + const bool is_suffix, + const Grey &grey) { const auto &subengines = info.subengines; - auto tamaInfo = - constructTamaInfo(g, subengines, is_suffix); + auto tamaInfo = constructTamaInfo(g, subengines, is_suffix); map, u32> out_top_remap; auto n = buildTamarama(*tamaInfo, queue, out_top_remap); + enforceEngineSizeLimit(n.get(), grey); + bc.engine_info_by_queue.emplace(n->queueIndex, engine_info(n.get(), false)); add_nfa_to_blob(bc, *n); DEBUG_PRINTF("queue id:%u\n", queue); shared_ptr tamaProto = make_shared(); tamaProto->reports = info.reports; - updateTops(g, *tamaInfo, *tamaProto, subengines, - out_top_remap, is_suffix); + updateTops(g, *tamaInfo, *tamaProto, subengines, out_top_remap, is_suffix); return tamaProto; } static void buildInfixContainer(RoseGraph &g, build_context &bc, - const vector &exclusive_info) { + const vector &exclusive_info, + const Grey &grey) { // Build tamarama engine for (const auto &info : exclusive_info) { const u32 queue = info.queue; const auto &subengines = info.subengines; auto tamaProto = - constructContainerEngine(g, bc, info, queue, false); + constructContainerEngine(g, bc, info, queue, false, grey); for (const auto &sub : subengines) { const auto &verts = sub.vertices; @@ -1347,13 +1261,14 @@ void buildInfixContainer(RoseGraph &g, build_context &bc, static void buildSuffixContainer(RoseGraph &g, build_context &bc, - const vector &exclusive_info) { + const vector &exclusive_info, + const Grey &grey) { // Build tamarama engine for (const auto &info : exclusive_info) { const u32 queue = info.queue; const auto &subengines = info.subengines; - auto tamaProto = - constructContainerEngine(g, bc, info, queue, true); + auto tamaProto = constructContainerEngine(g, bc, info, queue, true, + grey); for (const auto &sub : subengines) { const auto &verts = sub.vertices; for (const auto &v : verts) { @@ -1488,7 +1403,7 @@ void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc, } updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info, no_retrigger_queues); - 
buildInfixContainer(g, bc, exclusive_info); + buildInfixContainer(g, bc, exclusive_info, build.cc.grey); } static @@ -1560,8 +1475,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, findInfixTriggers(tbi, &infixTriggers); if (cc.grey.allowTamarama && cc.streaming && !do_prefix) { - findExclusiveInfixes(tbi, bc, qif, infixTriggers, - no_retrigger_queues); + findExclusiveInfixes(tbi, bc, qif, infixTriggers, no_retrigger_queues); } for (auto v : vertices_range(g)) { @@ -1769,6 +1683,9 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, u32 qi = mpv_outfix->get_queue(tbi.qif); nfa->queueIndex = qi; + enforceEngineSizeLimit(nfa.get(), tbi.cc.grey); + bc.engine_info_by_queue.emplace(nfa->queueIndex, + engine_info(nfa.get(), false)); DEBUG_PRINTF("built mpv\n"); @@ -1827,6 +1744,9 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, setOutfixProperties(*n, out); n->queueIndex = out.get_queue(tbi.qif); + enforceEngineSizeLimit(n.get(), tbi.cc.grey); + bc.engine_info_by_queue.emplace(n->queueIndex, + engine_info(n.get(), false)); if (!*historyRequired && requires_decompress_key(*n)) { *historyRequired = 1; @@ -1924,14 +1844,14 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc, } updateExclusiveSuffixProperties(build, exclusive_info, no_retrigger_queues); - buildSuffixContainer(g, bc, exclusive_info); + buildSuffixContainer(g, bc, exclusive_info, build.cc.grey); } static void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, - QueueIndexFactory &qif, - map> &suffixTriggers, - set *no_retrigger_queues) { + QueueIndexFactory &qif, + map> &suffixTriggers, + set *no_retrigger_queues) { const RoseGraph &g = tbi.g; map suffixes; @@ -2021,6 +1941,10 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, setSuffixProperties(*n, s, tbi.rm); n->queueIndex = queue; + enforceEngineSizeLimit(n.get(), tbi.cc.grey); + bc.engine_info_by_queue.emplace(n->queueIndex, + engine_info(n.get(), false)); + if (s.graph() && nfaStuckOn(*s.graph())) { /* todo: have corresponding * haig analysis */ assert(!s.haig()); @@ -2114,44 +2038,28 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, } static -void allocateStateSpace(const NFA *nfa, NfaInfo &nfa_info, bool is_transient, +void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, RoseStateOffsets *so, u32 *scratchStateSize, u32 *streamStateSize, u32 *transientStateSize) { u32 state_offset; - if (is_transient) { + if (eng_info.transient) { // Transient engines do not use stream state, but must have room in // transient state (stored in scratch). state_offset = *transientStateSize; - *transientStateSize += nfa->streamStateSize; + *transientStateSize += eng_info.stream_size; } else { // Pack NFA stream state on to the end of the Rose stream state. state_offset = so->end; - so->end += nfa->streamStateSize; - *streamStateSize += nfa->streamStateSize; + so->end += eng_info.stream_size; + *streamStateSize += eng_info.stream_size; } nfa_info.stateOffset = state_offset; // Uncompressed state in scratch must be aligned. 
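     // (Scratch holds the full, uncompressed engine state for the scan in
     // progress, so it only needs suitable alignment; the compressed form
     // lives in the stream or transient state sized above.)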
- u32 alignReq = state_alignment(*nfa); - assert(alignReq); - *scratchStateSize = ROUNDUP_N(*scratchStateSize, alignReq); + *scratchStateSize = ROUNDUP_N(*scratchStateSize, eng_info.scratch_align); nfa_info.fullStateOffset = *scratchStateSize; - *scratchStateSize += nfa->scratchStateSize; -} - -static -set -findTransientQueues(const map &leftfix_info) { - DEBUG_PRINTF("curating transient queues\n"); - set out; - for (const auto &left : leftfix_info | map_values) { - if (left.transient) { - DEBUG_PRINTF("q %u is transient\n", left.queue); - out.insert(left.queue); - } - } - return out; + *scratchStateSize += eng_info.scratch_size; } static @@ -2159,7 +2067,6 @@ void updateNfaState(const build_context &bc, vector &nfa_infos, RoseStateOffsets *so, u32 *scratchStateSize, u32 *streamStateSize, u32 *transientStateSize) { if (nfa_infos.empty()) { - assert(bc.engineOffsets.empty()); return; } @@ -2167,14 +2074,10 @@ void updateNfaState(const build_context &bc, vector &nfa_infos, *transientStateSize = 0; *scratchStateSize = 0; - auto transient_queues = findTransientQueues(bc.leftfix_info); - - for (const auto &m : bc.engineOffsets) { - const NFA *nfa = get_nfa_from_blob(bc, m.first); - u32 qi = nfa->queueIndex; - bool is_transient = contains(transient_queues, qi); + for (u32 qi = 0; qi < nfa_infos.size(); qi++) { NfaInfo &nfa_info = nfa_infos[qi]; - allocateStateSpace(nfa, nfa_info, is_transient, so, scratchStateSize, + const auto &eng_info = bc.engine_info_by_queue.at(qi); + allocateStateSpace(eng_info, nfa_info, so, scratchStateSize, streamStateSize, transientStateSize); } } @@ -2267,30 +2170,6 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { return bc.engine_blob.add_iterator(iter); } -static -void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &grey) { - // Global limit. - if (nfa_size > grey.limitEngineSize) { - throw ResourceLimitError(); - } - - // Type-specific limit checks follow. - - if (isDfaType(n->type)) { - if (nfa_size > grey.limitDFASize) { - throw ResourceLimitError(); - } - } else if (isNfaType(n->type)) { - if (nfa_size > grey.limitNFASize) { - throw ResourceLimitError(); - } - } else if (isLbrType(n->type)) { - if (nfa_size > grey.limitLBRSize) { - throw ResourceLimitError(); - } - } -} - static u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, const vector &anchored_dfas) { @@ -2363,8 +2242,8 @@ static u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) { vector keys; for (u32 qi = 0; qi < activeQueueCount; ++qi) { - const NFA *n = get_nfa_from_blob(bc, qi); - if (nfaAcceptsEod(n)) { + const auto &eng_info = bc.engine_info_by_queue.at(qi); + if (eng_info.accepts_eod) { DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi); keys.push_back(qi); } @@ -2451,61 +2330,7 @@ void addSomRevNfas(build_context &bc, RoseEngine &proto, } static -void applyFinalSpecialisation(RoseProgram &program) { - assert(!program.empty()); - assert(program.back().code() == ROSE_INSTR_END); - if (program.size() < 2) { - return; - } - - /* Replace the second-to-last instruction (before END) with a one-shot - * specialisation if available. 
*/ - auto it = next(program.rbegin()); - if (auto *ri = dynamic_cast(it->get())) { - DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); - program.replace(it, make_unique( - ri->onmatch, ri->offset_adjust)); - } -} - -static -void recordResources(RoseResources &resources, const RoseProgram &program) { - for (const auto &ri : program) { - switch (ri->code()) { - case ROSE_INSTR_TRIGGER_SUFFIX: - resources.has_suffixes = true; - break; - case ROSE_INSTR_TRIGGER_INFIX: - case ROSE_INSTR_CHECK_INFIX: - case ROSE_INSTR_CHECK_PREFIX: - case ROSE_INSTR_SOM_LEFTFIX: - resources.has_leftfixes = true; - break; - case ROSE_INSTR_SET_STATE: - case ROSE_INSTR_CHECK_STATE: - case ROSE_INSTR_SPARSE_ITER_BEGIN: - case ROSE_INSTR_SPARSE_ITER_NEXT: - resources.has_states = true; - break; - case ROSE_INSTR_CHECK_GROUPS: - resources.checks_groups = true; - break; - case ROSE_INSTR_PUSH_DELAYED: - resources.has_lit_delay = true; - break; - case ROSE_INSTR_CHECK_LONG_LIT: - case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: - resources.has_lit_check = true; - break; - default: - break; - } - } -} - -static -void recordResources(RoseResources &resources, - const RoseBuildImpl &build, +void recordResources(RoseResources &resources, const RoseBuildImpl &build, const vector &fragments) { if (!build.outfixes.empty()) { resources.has_outfixes = true; @@ -2526,26 +2351,6 @@ void recordResources(RoseResources &resources, } } -static -void recordLongLiterals(vector &longLiterals, - const RoseProgram &program) { - for (const auto &ri : program) { - if (const auto *ri_check = - dynamic_cast(ri.get())) { - DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n", - escapeString(ri_check->literal).c_str()); - longLiterals.emplace_back(ri_check->literal, false); - continue; - } - if (const auto *ri_check = - dynamic_cast(ri.get())) { - DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n", - escapeString(ri_check->literal).c_str()); - longLiterals.emplace_back(ri_check->literal, true); - } - } -} - static u32 writeProgram(build_context &bc, RoseProgram &&program) { if (program.empty()) { @@ -2593,41 +2398,12 @@ u32 writeActiveLeftIter(RoseEngineBlob &engine_blob, return engine_blob.add_iterator(iter); } -static -bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) { - const auto &g = build.g; - const auto v = target(e, g); - - if (!build.g[v].eod_accept) { - return false; - } - - // If there's a graph between us and EOD, we shouldn't be eager. - if (build.g[v].left) { - return false; - } - - // Must be exactly at EOD. - if (g[e].minBound != 0 || g[e].maxBound != 0) { - return false; - } - - // In streaming mode, we can only eagerly report EOD for literals in the - // EOD-anchored table, as that's the only time we actually know where EOD - // is. In block mode, we always have this information. 
- const auto u = source(e, g); - if (build.cc.streaming && !build.isInETable(u)) { - return false; - } - - return true; -} - static bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, u32 outfixEndQueue) { for (u32 i = 0; i < outfixEndQueue; i++) { - if (nfaAcceptsEod(get_nfa_from_blob(bc, i))) { + const auto &eng_info = bc.engine_info_by_queue.at(i); + if (eng_info.accepts_eod) { DEBUG_PRINTF("outfix has eod\n"); return true; } @@ -2699,21 +2475,22 @@ void writeMultipathLookaround(const vector> &multi_look, } static -void writeLookaroundTables(build_context &bc, RoseEngine &proto) { - vector look_table(bc.lookTableSize, 0); - vector reach_table(bc.reachTableSize, 0); +void writeLookaroundTables(const lookaround_info &lookarounds, + RoseEngineBlob &engine_blob, RoseEngine &proto) { + vector look_table(lookarounds.lookTableSize, 0); + vector reach_table(lookarounds.reachTableSize, 0); s8 *look = look_table.data(); u8 *reach = reach_table.data(); - for (const auto &l : bc.lookaround) { - if (l.size() == 1) { - writeLookaround(l.front(), look, reach); + for (const auto &la : lookarounds.table) { + if (la.size() == 1) { + writeLookaround(la.front(), look, reach); } else { - writeMultipathLookaround(l, look, reach); + writeMultipathLookaround(la, look, reach); } } - proto.lookaroundTableOffset = bc.engine_blob.add_range(look_table); - proto.lookaroundReachOffset = bc.engine_blob.add_range(reach_table); + proto.lookaroundTableOffset = engine_blob.add_range(look_table); + proto.lookaroundReachOffset = engine_blob.add_range(reach_table); } static @@ -2750,9 +2527,6 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, memset(infos.data(), 0, sizeof(NfaInfo) * queue_count); for (u32 qi = 0; qi < queue_count; qi++) { - const NFA *n = get_nfa_from_blob(bc, qi); - enforceEngineSizeLimit(n, n->length, build.cc.grey); - NfaInfo &info = infos[qi]; info.nfaOffset = bc.engineOffsets.at(qi); assert(qi < ekey_lists.size()); @@ -2806,1475 +2580,6 @@ bool hasBoundaryReports(const BoundaryReports &boundary) { return false; } -/** - * \brief True if the given vertex is a role that can only be switched on at - * EOD. - */ -static -bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { - const RoseGraph &g = tbi.g; - - // All such roles have only (0,0) edges to vertices with the eod_accept - // property, and no other effects (suffixes, ordinary reports, etc, etc). - - if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) { - return false; - } - - for (const auto &e : out_edges_range(v, g)) { - RoseVertex w = target(e, g); - if (!g[w].eod_accept) { - return false; - } - assert(!g[w].reports.empty()); - assert(g[w].literals.empty()); - - if (g[e].minBound || g[e].maxBound) { - return false; - } - } - - /* There is no pointing enforcing this check at runtime if - * this role is only fired by the eod event literal */ - if (tbi.eod_event_literal_id != MO_INVALID_IDX && - g[v].literals.size() == 1 && - *g[v].literals.begin() == tbi.eod_event_literal_id) { - return false; - } - - return true; -} - -static -void addLookaround(build_context &bc, - const vector> &look, - u32 &look_index, u32 &reach_index) { - // Check the cache. 
- auto it = bc.lookaround_cache.find(look); - if (it != bc.lookaround_cache.end()) { - look_index = verify_u32(it->second.first); - reach_index = verify_u32(it->second.second); - DEBUG_PRINTF("reusing look at idx %u\n", look_index); - DEBUG_PRINTF("reusing reach at idx %u\n", reach_index); - return; - } - - size_t look_idx = bc.lookTableSize; - size_t reach_idx = bc.reachTableSize; - - if (look.size() == 1) { - bc.lookTableSize += look.front().size(); - bc.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN; - } else { - bc.lookTableSize += look.size(); - bc.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN; - } - - bc.lookaround_cache.emplace(look, make_pair(look_idx, reach_idx)); - bc.lookaround.emplace_back(look); - - DEBUG_PRINTF("adding look at idx %zu\n", look_idx); - DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx); - look_index = verify_u32(look_idx); - reach_index = verify_u32(reach_idx); -} - -static -bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) { - size_t reach_size = cr.count(); - assert(reach_size > 0); - // check whether entry_size is some power of 2. - if ((reach_size - 1) & reach_size) { - return false; - } - make_and_cmp_mask(cr, &andmask, &cmpmask); - if ((1 << popcount32((u8)(~andmask))) ^ reach_size) { - return false; - } - return true; -} - -static -bool checkReachWithFlip(const CharReach &cr, u8 &andmask, - u8 &cmpmask, u8 &flip) { - if (checkReachMask(cr, andmask, cmpmask)) { - flip = 0; - return true; - } - if (checkReachMask(~cr, andmask, cmpmask)) { - flip = 1; - return true; - } - return false; -} - -static -bool makeRoleByte(const vector &look, RoseProgram &program) { - if (look.size() == 1) { - const auto &entry = look[0]; - u8 andmask_u8, cmpmask_u8; - u8 flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) { - return false; - } - s32 checkbyte_offset = verify_s32(entry.offset); - DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset); - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(andmask_u8, cmpmask_u8, flip, - checkbyte_offset, end_inst); - program.add_before_end(move(ri)); - return true; - } - return false; -} - -static -bool makeRoleMask(const vector &look, RoseProgram &program) { - if (look.back().offset < look.front().offset + 8) { - s32 base_offset = verify_s32(look.front().offset); - u64a and_mask = 0; - u64a cmp_mask = 0; - u64a neg_mask = 0; - for (const auto &entry : look) { - u8 andmask_u8, cmpmask_u8, flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, - cmpmask_u8, flip)) { - return false; - } - DEBUG_PRINTF("entry offset %d\n", entry.offset); - u32 shift = (entry.offset - base_offset) << 3; - and_mask |= (u64a)andmask_u8 << shift; - cmp_mask |= (u64a)cmpmask_u8 << shift; - if (flip) { - neg_mask |= 0xffLLU << shift; - } - } - DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n", - and_mask, cmp_mask); - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(and_mask, cmp_mask, neg_mask, - base_offset, end_inst); - program.add_before_end(move(ri)); - return true; - } - return false; -} - -static UNUSED -string convertMaskstoString(u8 *p, int byte_len) { - string s; - for (int i = 0; i < byte_len; i++) { - u8 hi = *p >> 4; - u8 lo = *p & 0xf; - s += (char)(hi + (hi < 10 ? 48 : 87)); - s += (char)(lo + (lo < 10 ? 
48 : 87)); - p++; - } - return s; -} - -static -bool makeRoleMask32(const vector &look, - RoseProgram &program) { - if (look.back().offset >= look.front().offset + 32) { - return false; - } - s32 base_offset = verify_s32(look.front().offset); - array and_mask, cmp_mask; - and_mask.fill(0); - cmp_mask.fill(0); - u32 neg_mask = 0; - for (const auto &entry : look) { - u8 andmask_u8, cmpmask_u8, flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, - cmpmask_u8, flip)) { - return false; - } - u32 shift = entry.offset - base_offset; - assert(shift < 32); - and_mask[shift] = andmask_u8; - cmp_mask[shift] = cmpmask_u8; - if (flip) { - neg_mask |= 1 << shift; - } - } - - DEBUG_PRINTF("and_mask %s\n", - convertMaskstoString(and_mask.data(), 32).c_str()); - DEBUG_PRINTF("cmp_mask %s\n", - convertMaskstoString(cmp_mask.data(), 32).c_str()); - DEBUG_PRINTF("neg_mask %08x\n", neg_mask); - DEBUG_PRINTF("base_offset %d\n", base_offset); - - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(and_mask, cmp_mask, neg_mask, - base_offset, end_inst); - program.add_before_end(move(ri)); - return true; -} - -// Sorting by the size of every bucket. -// Used in map, cmpNibble>. -struct cmpNibble { - bool operator()(const u32 data1, const u32 data2) const{ - u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16); - u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16); - return std::tie(size1, data1) < std::tie(size2, data2); - } -}; - -// Insert all pairs of bucket and offset into buckets. -static really_inline -void getAllBuckets(const vector &look, - map, cmpNibble> &buckets, u64a &neg_mask) { - s32 base_offset = verify_s32(look.front().offset); - for (const auto &entry : look) { - CharReach cr = entry.reach; - // Flip heavy character classes to save buckets. - if (cr.count() > 128 ) { - cr.flip(); - } else { - neg_mask ^= 1ULL << (entry.offset - base_offset); - } - map lo2hi; - // We treat Ascii Table as a 16x16 grid. - // Push every row in cr into lo2hi and mark the row number. - for (size_t i = cr.find_first(); i != CharReach::npos;) { - u8 it_hi = i >> 4; - u16 low_encode = 0; - while (i != CharReach::npos && (i >> 4) == it_hi) { - low_encode |= 1 << (i & 0xf); - i = cr.find_next(i); - } - lo2hi[low_encode] |= 1 << it_hi; - } - for (const auto &it : lo2hi) { - u32 hi_lo = (it.second << 16) | it.first; - buckets[hi_lo].push_back(entry.offset); - } - } -} - -// Once we have a new bucket, we'll try to combine it with all old buckets. -static really_inline -void nibUpdate(map &nib, u32 hi_lo) { - u16 hi = hi_lo >> 16; - u16 lo = hi_lo & 0xffff; - for (const auto pairs : nib) { - u32 old = pairs.first; - if ((old >> 16) == hi || (old & 0xffff) == lo) { - if (!nib[old | hi_lo]) { - nib[old | hi_lo] = nib[old] | nib[hi_lo]; - } - } - } -} - -static really_inline -void nibMaskUpdate(array &mask, u32 data, u8 bit_index) { - for (u8 index = 0; data > 0; data >>= 1, index++) { - if (data & 1) { - // 0 ~ 7 bucket in first 16 bytes, - // 8 ~ 15 bucket in second 16 bytes. - if (bit_index >= 8) { - mask[index + 16] |= 1 << (bit_index - 8); - } else { - mask[index] |= 1 << bit_index; - } - } - } -} - -static -bool getShuftiMasks(const vector &look, array &hi_mask, - array &lo_mask, u8 *bucket_select_hi, - u8 *bucket_select_lo, u64a &neg_mask, - u8 &bit_idx, size_t len) { - map nib; // map every bucket to its bucket number. 
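-    // (Each key packs a bitmap of high nibbles into its top 16 bits and a
-    // bitmap of low nibbles into its bottom 16 bits; the mapped value is
-    // the set of bucket bits assigned to that nibble pattern.)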
- map, cmpNibble> bucket2offsets; - s32 base_offset = look.front().offset; - - bit_idx = 0; - neg_mask = ~0ULL; - - getAllBuckets(look, bucket2offsets, neg_mask); - - for (const auto &it : bucket2offsets) { - u32 hi_lo = it.first; - // New bucket. - if (!nib[hi_lo]) { - if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) { - return false; - } - nib[hi_lo] = 1 << bit_idx; - - nibUpdate(nib, hi_lo); - nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx); - nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx); - bit_idx++; - } - - DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]); - - // Update bucket_select_mask. - u8 nib_hi = nib[hi_lo] >> 8; - u8 nib_lo = nib[hi_lo] & 0xff; - for (const auto offset : it.second) { - bucket_select_hi[offset - base_offset] |= nib_hi; - bucket_select_lo[offset - base_offset] |= nib_lo; - } - } - return true; -} - -static -unique_ptr -makeCheckShufti16x8(u32 offset_range, u8 bucket_idx, - const array &hi_mask, const array &lo_mask, - const array &bucket_select_mask, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 16 || bucket_idx > 8) { - return nullptr; - } - array nib_mask; - array bucket_select_mask_16; - copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); - copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); - copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16, - bucket_select_mask_16.begin()); - return make_unique - (nib_mask, bucket_select_mask_16, - neg_mask & 0xffff, base_offset, end_inst); -} - -static -unique_ptr -makeCheckShufti32x8(u32 offset_range, u8 bucket_idx, - const array &hi_mask, const array &lo_mask, - const array &bucket_select_mask, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 32 || bucket_idx > 8) { - return nullptr; - } - - array hi_mask_16; - array lo_mask_16; - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); - return make_unique - (hi_mask_16, lo_mask_16, bucket_select_mask, - neg_mask, base_offset, end_inst); -} - -static -unique_ptr -makeCheckShufti16x16(u32 offset_range, u8 bucket_idx, - const array &hi_mask, const array &lo_mask, - const array &bucket_select_mask_lo, - const array &bucket_select_mask_hi, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 16 || bucket_idx > 16) { - return nullptr; - } - - array bucket_select_mask_32; - copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16, - bucket_select_mask_32.begin()); - copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16, - bucket_select_mask_32.begin() + 16); - return make_unique - (hi_mask, lo_mask, bucket_select_mask_32, - neg_mask & 0xffff, base_offset, end_inst); -} -static -unique_ptr -makeCheckShufti32x16(u32 offset_range, u8 bucket_idx, - const array &hi_mask, const array &lo_mask, - const array &bucket_select_mask_lo, - const array &bucket_select_mask_hi, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 32 || bucket_idx > 16) { - return nullptr; - } - - return make_unique - (hi_mask, lo_mask, bucket_select_mask_hi, - bucket_select_mask_lo, neg_mask, base_offset, end_inst); -} - -static -bool makeRoleShufti(const vector &look, - RoseProgram &program) { - - s32 base_offset = verify_s32(look.front().offset); - if (look.back().offset >= base_offset + 32) { - return false; - } - - u8 bucket_idx = 0; // number of buckets - u64a neg_mask_64; - array hi_mask; 
- array lo_mask; - array bucket_select_hi; - array bucket_select_lo; - hi_mask.fill(0); - lo_mask.fill(0); - bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. - bucket_select_lo.fill(0); - - if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(), - bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) { - return false; - } - u32 neg_mask = (u32)neg_mask_64; - - DEBUG_PRINTF("hi_mask %s\n", - convertMaskstoString(hi_mask.data(), 32).c_str()); - DEBUG_PRINTF("lo_mask %s\n", - convertMaskstoString(lo_mask.data(), 32).c_str()); - DEBUG_PRINTF("bucket_select_hi %s\n", - convertMaskstoString(bucket_select_hi.data(), 32).c_str()); - DEBUG_PRINTF("bucket_select_lo %s\n", - convertMaskstoString(bucket_select_lo.data(), 32).c_str()); - - const auto *end_inst = program.end_instruction(); - s32 offset_range = look.back().offset - base_offset + 1; - - auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, neg_mask, base_offset, - end_inst); - if (!ri) { - ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, neg_mask, base_offset, - end_inst); - } - if (!ri) { - ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, bucket_select_hi, - neg_mask, base_offset, end_inst); - } - if (!ri) { - ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, bucket_select_hi, - neg_mask, base_offset, end_inst); - } - assert(ri); - program.add_before_end(move(ri)); - - return true; -} - -/** - * Builds a lookaround instruction, or an appropriate specialization if one is - * available. - */ -static -void makeLookaroundInstruction(build_context &bc, const vector &look, - RoseProgram &program) { - assert(!look.empty()); - - if (makeRoleByte(look, program)) { - return; - } - - if (look.size() == 1) { - s8 offset = look.begin()->offset; - u32 look_idx, reach_idx; - vector> lookaround; - lookaround.emplace_back(look); - addLookaround(bc, lookaround, look_idx, reach_idx); - // We don't need look_idx here. 
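-        // (CHECK_SINGLE_LOOKAROUND carries the offset inline and only
-        // dereferences the reach table at runtime.)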
- auto ri = make_unique(offset, reach_idx, - program.end_instruction()); - program.add_before_end(move(ri)); - return; - } - - if (makeRoleMask(look, program)) { - return; - } - - if (makeRoleMask32(look, program)) { - return; - } - - if (makeRoleShufti(look, program)) { - return; - } - - u32 look_idx, reach_idx; - vector> lookaround; - lookaround.emplace_back(look); - addLookaround(bc, lookaround, look_idx, reach_idx); - u32 look_count = verify_u32(look.size()); - - auto ri = make_unique(look_idx, reach_idx, - look_count, - program.end_instruction()); - program.add_before_end(move(ri)); -} - -#if defined(DEBUG) || defined(DUMP_SUPPORT) -static UNUSED -string dumpMultiLook(const vector &looks) { - ostringstream oss; - for (auto it = looks.begin(); it != looks.end(); ++it) { - if (it != looks.begin()) { - oss << ", "; - } - oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}"; - } - return oss.str(); -} -#endif - -static -bool makeRoleMultipathShufti(const vector> &multi_look, - RoseProgram &program) { - if (multi_look.empty()) { - return false; - } - - // find the base offset - assert(!multi_look[0].empty()); - s32 base_offset = multi_look[0].front().offset; - s32 last_start = base_offset; - s32 end_offset = multi_look[0].back().offset; - size_t multi_len = 0; - - for (const auto &look : multi_look) { - assert(look.size() > 0); - multi_len += look.size(); - - LIMIT_TO_AT_MOST(&base_offset, look.front().offset); - ENSURE_AT_LEAST(&last_start, look.front().offset); - ENSURE_AT_LEAST(&end_offset, look.back().offset); - } - - assert(last_start < 0); - - if (end_offset - base_offset >= MULTIPATH_MAX_LEN) { - return false; - } - - if (multi_len <= 16) { - multi_len = 16; - } else if (multi_len <= 32) { - multi_len = 32; - } else if (multi_len <= 64) { - multi_len = 64; - } else { - DEBUG_PRINTF("too long for multi-path\n"); - return false; - } - - vector linear_look; - array data_select_mask; - data_select_mask.fill(0); - u64a hi_bits_mask = 0; - u64a lo_bits_mask = 0; - - for (const auto &look : multi_look) { - assert(linear_look.size() < 64); - lo_bits_mask |= 1LLU << linear_look.size(); - for (const auto &entry : look) { - assert(entry.offset - base_offset < MULTIPATH_MAX_LEN); - data_select_mask[linear_look.size()] = - verify_u8(entry.offset - base_offset); - linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach); - } - hi_bits_mask |= 1LLU << (linear_look.size() - 1); - } - - u8 bit_index = 0; // number of buckets - u64a neg_mask; - array hi_mask; - array lo_mask; - array bucket_select_hi; - array bucket_select_lo; - hi_mask.fill(0); - lo_mask.fill(0); - bucket_select_hi.fill(0); - bucket_select_lo.fill(0); - - if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(), - bucket_select_lo.data(), neg_mask, bit_index, - multi_len)) { - return false; - } - - DEBUG_PRINTF("hi_mask %s\n", - convertMaskstoString(hi_mask.data(), 16).c_str()); - DEBUG_PRINTF("lo_mask %s\n", - convertMaskstoString(lo_mask.data(), 16).c_str()); - DEBUG_PRINTF("bucket_select_hi %s\n", - convertMaskstoString(bucket_select_hi.data(), 64).c_str()); - DEBUG_PRINTF("bucket_select_lo %s\n", - convertMaskstoString(bucket_select_lo.data(), 64).c_str()); - DEBUG_PRINTF("data_select_mask %s\n", - convertMaskstoString(data_select_mask.data(), 64).c_str()); - DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask); - DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask); - DEBUG_PRINTF("neg_mask %llx\n", neg_mask); - DEBUG_PRINTF("base_offset %d\n", base_offset); - DEBUG_PRINTF("last_start 
%d\n", last_start); - - // Since we don't have 16x16 now, just call 32x16 instead. - if (bit_index > 8) { - assert(multi_len <= 32); - multi_len = 32; - } - - const auto *end_inst = program.end_instruction(); - assert(multi_len == 16 || multi_len == 32 || multi_len == 64); - if (multi_len == 16) { - neg_mask &= 0xffff; - assert(!(hi_bits_mask & ~0xffffULL)); - assert(!(lo_bits_mask & ~0xffffULL)); - assert(bit_index <=8); - array nib_mask; - copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin()); - copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16); - - auto ri = make_unique - (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start, end_inst); - program.add_before_end(move(ri)); - } else if (multi_len == 32) { - neg_mask &= 0xffffffff; - assert(!(hi_bits_mask & ~0xffffffffULL)); - assert(!(lo_bits_mask & ~0xffffffffULL)); - if (bit_index <= 8) { - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_lo, data_select_mask, - hi_bits_mask, lo_bits_mask, neg_mask, base_offset, - last_start, end_inst); - program.add_before_end(move(ri)); - } else { - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, - data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, - base_offset, last_start, end_inst); - program.add_before_end(move(ri)); - } - } else { - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_lo, data_select_mask, - hi_bits_mask, lo_bits_mask, neg_mask, base_offset, - last_start, end_inst); - program.add_before_end(move(ri)); - } - return true; -} - -static -void makeRoleMultipathLookaround(build_context &bc, - const vector> &multi_look, - RoseProgram &program) { - assert(!multi_look.empty()); - assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); - vector> ordered_look; - set look_offset; - - assert(!multi_look[0].empty()); - s32 last_start = multi_look[0][0].offset; - - // build offset table. 
- for (const auto &look : multi_look) { - assert(look.size() > 0); - last_start = max(last_start, (s32)look.begin()->offset); - - for (const auto &t : look) { - look_offset.insert(t.offset); - } - } - - array start_mask; - if (multi_look.size() < MAX_LOOKAROUND_PATHS) { - start_mask.fill((1 << multi_look.size()) - 1); - } else { - start_mask.fill(0xff); - } - - u32 path_idx = 0; - for (const auto &look : multi_look) { - for (const auto &t : look) { - assert(t.offset >= (int)*look_offset.begin()); - size_t update_offset = t.offset - *look_offset.begin() + 1; - if (update_offset < start_mask.size()) { - start_mask[update_offset] &= ~(1 << path_idx); - } - } - path_idx++; - } - - for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) { - start_mask[i] &= start_mask[i - 1]; - DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]); - } - - assert(look_offset.size() <= MULTIPATH_MAX_LEN); - - assert(last_start < 0); - - for (const auto &offset : look_offset) { - vector multi_entry; - multi_entry.resize(MAX_LOOKAROUND_PATHS); - - for (size_t i = 0; i < multi_look.size(); i++) { - for (const auto &t : multi_look[i]) { - if (t.offset == offset) { - multi_entry[i] = t; - } - } - } - ordered_look.emplace_back(multi_entry); - } - - u32 look_idx, reach_idx; - addLookaround(bc, ordered_look, look_idx, reach_idx); - u32 look_count = verify_u32(ordered_look.size()); - - auto ri = make_unique(look_idx, reach_idx, - look_count, last_start, - start_mask, - program.end_instruction()); - program.add_before_end(move(ri)); -} - -static -void makeRoleLookaround(const RoseBuildImpl &build, build_context &bc, - RoseVertex v, RoseProgram &program) { - if (!build.cc.grey.roseLookaroundMasks) { - return; - } - - vector> looks; - - // Lookaround from leftfix (mandatory). - if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) { - DEBUG_PRINTF("using leftfix lookaround\n"); - looks = bc.leftfix_info.at(v).lookaround; - } - - // We may be able to find more lookaround info (advisory) and merge it - // in. - if (looks.size() <= 1) { - vector look; - vector look_more; - if (!looks.empty()) { - look = move(looks.front()); - } - findLookaroundMasks(build, v, look_more); - mergeLookaround(look, look_more); - if (!look.empty()) { - makeLookaroundInstruction(bc, look, program); - } - return; - } - - if (!makeRoleMultipathShufti(looks, program)) { - assert(looks.size() <= 8); - makeRoleMultipathLookaround(bc, looks, program); - } -} - -static -void makeRoleCheckLeftfix(const RoseBuildImpl &build, - const map &leftfix_info, - RoseVertex v, RoseProgram &program) { - auto it = leftfix_info.find(v); - if (it == end(leftfix_info)) { - return; - } - const left_build_info &lni = it->second; - if (lni.has_lookaround) { - return; // Leftfix completely implemented by lookaround. - } - - assert(!build.cc.streaming || - build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - - bool is_prefix = build.isRootSuccessor(v); - const auto *end_inst = program.end_instruction(); - - unique_ptr ri; - if (is_prefix) { - ri = make_unique(lni.queue, build.g[v].left.lag, - build.g[v].left.leftfix_report, - end_inst); - } else { - ri = make_unique(lni.queue, build.g[v].left.lag, - build.g[v].left.leftfix_report, - end_inst); - } - program.add_before_end(move(ri)); -} - -static -void makeAnchoredLiteralDelay(const RoseBuildImpl &build, - const ProgramBuild &prog_build, u32 lit_id, - RoseProgram &program) { - // Only relevant for literals in the anchored table. 
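-    // (If no vertex for this literal can match beyond
-    // floatingMinLiteralMatchOffset, the check is unnecessary and nothing
-    // is emitted.)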
- const rose_literal_id &lit = build.literals.right.at(lit_id); - if (lit.table != ROSE_ANCHORED) { - return; - } - - // If this literal match cannot occur after floatingMinLiteralMatchOffset, - // we do not need this check. - bool all_too_early = true; - rose_group groups = 0; - - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - for (RoseVertex v : lit_vertices) { - if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) { - all_too_early = false; - } - groups |= build.g[v].groups; - } - - if (all_too_early) { - return; - } - - assert(contains(prog_build.anchored_programs, lit_id)); - u32 anch_id = prog_build.anchored_programs.at(lit_id); - - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(groups, anch_id, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeDedupe(const RoseBuildImpl &build, const Report &report, - RoseProgram &program) { - const auto *end_inst = program.end_instruction(); - auto ri = - make_unique(report.quashSom, build.rm.getDkey(report), - report.offsetAdjust, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeDedupeSom(const RoseBuildImpl &build, const Report &report, - RoseProgram &program) { - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(report.quashSom, - build.rm.getDkey(report), - report.offsetAdjust, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeCatchup(const RoseBuildImpl &build, bool needs_catchup, - const flat_set &reports, RoseProgram &program) { - if (!needs_catchup) { - return; - } - - // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run - // before reports are triggered. - - auto report_needs_catchup = [&](const ReportID &id) { - const Report &report = build.rm.getReport(id); - return report.type != INTERNAL_ROSE_CHAIN; - }; - - if (!any_of(begin(reports), end(reports), report_needs_catchup)) { - DEBUG_PRINTF("none of the given reports needs catchup\n"); - return; - } - - program.add_before_end(make_unique()); -} - -static -void makeCatchupMpv(const RoseBuildImpl &build, bool needs_mpv_catchup, - ReportID id, RoseProgram &program) { - if (!needs_mpv_catchup) { - return; - } - - const Report &report = build.rm.getReport(id); - if (report.type == INTERNAL_ROSE_CHAIN) { - return; - } - - program.add_before_end(make_unique()); -} - -static -void writeSomOperation(const Report &report, som_operation *op) { - assert(op); - - memset(op, 0, sizeof(*op)); - - switch (report.type) { - case EXTERNAL_CALLBACK_SOM_REL: - op->type = SOM_EXTERNAL_CALLBACK_REL; - break; - case INTERNAL_SOM_LOC_SET: - op->type = SOM_INTERNAL_LOC_SET; - break; - case INTERNAL_SOM_LOC_SET_IF_UNSET: - op->type = SOM_INTERNAL_LOC_SET_IF_UNSET; - break; - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_COPY: - op->type = SOM_INTERNAL_LOC_COPY; - break; - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE; - break; - case EXTERNAL_CALLBACK_SOM_STORED: - op->type = SOM_EXTERNAL_CALLBACK_STORED; - break; - case 
EXTERNAL_CALLBACK_SOM_ABS: - op->type = SOM_EXTERNAL_CALLBACK_ABS; - break; - case EXTERNAL_CALLBACK_SOM_REV_NFA: - op->type = SOM_EXTERNAL_CALLBACK_REV_NFA; - break; - case INTERNAL_SOM_LOC_SET_FROM: - op->type = SOM_INTERNAL_LOC_SET_FROM; - break; - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE; - break; - default: - // This report doesn't correspond to a SOM operation. - assert(0); - throw CompileError("Unable to generate bytecode."); - } - - op->onmatch = report.onmatch; - - switch (report.type) { - case EXTERNAL_CALLBACK_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - op->aux.revNfaIndex = report.revNfaIndex; - break; - default: - op->aux.somDistance = report.somDistance; - break; - } -} - -static -void makeReport(const RoseBuildImpl &build, const ReportID id, - const bool has_som, RoseProgram &program) { - assert(id < build.rm.numReports()); - const Report &report = build.rm.getReport(id); - - RoseProgram report_block; - const RoseInstruction *end_inst = report_block.end_instruction(); - - // Handle min/max offset checks. - if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { - auto ri = make_unique(report.minOffset, - report.maxOffset, end_inst); - report_block.add_before_end(move(ri)); - } - - // If this report has an exhaustion key, we can check it in the program - // rather than waiting until we're in the callback adaptor. - if (report.ekey != INVALID_EKEY) { - auto ri = make_unique(report.ekey, end_inst); - report_block.add_before_end(move(ri)); - } - - // External SOM reports that aren't passthrough need their SOM value - // calculated. - if (isExternalSomReport(report) && - report.type != EXTERNAL_CALLBACK_SOM_PASS) { - auto ri = make_unique(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } - - // Min length constraint. - if (report.minLength > 0) { - assert(build.hasSom); - auto ri = make_unique( - report.offsetAdjust, report.minLength, end_inst); - report_block.add_before_end(move(ri)); - } - - if (report.quashSom) { - report_block.add_before_end(make_unique()); - } - - switch (report.type) { - case EXTERNAL_CALLBACK: - if (!has_som) { - // Dedupe is only necessary if this report has a dkey, or if there - // are SOM reports to catch up. 
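-            // (A dkey of ~0U means that no dedupe key was assigned to
-            // this report.)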
- bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; - if (report.ekey == INVALID_EKEY) { - if (needs_dedupe) { - report_block.add_before_end( - make_unique( - report.quashSom, build.rm.getDkey(report), - report.onmatch, report.offsetAdjust, end_inst)); - } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } - } else { - if (needs_dedupe) { - makeDedupe(build, report, report_block); - } - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - } else { // has_som - makeDedupeSom(build, report, report_block); - if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } else { - report_block.add_before_end( - make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - } - break; - case INTERNAL_SOM_LOC_SET: - case INTERNAL_SOM_LOC_SET_IF_UNSET: - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - case INTERNAL_SOM_LOC_COPY: - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - case INTERNAL_SOM_LOC_SET_FROM: - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - if (has_som) { - auto ri = make_unique(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } else { - auto ri = make_unique(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } - break; - case INTERNAL_ROSE_CHAIN: { - report_block.add_before_end(make_unique( - report.onmatch, report.topSquashDistance)); - break; - } - case EXTERNAL_CALLBACK_SOM_REL: - case EXTERNAL_CALLBACK_SOM_STORED: - case EXTERNAL_CALLBACK_SOM_ABS: - case EXTERNAL_CALLBACK_SOM_REV_NFA: - makeDedupeSom(build, report, report_block); - if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - break; - case EXTERNAL_CALLBACK_SOM_PASS: - makeDedupeSom(build, report, report_block); - if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - break; - - default: - assert(0); - throw CompileError("Unable to generate bytecode."); - } - - assert(!report_block.empty()); - program.add_block(move(report_block)); -} - -static -void makeRoleReports(const RoseBuildImpl &build, const build_context &bc, - RoseVertex v, RoseProgram &program) { - const auto &g = build.g; - - /* we are a suffaig - need to update role to provide som to the - * suffix. 
*/ - bool has_som = false; - if (g[v].left.tracksSom()) { - assert(contains(bc.leftfix_info, v)); - const left_build_info &lni = bc.leftfix_info.at(v); - program.add_before_end( - make_unique(lni.queue, g[v].left.lag)); - has_som = true; - } else if (g[v].som_adjust) { - program.add_before_end( - make_unique(g[v].som_adjust)); - has_som = true; - } - - const auto &reports = g[v].reports; - makeCatchup(build, bc.needs_catchup, reports, program); - - RoseProgram report_block; - for (ReportID id : reports) { - makeReport(build, id, has_som, report_block); - } - program.add_before_end(move(report_block)); -} - -static -void makeRoleSuffix(const RoseBuildImpl &build, const build_context &bc, - RoseVertex v, RoseProgram &program) { - const auto &g = build.g; - if (!g[v].suffix) { - return; - } - assert(contains(bc.suffixes, g[v].suffix)); - u32 qi = bc.suffixes.at(g[v].suffix); - assert(contains(bc.engineOffsets, qi)); - const NFA *nfa = get_nfa_from_blob(bc, qi); - u32 suffixEvent; - if (isContainerType(nfa->type)) { - auto tamaProto = g[v].suffix.tamarama.get(); - assert(tamaProto); - u32 top = (u32)MQE_TOP_FIRST + - tamaProto->top_remap.at(make_pair(g[v].index, - g[v].suffix.top)); - assert(top < MQE_INVALID); - suffixEvent = top; - } else if (isMultiTopType(nfa->type)) { - assert(!g[v].suffix.haig); - u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top; - assert(top < MQE_INVALID); - suffixEvent = top; - } else { - // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP - // event. - assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); - suffixEvent = MQE_TOP; - } - program.add_before_end( - make_unique(qi, suffixEvent)); -} - -static -void makeRoleGroups(const RoseBuildImpl &build, ProgramBuild &prog_build, - RoseVertex v, RoseProgram &program) { - const auto &g = build.g; - rose_group groups = g[v].groups; - if (!groups) { - return; - } - - // The set of "already on" groups as we process this vertex is the - // intersection of the groups set by our predecessors. - assert(in_degree(v, g) > 0); - rose_group already_on = ~rose_group{0}; - for (const auto &u : inv_adjacent_vertices_range(v, g)) { - already_on &= prog_build.vertex_group_map.at(u); - } - - DEBUG_PRINTF("already_on=0x%llx\n", already_on); - DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups); - DEBUG_PRINTF("groups=0x%llx\n", groups); - - already_on &= ~prog_build.squashable_groups; - DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on); - - // We don't *have* to mask off the groups that we know are already on, but - // this will make bugs more apparent. - groups &= ~already_on; - - if (!groups) { - DEBUG_PRINTF("no new groups to set, skipping\n"); - return; - } - - program.add_before_end(make_unique(groups)); -} - -static -void makeRoleInfixTriggers(const RoseBuildImpl &build, const build_context &bc, - RoseVertex u, RoseProgram &program) { - const auto &g = build.g; - - vector infix_program; - - for (const auto &e : out_edges_range(u, g)) { - RoseVertex v = target(e, g); - if (!g[v].left) { - continue; - } - - assert(contains(bc.leftfix_info, v)); - const left_build_info &lbi = bc.leftfix_info.at(v); - if (lbi.has_lookaround) { - continue; - } - - const NFA *nfa = get_nfa_from_blob(bc, lbi.queue); - - // DFAs have no TOP_N support, so they get a classic MQE_TOP event. 
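-        // (Container engines remap the top through the tamarama prototype,
-        // multi-top engines take MQE_TOP_FIRST plus the edge's top, and
-        // everything else gets the plain MQE_TOP event.)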
- u32 top; - if (isContainerType(nfa->type)) { - auto tamaProto = g[v].left.tamarama.get(); - assert(tamaProto); - top = MQE_TOP_FIRST + tamaProto->top_remap.at( - make_pair(g[v].index, g[e].rose_top)); - assert(top < MQE_INVALID); - } else if (!isMultiTopType(nfa->type)) { - assert(num_tops(g[v].left) == 1); - top = MQE_TOP; - } else { - top = MQE_TOP_FIRST + g[e].rose_top; - assert(top < MQE_INVALID); - } - - infix_program.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); - } - - if (infix_program.empty()) { - return; - } - - // Order, de-dupe and add instructions to the end of program. - sort_and_unique(infix_program, [](const RoseInstrTriggerInfix &a, - const RoseInstrTriggerInfix &b) { - return tie(a.cancel, a.queue, a.event) < - tie(b.cancel, b.queue, b.event); - }); - for (const auto &ri : infix_program) { - program.add_before_end(make_unique(ri)); - } -} - -static -void makeRoleSetState(const unordered_map &roleStateIndices, - RoseVertex v, RoseProgram &program) { - // We only need this instruction if a state index has been assigned to this - // vertex. - auto it = roleStateIndices.find(v); - if (it == end(roleStateIndices)) { - return; - } - program.add_before_end(make_unique(it->second)); -} - -static -void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, - const RoseEdge &e, RoseProgram &program) { - const RoseGraph &g = build.g; - const RoseVertex u = source(e, g); - - // We know that we can trust the anchored table (DFA) to always deliver us - // literals at the correct offset. - if (build.isAnchored(v)) { - DEBUG_PRINTF("literal in anchored table, skipping bounds check\n"); - return; - } - - // Use the minimum literal length. - u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); - - u64a min_bound = g[e].minBound + lit_length; - u64a max_bound = g[e].maxBound == ROSE_BOUND_INF - ? ROSE_BOUND_INF - : g[e].maxBound + lit_length; - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - assert(g[u].fixedOffset()); - // Make offsets absolute. - min_bound += g[u].max_offset; - if (max_bound != ROSE_BOUND_INF) { - max_bound += g[u].max_offset; - } - } - - assert(max_bound <= ROSE_BOUND_INF); - assert(min_bound <= max_bound); - - // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET - // (max value of a u64a) to represent ROSE_BOUND_INF. - if (max_bound == ROSE_BOUND_INF) { - max_bound = MAX_OFFSET; - } - - // This instruction should be doing _something_ -- bounds should be tighter - // than just {length, inf}. 
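// The arithmetic above, reduced to a single bound (the helper and its names
// are illustrative, not part of the build): edge bounds are shifted to the
// literal's end by adding the minimum literal length, then made absolute
// when the pred sits at a fixed offset. E.g. lit_length=3 over an edge
// {0, 10} gives {3, 13}; with an anchored pred at offset 5, {8, 18}.
static
u64a absoluteBound(u64a edge_bound, u64a lit_length, bool anch_history,
                   u64a pred_offset) {
    if (edge_bound == ROSE_BOUND_INF) {
        return ROSE_BOUND_INF; // preserved here, widened to MAX_OFFSET for
                               // the CHECK_BOUNDS instruction itself
    }
    u64a bound = edge_bound + lit_length;
    return anch_history ? bound + pred_offset : bound;
}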
- assert(min_bound > lit_length || max_bound < MAX_OFFSET); - - const auto *end_inst = program.end_instruction(); - program.add_before_end( - make_unique(min_bound, max_bound, end_inst)); -} - -static -void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v, - RoseProgram &program) { - u32 handled_key; - if (contains(prog_build.handledKeys, v)) { - handled_key = prog_build.handledKeys.at(v); - } else { - handled_key = verify_u32(prog_build.handledKeys.size()); - prog_build.handledKeys.emplace(v, handled_key); - } - - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(handled_key, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeRoleEagerEodReports(const RoseBuildImpl &build, build_context &bc, - RoseVertex v, RoseProgram &program) { - RoseProgram eod_program; - - for (const auto &e : out_edges_range(v, build.g)) { - if (canEagerlyReportAtEod(build, e)) { - RoseProgram block; - makeRoleReports(build, bc, target(e, build.g), block); - eod_program.add_block(move(block)); - } - } - - if (eod_program.empty()) { - return; - } - - if (!onlyAtEod(build, v)) { - // The rest of our program wasn't EOD anchored, so we need to guard - // these reports with a check. - const auto *end_inst = eod_program.end_instruction(); - eod_program.insert(begin(eod_program), - make_unique(end_inst)); - } - - program.add_before_end(move(eod_program)); -} - -/* Makes a program for a role/vertex given a specfic pred/in_edge. */ -static -RoseProgram makeRoleProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, const RoseEdge &e) { - const RoseGraph &g = build.g; - auto v = target(e, g); - - RoseProgram program; - - // First, add program instructions that enforce preconditions without - // effects. - - if (onlyAtEod(build, v)) { - DEBUG_PRINTF("only at eod\n"); - const auto *end_inst = program.end_instruction(); - program.add_before_end(make_unique(end_inst)); - } - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - makeRoleCheckBounds(build, v, e, program); - } - - // This role program may be triggered by different predecessors, with - // different offset bounds. We must ensure we put this check/set operation - // after the bounds check to deal with this case. - if (in_degree(v, g) > 1) { - assert(!build.isRootSuccessor(v)); - makeRoleCheckNotHandled(prog_build, v, program); - } - - makeRoleLookaround(build, bc, v, program); - makeRoleCheckLeftfix(build, bc.leftfix_info, v, program); - - // Next, we can add program instructions that have effects. This must be - // done as a series of blocks, as some of them (like reports) are - // escapable. - - RoseProgram effects_block; - - RoseProgram reports_block; - makeRoleReports(build, bc, v, reports_block); - effects_block.add_block(move(reports_block)); - - RoseProgram infix_block; - makeRoleInfixTriggers(build, bc, v, infix_block); - effects_block.add_block(move(infix_block)); - - // Note: SET_GROUPS instruction must be after infix triggers, as an infix - // going dead may switch off groups. - RoseProgram groups_block; - makeRoleGroups(build, prog_build, v, groups_block); - effects_block.add_block(move(groups_block)); - - RoseProgram suffix_block; - makeRoleSuffix(build, bc, v, suffix_block); - effects_block.add_block(move(suffix_block)); - - RoseProgram state_block; - makeRoleSetState(bc.roleStateIndices, v, state_block); - effects_block.add_block(move(state_block)); - - // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if - // the program doesn't have one already). 
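// The guard idiom used here, in isolation: build the block's body first,
// then prepend a check wired to the block's own end label, so a failed
// check skips just this block rather than the whole program. This mirrors
// the insert() call in makeRoleEagerEodReports above; the helper itself is
// illustrative.
static
void guardBlockWithOnlyEod(RoseProgram &block) {
    const auto *end_inst = block.end_instruction();
    block.insert(begin(block),
                 make_unique<RoseInstrCheckOnlyEod>(end_inst));
}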
- RoseProgram eod_block; - makeRoleEagerEodReports(build, bc, v, eod_block); - effects_block.add_block(move(eod_block)); - - /* a 'ghost role' may do nothing if we know that its groups are already set - * - in this case we can avoid producing a program at all. */ - if (effects_block.empty()) { - return {}; - } - - program.add_before_end(move(effects_block)); - return program; -} - -static -u32 writeBoundaryProgram(const RoseBuildImpl &build, build_context &bc, - const set &reports) { - if (reports.empty()) { - return 0; - } - - // Note: no CATCHUP instruction is necessary in the boundary case, as we - // should always be caught up (and may not even have the resources in - // scratch to support it). - - const bool has_som = false; - RoseProgram program; - for (const auto &id : reports) { - makeReport(build, id, has_som, program); - } - return writeProgram(bc, move(program)); -} - static void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc, const BoundaryReports &boundary, @@ -4284,12 +2589,14 @@ void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc, DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); - out.reportEodOffset = - writeBoundaryProgram(build, bc, boundary.report_at_eod); - out.reportZeroOffset = - writeBoundaryProgram(build, bc, boundary.report_at_0); - out.reportZeroEodOffset = - writeBoundaryProgram(build, bc, dboundary.report_at_0_eod_full); + auto eod_prog = makeBoundaryProgram(build, boundary.report_at_eod); + out.reportEodOffset = writeProgram(bc, move(eod_prog)); + + auto zero_prog = makeBoundaryProgram(build, boundary.report_at_0); + out.reportZeroOffset = writeProgram(bc, move(zero_prog)); + + auto zeod_prog = makeBoundaryProgram(build, dboundary.report_at_0_eod_full); + out.reportZeroEodOffset = writeProgram(bc, move(zeod_prog)); } static @@ -4341,10 +2648,9 @@ bool hasUsefulStops(const left_build_info &build) { static void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, - const set &eager_queues, - u32 leftfixBeginQueue, u32 leftfixCount, - vector &leftTable, u32 *laggedRoseCount, - size_t *history) { + const set &eager_queues, u32 leftfixBeginQueue, + u32 leftfixCount, vector &leftTable, + u32 *laggedRoseCount, size_t *history) { const RoseGraph &g = tbi.g; const CompileContext &cc = tbi.cc; @@ -4430,445 +2736,6 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, *laggedRoseCount = lagIndex; } -static -void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, - RoseProgram &program) { - // Prepend an instruction to check the pred state is on. 
- const auto *end_inst = pred_block.end_instruction(); - assert(!pred_block.empty()); - pred_block.insert(begin(pred_block), - make_unique(pred_state, end_inst)); - program.add_block(move(pred_block)); -} - -static -void addPredBlocksAny(map &pred_blocks, u32 num_states, - RoseProgram &program) { - RoseProgram sparse_program; - - vector keys; - for (const u32 &key : pred_blocks | map_keys) { - keys.push_back(key); - } - - const RoseInstruction *end_inst = sparse_program.end_instruction(); - auto ri = make_unique(num_states, keys, end_inst); - sparse_program.add_before_end(move(ri)); - - RoseProgram &block = pred_blocks.begin()->second; - assert(!block.empty()); - - /* we no longer need the check handled instruction as all the pred-role - * blocks are being collapsed together */ - stripCheckHandledInstruction(block); - - sparse_program.add_before_end(move(block)); - program.add_block(move(sparse_program)); -} - -static -void addPredBlocksMulti(map &pred_blocks, - u32 num_states, RoseProgram &program) { - assert(!pred_blocks.empty()); - - RoseProgram sparse_program; - const RoseInstruction *end_inst = sparse_program.end_instruction(); - vector> jump_table; - - // BEGIN instruction. - auto ri_begin = make_unique(num_states, end_inst); - RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); - sparse_program.add_before_end(move(ri_begin)); - - // NEXT instructions, one per pred program. - u32 prev_key = pred_blocks.begin()->first; - for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) { - auto ri = make_unique(prev_key, begin_inst, - end_inst); - sparse_program.add_before_end(move(ri)); - prev_key = it->first; - } - - // Splice in each pred program after its BEGIN/NEXT. - auto out_it = begin(sparse_program); - for (auto &m : pred_blocks) { - u32 key = m.first; - RoseProgram &flat_prog = m.second; - assert(!flat_prog.empty()); - const size_t block_len = flat_prog.size() - 1; // without INSTR_END. - - assert(dynamic_cast(out_it->get()) || - dynamic_cast(out_it->get())); - out_it = sparse_program.insert(++out_it, move(flat_prog)); - - // Jump table target for this key is the beginning of the block we just - // spliced in. - jump_table.emplace_back(key, out_it->get()); - - assert(distance(begin(sparse_program), out_it) + block_len <= - sparse_program.size()); - advance(out_it, block_len); - } - - // Write the jump table back into the SPARSE_ITER_BEGIN instruction. - begin_inst->jump_table = move(jump_table); - - program.add_block(move(sparse_program)); -} - -static -void addPredBlocks(map &pred_blocks, u32 num_states, - RoseProgram &program) { - const size_t num_preds = pred_blocks.size(); - if (num_preds == 0) { - return; - } - - if (num_preds == 1) { - const auto head = pred_blocks.begin(); - addPredBlockSingle(head->first, head->second, program); - return; - } - - // First, see if all our blocks are equivalent, in which case we can - // collapse them down into one. 
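// The three-way dispatch in this function, reduced to its decision: a lone
// pred gets a plain CHECK_STATE prefix; several preds with equivalent
// bodies collapse to one SPARSE_ITER_ANY over their state keys; the general
// case builds a sparse iterator with a per-key jump table. Illustrative
// enum and helper only:
enum class PredDispatch { Single, Any, Multi };
static
PredDispatch pickPredDispatch(size_t num_preds, bool all_equiv) {
    if (num_preds == 1) {
        return PredDispatch::Single;
    }
    return all_equiv ? PredDispatch::Any : PredDispatch::Multi;
}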
- const auto &blocks = pred_blocks | map_values; - if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) { - return RoseProgramEquivalence()(*begin(blocks), block); - })) { - DEBUG_PRINTF("all blocks equiv\n"); - addPredBlocksAny(pred_blocks, num_states, program); - return; - } - - addPredBlocksMulti(pred_blocks, num_states, program); -} - -static -void makePushDelayedInstructions(const RoseBuildImpl &build, - ProgramBuild &prog_build, u32 lit_id, - RoseProgram &program) { - const auto &info = build.literal_info.at(lit_id); - - vector delay_instructions; - - for (const auto &delayed_lit_id : info.delayed_ids) { - DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); - assert(contains(prog_build.delay_programs, delayed_lit_id)); - u32 delay_id = prog_build.delay_programs.at(delayed_lit_id); - const auto &delay_lit = build.literals.right.at(delayed_lit_id); - delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); - } - - sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a, - const RoseInstrPushDelayed &b) { - return tie(a.delay, a.index) < tie(b.delay, b.index); - }); - - for (const auto &ri : delay_instructions) { - program.add_before_end(make_unique(ri)); - } -} - -static -void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id, - RoseProgram &program) { - const auto &info = build.literal_info.at(lit_id); - rose_group groups = info.group_mask; - if (!groups) { - return; - } - program.add_before_end(make_unique(groups)); -} - -static -void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, - u32 lit_id, RoseProgram &program) { - const auto &info = build.literal_info.at(lit_id); - if (!info.requires_benefits) { - return; - } - - vector look; - - const ue2_literal &s = build.literals.right.at(lit_id).s; - DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id, - dumpString(s).c_str()); - assert(s.length() <= MAX_MASK2_WIDTH); - s32 i = 0 - s.length(); - for (const auto &e : s) { - if (!e.nocase) { - look.emplace_back(verify_s8(i), e); - } - i++; - } - - assert(!look.empty()); - makeLookaroundInstruction(bc, look, program); -} - -static -void makeGroupSquashInstruction(const RoseBuildImpl &build, - u32 lit_id, - RoseProgram &program) { - const auto &info = build.literal_info.at(lit_id); - if (!info.squash_group) { - return; - } - - rose_group groups = info.group_mask; - if (!groups) { - return; - } - - DEBUG_PRINTF("squashes 0x%llx\n", groups); - program.add_before_end( - make_unique(~groups)); // Note negated. 
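// Why the mask is negated: at runtime the squash is applied with a bitwise
// AND, so storing the complement clears exactly the squashed groups while
// leaving all others on. Worked values in the comment are illustrative:
static
rose_group applyGroupSquash(rose_group live, rose_group squash) {
    // e.g. live = 0xf, squash = 0x6: stored mask is ~0x6, result is 0x9
    return live & ~squash;
}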
-} - -static -u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - assert(!lit_vertices.empty()); - - u32 max_offset = 0; - for (const auto &v : lit_vertices) { - max_offset = max(max_offset, build.g[v].max_offset); - } - - return max_offset; -} - -static -u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - assert(!lit_vertices.empty()); - - u32 min_offset = UINT32_MAX; - for (const auto &v : lit_vertices) { - min_offset = min(min_offset, build.g[v].min_offset); - } - - return min_offset; -} - -static -void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id, - const vector &lit_edges, - u32 floatingMinLiteralMatchOffset, - RoseProgram &program) { - if (lit_edges.empty()) { - return; - } - - if (floatingMinLiteralMatchOffset == 0) { - return; - } - - RoseVertex v = target(lit_edges.front(), build.g); - if (!build.isFloating(v)) { - return; - } - - const auto &lit = build.literals.right.at(lit_id); - size_t min_len = lit.elength(); - u32 min_offset = findMinOffset(build, lit_id); - DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len, - min_offset, floatingMinLiteralMatchOffset); - - // If we can't match before the min offset, we don't need the check. - if (min_len >= floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("no need for check, min is %u\n", - floatingMinLiteralMatchOffset); - return; - } - - assert(min_offset >= floatingMinLiteralMatchOffset); - assert(min_offset < UINT32_MAX); - - DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); - const auto *end_inst = program.end_instruction(); - program.add_before_end( - make_unique(min_offset, end_inst)); -} - -static -void makeCheckLiteralInstruction(const RoseBuildImpl &build, u32 lit_id, - size_t longLitLengthThreshold, - RoseProgram &program) { - assert(longLitLengthThreshold > 0); - - DEBUG_PRINTF("lit_id=%u, long lit threshold %zu\n", lit_id, - longLitLengthThreshold); - - if (build.isDelayed(lit_id)) { - return; - } - - const rose_literal_id &lit = build.literals.right.at(lit_id); - - if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("lit short enough to not need confirm\n"); - return; - } - - // Check resource limits as well. - if (lit.s.length() > build.cc.grey.limitLiteralLength) { - throw ResourceLimitError(); - } - - if (lit.s.length() <= longLitLengthThreshold) { - DEBUG_PRINTF("is a medium-length literal\n"); - const auto *end_inst = program.end_instruction(); - unique_ptr ri; - if (lit.s.any_nocase()) { - ri = make_unique(lit.s.get_string(), - end_inst); - } else { - ri = make_unique(lit.s.get_string(), - end_inst); - } - program.add_before_end(move(ri)); - return; - } - - // Long literal support should only really be used for the floating table - // in streaming mode. 
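// The three literal-length tiers this function distinguishes, as one
// illustrative classifier (thresholds as used above): short literals are
// confirmed entirely by the HWLM matcher, medium ones get a CHECK_MEDIUM_LIT
// against history, and only longer literals fall through to the long
// literal table.
enum class LitConfirm { None, Medium, Long };
static
LitConfirm classifyLitLength(size_t len, size_t long_lit_threshold) {
    if (len <= ROSE_SHORT_LITERAL_LEN_MAX) {
        return LitConfirm::None;
    }
    return len <= long_lit_threshold ? LitConfirm::Medium : LitConfirm::Long;
}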
- assert(lit.table == ROSE_FLOATING && build.cc.streaming); - - DEBUG_PRINTF("is a long literal\n"); - - const auto *end_inst = program.end_instruction(); - unique_ptr ri; - if (lit.s.any_nocase()) { - ri = make_unique(lit.s.get_string(), - end_inst); - } else { - ri = make_unique(lit.s.get_string(), end_inst); - } - program.add_before_end(move(ri)); -} - -static -bool hasDelayedLiteral(const RoseBuildImpl &build, - const vector &lit_edges) { - auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1); - for (const auto &e : lit_edges) { - auto v = target(e, build.g); - const auto &lits = build.g[v].literals; - if (any_of(begin(lits), end(lits), is_delayed)) { - return true; - } - } - return false; -} - -static -RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, - build_context &bc, ProgramBuild &prog_build, - u32 lit_id, const vector &lit_edges, - bool is_anchored_replay_program) { - RoseProgram program; - - // Check long literal info. - makeCheckLiteralInstruction(build, lit_id, bc.longLitLengthThreshold, - program); - - // Check lit mask. - makeCheckLitMaskInstruction(build, bc, lit_id, program); - - // Check literal groups. This is an optimisation that we only perform for - // delayed literals, as their groups may be switched off; ordinarily, we - // can trust the HWLM matcher. - if (hasDelayedLiteral(build, lit_edges)) { - makeGroupCheckInstruction(build, lit_id, program); - } - - // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build, prog_build, lit_id, program); - - // Add pre-check for early literals in the floating table. - makeCheckLitEarlyInstruction(build, lit_id, lit_edges, - prog_build.floatingMinLiteralMatchOffset, - program); - - /* Check if we are able to deliever matches from the anchored table now */ - if (!is_anchored_replay_program) { - makeAnchoredLiteralDelay(build, prog_build, lit_id, program); - } - - return program; -} - -static -RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, u32 lit_id, - const vector &lit_edges, - bool is_anchored_replay_program) { - const auto &g = build.g; - - DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size()); - - // Construct initial program up front, as its early checks must be able - // to jump to end and terminate processing for this literal. - auto lit_program = makeLitInitialProgram(build, bc, prog_build, lit_id, - lit_edges, - is_anchored_replay_program); - - RoseProgram role_programs; - - // Predecessor state id -> program block. - map pred_blocks; - - // Construct sparse iter sub-programs. - for (const auto &e : lit_edges) { - const auto &u = source(e, g); - if (build.isAnyStart(u)) { - continue; // Root roles are not handled with sparse iterator. - } - DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index, - g[target(e, g)].index); - assert(contains(bc.roleStateIndices, u)); - u32 pred_state = bc.roleStateIndices.at(u); - auto role_prog = makeRoleProgram(build, bc, prog_build, e); - if (!role_prog.empty()) { - pred_blocks[pred_state].add_block(move(role_prog)); - } - } - - // Add blocks to deal with non-root edges (triggered by sparse iterator or - // mmbit_isset checks). - addPredBlocks(pred_blocks, bc.roleStateIndices.size(), role_programs); - - // Add blocks to handle root roles. 
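// The edge split used in this function, in isolation: edges from a start
// vertex need no predecessor state and run their role programs
// unconditionally; all others are dispatched through the sparse iterator
// keyed on the pred's state index. Illustrative predicate only:
static
bool edgeUsesSparseIter(const RoseBuildImpl &build, const RoseEdge &e) {
    return !build.isAnyStart(source(e, build.g));
}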
- for (const auto &e : lit_edges) { - const auto &u = source(e, g); - if (!build.isAnyStart(u)) { - continue; - } - DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index, - g[target(e, g)].index); - role_programs.add_block(makeRoleProgram(build, bc, prog_build, e)); - } - - if (lit_id == build.eod_event_literal_id) { - /* Note: does not require the lit intial program */ - assert(build.eod_event_literal_id != MO_INVALID_IDX); - return role_programs; - } - - /* Instructions to run even if a role program bails out */ - RoseProgram unconditional_block; - - // Literal may squash groups. - makeGroupSquashInstruction(build, lit_id, unconditional_block); - - role_programs.add_block(move(unconditional_block)); - lit_program.add_before_end(move(role_programs)); - - return lit_program; -} - static RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, u32 lit_id, @@ -4885,49 +2752,10 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, edges_ptr = &no_edges; } - return makeLiteralProgram(build, bc, prog_build, lit_id, *edges_ptr, - is_anchored_replay_program); -} - -/** - * \brief Consumes list of program blocks corresponding to different literals, - * checks them for duplicates and then concatenates them into one program. - * - * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is - * inserted to prevent the work_done flag being contaminated by early blocks. - */ -static -RoseProgram assembleProgramBlocks(vector &&blocks) { - RoseProgram program; - - DEBUG_PRINTF("%zu blocks before dedupe\n", blocks.size()); - - sort(blocks.begin(), blocks.end(), - [](const RoseProgram &a, const RoseProgram &b) { - RoseProgramHash hasher; - return hasher(a) < hasher(b); - }); - - blocks.erase(unique(blocks.begin(), blocks.end(), RoseProgramEquivalence()), - blocks.end()); - - DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size()); - - for (auto &block : blocks) { - /* If we have multiple blocks from different literals and any of them - * squash groups, we will have to add a CLEAR_WORK_DONE instruction to - * each literal program block to clear the work_done flags so that it's - * only set if a state has been. */ - if (!program.empty() && reads_work_done_flag(block)) { - RoseProgram clear_block; - clear_block.add_before_end(make_unique()); - program.add_block(move(clear_block)); - } - - program.add_block(move(block)); - } - - return program; + return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, + bc.engine_info_by_queue, bc.lookarounds, + bc.roleStateIndices, prog_build, lit_id, + *edges_ptr, is_anchored_replay_program); } static @@ -4947,38 +2775,6 @@ RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc, return assembleProgramBlocks(move(blocks)); } -static -u32 writeDelayRebuildProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, - const vector &lit_ids) { - assert(!lit_ids.empty()); - - if (!build.cc.streaming) { - return 0; // We only do delayed rebuild in streaming mode. - } - - vector blocks; - - for (const auto &lit_id : lit_ids) { - DEBUG_PRINTF("lit_id=%u\n", lit_id); - const auto &info = build.literal_info.at(lit_id); - if (info.delayed_ids.empty()) { - continue; // No delayed IDs, no work to do. 
- } - - RoseProgram prog; - makeCheckLiteralInstruction(build, lit_id, bc.longLitLengthThreshold, - prog); - makeCheckLitMaskInstruction(build, bc, lit_id, prog); - makePushDelayedInstructions(build, prog_build, lit_id, prog); - blocks.push_back(move(prog)); - } - - auto program = assembleProgramBlocks(move(blocks)); - - return writeProgram(bc, move(program)); -} - /** * \brief Returns a map from literal ID to a list of edges leading into * vertices with that literal ID. @@ -5118,8 +2914,15 @@ void buildLiteralPrograms(const RoseBuildImpl &build, lit_edge_map); frag.lit_program_offset = writeProgram(bc, move(lit_prog)); - frag.delay_program_offset - = writeDelayRebuildProgram(build, bc, prog_build, frag.lit_ids); + // We only do delayed rebuild in streaming mode. + if (!build.cc.streaming) { + continue; + } + + auto rebuild_prog = makeDelayRebuildProgram(build, + bc.lookarounds, prog_build, + frag.lit_ids); + frag.delay_program_offset = writeProgram(bc, move(rebuild_prog)); } } @@ -5267,10 +3070,7 @@ pair buildReportPrograms(const RoseBuildImpl &build, programs.reserve(reports.size()); for (ReportID id : reports) { - RoseProgram program; - const bool has_som = false; - makeCatchupMpv(build, bc.needs_mpv_catchup, id, program); - makeReport(build, id, has_som, program); + auto program = makeReportProgram(build, bc.needs_mpv_catchup, id); u32 offset = writeProgram(bc, move(program)); programs.push_back(offset); build.rm.setProgramOffset(id, offset); @@ -5283,38 +3083,6 @@ pair buildReportPrograms(const RoseBuildImpl &build, return {offset, count}; } -static -RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, - bool needs_catchup, - ProgramBuild &prog_build, const RoseEdge &e, - const bool multiple_preds) { - const RoseGraph &g = build.g; - const RoseVertex v = target(e, g); - - RoseProgram program; - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - makeRoleCheckBounds(build, v, e, program); - } - - if (multiple_preds) { - // Only necessary when there is more than one pred. 
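// The not-handled check in outline: each vertex reachable from several
// preds is assigned a "handled" key; the first block to run for it sets the
// key and later blocks for the same vertex bail out. Key allocation as a
// hedged sketch (the real map lives in ProgramBuild):
static
u32 handledKeyFor(unordered_map<RoseVertex, u32> &handled_keys,
                  RoseVertex v) {
    auto it = handled_keys.find(v);
    if (it != handled_keys.end()) {
        return it->second;
    }
    u32 key = verify_u32(handled_keys.size());
    handled_keys.emplace(v, key);
    return key;
}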
- makeRoleCheckNotHandled(prog_build, v, program); - } - - const auto &reports = g[v].reports; - makeCatchup(build, needs_catchup, reports, program); - - const bool has_som = false; - RoseProgram report_block; - for (const auto &id : reports) { - makeReport(build, id, has_som, report_block); - } - program.add_before_end(move(report_block)); - - return program; -} - static bool hasEodAnchoredSuffix(const RoseBuildImpl &build) { const RoseGraph &g = build.g; @@ -5366,7 +3134,8 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc, continue; } if (canEagerlyReportAtEod(build, e)) { - DEBUG_PRINTF("already done report for vertex %zu\n", g[u].index); + DEBUG_PRINTF("already done report for vertex %zu\n", + g[u].index); continue; } edge_list.push_back(e); @@ -5378,8 +3147,7 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc, assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); pred_blocks[pred_state].add_block( - makeEodAnchorProgram(build, bc.needs_catchup, prog_build, e, - multiple_preds)); + makeEodAnchorProgram(build, prog_build, e, multiple_preds)); } } @@ -5414,58 +3182,31 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc, tie(g[source(b, g)].index, g[target(b, g)].index); }); - auto block = makeLiteralProgram(build, bc, prog_build, + auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, + bc.engine_info_by_queue, bc.lookarounds, + bc.roleStateIndices, prog_build, build.eod_event_literal_id, edge_list, false); program.add_block(move(block)); } static -void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { - if (!eodNfaIterOffset) { - return; - } - - RoseProgram block; - block.add_before_end(make_unique(eodNfaIterOffset)); - program.add_block(move(block)); -} - -static -void addSuffixesEodProgram(const RoseBuildImpl &build, RoseProgram &program) { - if (!hasEodAnchoredSuffix(build)) { - return; - } - - RoseProgram block; - block.add_before_end(make_unique()); - program.add_block(move(block)); -} - -static -void addMatcherEodProgram(const RoseBuildImpl &build, RoseProgram &program) { - if (!hasEodMatcher(build)) { - return; - } - - RoseProgram block; - block.add_before_end(make_unique()); - program.add_block(move(block)); -} - -static -u32 writeEodProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, u32 eodNfaIterOffset) { +RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, u32 eodNfaIterOffset) { RoseProgram program; addEodEventProgram(build, bc, prog_build, program); addEnginesEodProgram(eodNfaIterOffset, program); addEodAnchorProgram(build, bc, prog_build, false, program); - addMatcherEodProgram(build, program); + if (hasEodMatcher(build)) { + addMatcherEodProgram(program); + } addEodAnchorProgram(build, bc, prog_build, true, program); - addSuffixesEodProgram(build, program); + if (hasEodAnchoredSuffix(build)) { + addSuffixesEodProgram(program); + } - return writeProgram(bc, move(program)); + return program; } static @@ -5737,8 +3478,6 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { build_context bc; u32 floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this, anchored_dfas); - bc.longLitLengthThreshold = longLitLengthThreshold; - bc.needs_catchup = needsCatchup(*this); recordResources(bc.resources, *this, fragments); if (!anchored_dfas.empty()) { bc.resources.has_anchored = true; @@ -5791,7 +3530,8 @@ bytecode_ptr 
RoseBuildImpl::buildFinalEngine(u32 minWidth) { &laggedRoseCount, &historyRequired); // Information only needed for program construction. - ProgramBuild prog_build(floatingMinLiteralMatchOffset); + ProgramBuild prog_build(floatingMinLiteralMatchOffset, + longLitLengthThreshold, needsCatchup(*this)); prog_build.vertex_group_map = getVertexGroupMap(*this); prog_build.squashable_groups = getSquashableGroups(*this); @@ -5803,13 +3543,14 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { buildLiteralPrograms(*this, fragments, bc, prog_build); - proto.eodProgramOffset = - writeEodProgram(*this, bc, prog_build, eodNfaIterOffset); + auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset); + proto.eodProgramOffset = writeProgram(bc, move(eod_prog)); size_t longLitStreamStateRequired = 0; - proto.longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob, - bc.longLiterals, longLitLengthThreshold, &historyRequired, - &longLitStreamStateRequired); + proto.longLitTableOffset + = buildLongLiteralTable(*this, bc.engine_blob, bc.longLiterals, + longLitLengthThreshold, &historyRequired, + &longLitStreamStateRequired); proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc); proto.eagerIterOffset = writeEagerQueueIter( @@ -5817,7 +3558,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { addSomRevNfas(bc, proto, ssm); - writeLookaroundTables(bc, proto); + writeLookaroundTables(bc.lookarounds, bc.engine_blob, proto); writeDkeyInfo(rm, bc.engine_blob, proto); writeLeftInfo(bc.engine_blob, proto, leftInfoTable); @@ -5829,8 +3570,8 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build floating HWLM matcher. rose_group fgroups = 0; - auto ftable = buildFloatingMatcher(*this, fragments, - bc.longLitLengthThreshold, &fgroups, &historyRequired); + auto ftable = buildFloatingMatcher(*this, fragments, longLitLengthThreshold, + &fgroups, &historyRequired); if (ftable) { proto.fmatcherOffset = bc.engine_blob.add(ftable); bc.resources.has_floating = true; @@ -5838,7 +3579,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build delay rebuild HWLM matcher. 
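// Gating note, as a sketch: delay rebuild exists only to replay matches of
// delayed literals across stream boundaries, so this matcher (and the
// rebuild programs written earlier) are produced in streaming mode alone.
// Illustrative predicate; the real checks live inside the builders:
static
bool wantDelayRebuild(const CompileContext &cc) {
    return cc.streaming;
}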
auto drtable = buildDelayRebuildMatcher(*this, fragments, - bc.longLitLengthThreshold); + longLitLengthThreshold); if (drtable) { proto.drmatcherOffset = bc.engine_blob.add(drtable); } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 21db7a8e..b920e922 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -622,6 +622,11 @@ u64a findMaxOffset(const std::set &reports, const ReportManager &rm); void normaliseLiteralMask(const ue2_literal &s, std::vector &msk, std::vector &cmp); +u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id); +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id); + +bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e); + #ifndef NDEBUG bool canImplementGraphs(const RoseBuildImpl &tbi); #endif diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 44044cb9..51a6ea85 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -909,6 +909,59 @@ u32 roseQuality(const RoseEngine *t) { return 1; } +u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 min_offset = UINT32_MAX; + for (const auto &v : lit_vertices) { + min_offset = min(min_offset, build.g[v].min_offset); + } + + return min_offset; +} + +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 max_offset = 0; + for (const auto &v : lit_vertices) { + max_offset = max(max_offset, build.g[v].max_offset); + } + + return max_offset; +} + +bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) { + const auto &g = build.g; + const auto v = target(e, g); + + if (!build.g[v].eod_accept) { + return false; + } + + // If there's a graph between us and EOD, we shouldn't be eager. + if (build.g[v].left) { + return false; + } + + // Must be exactly at EOD. + if (g[e].minBound != 0 || g[e].maxBound != 0) { + return false; + } + + // In streaming mode, we can only eagerly report EOD for literals in the + // EOD-anchored table, as that's the only time we actually know where EOD + // is. In block mode, we always have this information. + const auto u = source(e, g); + if (build.cc.streaming && !build.isInETable(u)) { + return false; + } + + return true; +} + #ifndef NDEBUG /** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose * graph are implementable. 
*/ diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index c319eed2..8d0306ae 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -29,14 +29,52 @@ #include "rose_build_program.h" #include "rose_build_instructions.h" +#include "rose_build_lookaround.h" +#include "rose_build_resources.h" +#include "nfa/nfa_api_queue.h" +#include "nfa/nfa_build_util.h" +#include "nfa/tamaramacompile.h" +#include "nfagraph/ng_util.h" +#include "util/charreach_util.h" +#include "util/container.h" +#include "util/compile_context.h" +#include "util/compile_error.h" +#include "util/dump_charclass.h" +#include "util/report_manager.h" +#include "util/verify_types.h" + +#include #include #include using namespace std; +using boost::adaptors::map_values; +using boost::adaptors::map_keys; namespace ue2 { +engine_info::engine_info(const NFA *nfa, bool trans) + : type((NFAEngineType)nfa->type), accepts_eod(nfaAcceptsEod(nfa)), + stream_size(nfa->streamStateSize), + scratch_size(nfa->scratchStateSize), + scratch_align(state_alignment(*nfa)), + transient(trans) { + assert(scratch_align); +} + +left_build_info::left_build_info(u32 q, u32 l, u32 t, rose_group sm, + const std::vector &stops, u32 max_ql, + u8 cm_count, const CharReach &cm_cr) + : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops), + max_queuelen(max_ql), countingMiracleCount(cm_count), + countingMiracleReach(cm_cr) { +} + +left_build_info::left_build_info(const vector> &looks) + : has_lookaround(true), lookaround(looks) { +} + using OffsetMap = RoseInstruction::OffsetMap; static @@ -216,6 +254,8 @@ bool RoseProgramEquivalence::operator()(const RoseProgram &prog1, return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv); } +/* Removes any CHECK_HANDLED instructions from the given program */ +static void stripCheckHandledInstruction(RoseProgram &prog) { for (auto it = prog.begin(); it != prog.end();) { auto ins = dynamic_cast(it->get()); @@ -238,6 +278,9 @@ void stripCheckHandledInstruction(RoseProgram &prog) { } } + +/** Returns true if the program may read the the interpreter's work_done flag */ +static bool reads_work_done_flag(const RoseProgram &prog) { for (const auto &ri : prog) { if (dynamic_cast(ri.get())) { @@ -247,4 +290,2111 @@ bool reads_work_done_flag(const RoseProgram &prog) { return false; } +void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { + if (!eodNfaIterOffset) { + return; + } + + RoseProgram block; + block.add_before_end(make_unique(eodNfaIterOffset)); + program.add_block(move(block)); +} + +void addSuffixesEodProgram(RoseProgram &program) { + RoseProgram block; + block.add_before_end(make_unique()); + program.add_block(move(block)); +} + +void addMatcherEodProgram(RoseProgram &program) { + RoseProgram block; + block.add_before_end(make_unique()); + program.add_block(move(block)); +} + +static +void makeRoleCheckLeftfix(const RoseBuildImpl &build, + const map &leftfix_info, + RoseVertex v, RoseProgram &program) { + auto it = leftfix_info.find(v); + if (it == end(leftfix_info)) { + return; + } + const left_build_info &lni = it->second; + if (lni.has_lookaround) { + return; // Leftfix completely implemented by lookaround. 
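// The prefix/infix split made just below, sketched: a leftfix hanging off a
// root successor is a prefix (it runs from stream start and gets
// CHECK_PREFIX), anything else is an infix triggered by a predecessor role
// (CHECK_INFIX). Illustrative helper only:
static
bool leftfixIsPrefix(const RoseBuildImpl &build, RoseVertex v) {
    return build.isRootSuccessor(v);
}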
+ } + + assert(!build.cc.streaming || + build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); + + bool is_prefix = build.isRootSuccessor(v); + const auto *end_inst = program.end_instruction(); + + unique_ptr ri; + if (is_prefix) { + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); + } else { + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); + } + program.add_before_end(move(ri)); +} + +static +void makeAnchoredLiteralDelay(const RoseBuildImpl &build, + const ProgramBuild &prog_build, u32 lit_id, + RoseProgram &program) { + // Only relevant for literals in the anchored table. + const rose_literal_id &lit = build.literals.right.at(lit_id); + if (lit.table != ROSE_ANCHORED) { + return; + } + + // If this literal match cannot occur after floatingMinLiteralMatchOffset, + // we do not need this check. + bool all_too_early = true; + rose_group groups = 0; + + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + for (RoseVertex v : lit_vertices) { + if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) { + all_too_early = false; + } + groups |= build.g[v].groups; + } + + if (all_too_early) { + return; + } + + assert(contains(prog_build.anchored_programs, lit_id)); + u32 anch_id = prog_build.anchored_programs.at(lit_id); + + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(groups, anch_id, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeDedupe(const ReportManager &rm, const Report &report, + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = + make_unique(report.quashSom, rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeDedupeSom(const ReportManager &rm, const Report &report, + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(report.quashSom, + rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeCatchup(const ReportManager &rm, bool needs_catchup, + const flat_set &reports, RoseProgram &program) { + if (!needs_catchup) { + return; + } + + // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run + // before reports are triggered. 
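// "Catchup" here means running outstanding engine queues up to the current
// offset so dedupe and SOM state are correct before a report fires;
// INTERNAL_ROSE_CHAIN reports feed the MPV directly and are exempt. The
// per-report test used just below, as a standalone predicate:
static
bool reportNeedsCatchup(const ReportManager &rm, ReportID id) {
    return rm.getReport(id).type != INTERNAL_ROSE_CHAIN;
}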
+ + auto report_needs_catchup = [&](const ReportID &id) { + const Report &report = rm.getReport(id); + return report.type != INTERNAL_ROSE_CHAIN; + }; + + if (!any_of(begin(reports), end(reports), report_needs_catchup)) { + DEBUG_PRINTF("none of the given reports needs catchup\n"); + return; + } + + program.add_before_end(make_unique()); +} + +static +void writeSomOperation(const Report &report, som_operation *op) { + assert(op); + + memset(op, 0, sizeof(*op)); + + switch (report.type) { + case EXTERNAL_CALLBACK_SOM_REL: + op->type = SOM_EXTERNAL_CALLBACK_REL; + break; + case INTERNAL_SOM_LOC_SET: + op->type = SOM_INTERNAL_LOC_SET; + break; + case INTERNAL_SOM_LOC_SET_IF_UNSET: + op->type = SOM_INTERNAL_LOC_SET_IF_UNSET; + break; + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_COPY: + op->type = SOM_INTERNAL_LOC_COPY; + break; + case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_MAKE_WRITABLE: + op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE; + break; + case EXTERNAL_CALLBACK_SOM_STORED: + op->type = SOM_EXTERNAL_CALLBACK_STORED; + break; + case EXTERNAL_CALLBACK_SOM_ABS: + op->type = SOM_EXTERNAL_CALLBACK_ABS; + break; + case EXTERNAL_CALLBACK_SOM_REV_NFA: + op->type = SOM_EXTERNAL_CALLBACK_REV_NFA; + break; + case INTERNAL_SOM_LOC_SET_FROM: + op->type = SOM_INTERNAL_LOC_SET_FROM; + break; + case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE; + break; + default: + // This report doesn't correspond to a SOM operation. + assert(0); + throw CompileError("Unable to generate bytecode."); + } + + op->onmatch = report.onmatch; + + switch (report.type) { + case EXTERNAL_CALLBACK_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + op->aux.revNfaIndex = report.revNfaIndex; + break; + default: + op->aux.somDistance = report.somDistance; + break; + } +} + +static +void makeReport(const RoseBuildImpl &build, const ReportID id, + const bool has_som, RoseProgram &program) { + assert(id < build.rm.numReports()); + const Report &report = build.rm.getReport(id); + + RoseProgram report_block; + const RoseInstruction *end_inst = report_block.end_instruction(); + + // Handle min/max offset checks. + if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { + auto ri = make_unique(report.minOffset, + report.maxOffset, end_inst); + report_block.add_before_end(move(ri)); + } + + // If this report has an exhaustion key, we can check it in the program + // rather than waiting until we're in the callback adaptor. + if (report.ekey != INVALID_EKEY) { + auto ri = make_unique(report.ekey, end_inst); + report_block.add_before_end(move(ri)); + } + + // External SOM reports that aren't passthrough need their SOM value + // calculated. + if (isExternalSomReport(report) && + report.type != EXTERNAL_CALLBACK_SOM_PASS) { + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); + } + + // Min length constraint. 
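// writeSomOperation above maps each report type onto a som_operation; the
// one wrinkle is the aux field, which carries a reverse-NFA index for the
// *_SOM_REV_NFA flavours and a SOM distance for everything else. Reduced
// form of that final switch (assuming the ReportType enum from
// util/report.h):
static
bool somOpUsesRevNfa(ReportType t) {
    return t == EXTERNAL_CALLBACK_SOM_REV_NFA ||
           t == INTERNAL_SOM_LOC_SET_SOM_REV_NFA ||
           t == INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET ||
           t == INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE;
}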
+ if (report.minLength > 0) { + assert(build.hasSom); + auto ri = make_unique( + report.offsetAdjust, report.minLength, end_inst); + report_block.add_before_end(move(ri)); + } + + if (report.quashSom) { + report_block.add_before_end(make_unique()); + } + + switch (report.type) { + case EXTERNAL_CALLBACK: + if (!has_som) { + // Dedupe is only necessary if this report has a dkey, or if there + // are SOM reports to catch up. + bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; + if (report.ekey == INVALID_EKEY) { + if (needs_dedupe) { + report_block.add_before_end( + make_unique( + report.quashSom, build.rm.getDkey(report), + report.onmatch, report.offsetAdjust, end_inst)); + } else { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } + } else { + if (needs_dedupe) { + makeDedupe(build.rm, report, report_block); + } + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } + } else { // has_som + makeDedupeSom(build.rm, report, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } else { + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } + } + break; + case INTERNAL_SOM_LOC_SET: + case INTERNAL_SOM_LOC_SET_IF_UNSET: + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + case INTERNAL_SOM_LOC_COPY: + case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: + case INTERNAL_SOM_LOC_MAKE_WRITABLE: + case INTERNAL_SOM_LOC_SET_FROM: + case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + if (has_som) { + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); + } else { + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); + } + break; + case INTERNAL_ROSE_CHAIN: { + report_block.add_before_end(make_unique( + report.onmatch, report.topSquashDistance)); + break; + } + case EXTERNAL_CALLBACK_SOM_REL: + case EXTERNAL_CALLBACK_SOM_STORED: + case EXTERNAL_CALLBACK_SOM_ABS: + case EXTERNAL_CALLBACK_SOM_REV_NFA: + makeDedupeSom(build.rm, report, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } else { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } + break; + case EXTERNAL_CALLBACK_SOM_PASS: + makeDedupeSom(build.rm, report, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } else { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } + break; + + default: + assert(0); + throw CompileError("Unable to generate bytecode."); + } + + assert(!report_block.empty()); + program.add_block(move(report_block)); +} + +static +void makeRoleReports(const RoseBuildImpl &build, + const std::map &leftfix_info, + bool needs_catchup, RoseVertex v, RoseProgram &program) { + const auto &g = build.g; + + bool report_som = false; + if (g[v].left.tracksSom()) { + /* we are a suffaig - need to update role to provide som to the + * suffix. 
*/ + assert(contains(leftfix_info, v)); + const left_build_info &lni = leftfix_info.at(v); + program.add_before_end( + make_unique(lni.queue, g[v].left.lag)); + report_som = true; + } else if (g[v].som_adjust) { + program.add_before_end( + make_unique(g[v].som_adjust)); + report_som = true; + } + + makeCatchup(build.rm, needs_catchup, g[v].reports, program); + + RoseProgram report_block; + for (ReportID id : g[v].reports) { + makeReport(build, id, report_som, report_block); + } + program.add_before_end(move(report_block)); +} + +static +void makeRoleSetState(const unordered_map &roleStateIndices, + RoseVertex v, RoseProgram &program) { + // We only need this instruction if a state index has been assigned to this + // vertex. + auto it = roleStateIndices.find(v); + if (it == end(roleStateIndices)) { + return; + } + program.add_before_end(make_unique(it->second)); +} + +static +void makePushDelayedInstructions(const RoseLiteralMap &literals, + ProgramBuild &prog_build, + const flat_set &delayed_ids, + RoseProgram &program) { + vector delay_instructions; + + for (const auto &delayed_lit_id : delayed_ids) { + DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); + assert(contains(prog_build.delay_programs, delayed_lit_id)); + u32 delay_id = prog_build.delay_programs.at(delayed_lit_id); + const auto &delay_lit = literals.right.at(delayed_lit_id); + delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); + } + + sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a, + const RoseInstrPushDelayed &b) { + return tie(a.delay, a.index) < tie(b.delay, b.index); + }); + + for (const auto &ri : delay_instructions) { + program.add_before_end(make_unique(ri)); + } +} + +static +void makeCheckLiteralInstruction(const rose_literal_id &lit, + size_t longLitLengthThreshold, + RoseProgram &program, + const CompileContext &cc) { + assert(longLitLengthThreshold > 0); + + DEBUG_PRINTF("lit=%s, long lit threshold %zu\n", dumpString(lit.s).c_str(), + longLitLengthThreshold); + + if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("lit short enough to not need confirm\n"); + return; + } + + // Check resource limits as well. + if (lit.s.length() > cc.grey.limitLiteralLength) { + throw ResourceLimitError(); + } + + if (lit.s.length() <= longLitLengthThreshold) { + DEBUG_PRINTF("is a medium-length literal\n"); + const auto *end_inst = program.end_instruction(); + unique_ptr ri; + if (lit.s.any_nocase()) { + ri = make_unique(lit.s.get_string(), + end_inst); + } else { + ri = make_unique(lit.s.get_string(), + end_inst); + } + program.add_before_end(move(ri)); + return; + } + + // Long literal support should only really be used for the floating table + // in streaming mode. 
+ assert(lit.table == ROSE_FLOATING && cc.streaming); + + DEBUG_PRINTF("is a long literal\n"); + + const auto *end_inst = program.end_instruction(); + unique_ptr ri; + if (lit.s.any_nocase()) { + ri = make_unique(lit.s.get_string(), + end_inst); + } else { + ri = make_unique(lit.s.get_string(), end_inst); + } + program.add_before_end(move(ri)); +} + +static +void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v, + RoseProgram &program) { + u32 handled_key; + if (contains(prog_build.handledKeys, v)) { + handled_key = prog_build.handledKeys.at(v); + } else { + handled_key = verify_u32(prog_build.handledKeys.size()); + prog_build.handledKeys.emplace(v, handled_key); + } + + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(handled_key, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, + const RoseEdge &e, RoseProgram &program) { + const RoseGraph &g = build.g; + const RoseVertex u = source(e, g); + + // We know that we can trust the anchored table (DFA) to always deliver us + // literals at the correct offset. + if (build.isAnchored(v)) { + DEBUG_PRINTF("literal in anchored table, skipping bounds check\n"); + return; + } + + // Use the minimum literal length. + u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); + + u64a min_bound = g[e].minBound + lit_length; + u64a max_bound = g[e].maxBound == ROSE_BOUND_INF + ? ROSE_BOUND_INF + : g[e].maxBound + lit_length; + + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + assert(g[u].fixedOffset()); + // Make offsets absolute. + min_bound += g[u].max_offset; + if (max_bound != ROSE_BOUND_INF) { + max_bound += g[u].max_offset; + } + } + + assert(max_bound <= ROSE_BOUND_INF); + assert(min_bound <= max_bound); + + // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET + // (max value of a u64a) to represent ROSE_BOUND_INF. + if (max_bound == ROSE_BOUND_INF) { + max_bound = MAX_OFFSET; + } + + // This instruction should be doing _something_ -- bounds should be tighter + // than just {length, inf}. + assert(min_bound > lit_length || max_bound < MAX_OFFSET); + + const auto *end_inst = program.end_instruction(); + program.add_before_end( + make_unique(min_bound, max_bound, end_inst)); +} + +static +void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build, + RoseVertex v, RoseProgram &program) { + rose_group groups = g[v].groups; + if (!groups) { + return; + } + + // The set of "already on" groups as we process this vertex is the + // intersection of the groups set by our predecessors. + assert(in_degree(v, g) > 0); + rose_group already_on = ~rose_group{0}; + for (const auto &u : inv_adjacent_vertices_range(v, g)) { + already_on &= prog_build.vertex_group_map.at(u); + } + + DEBUG_PRINTF("already_on=0x%llx\n", already_on); + DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups); + DEBUG_PRINTF("groups=0x%llx\n", groups); + + already_on &= ~prog_build.squashable_groups; + DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on); + + // We don't *have* to mask off the groups that we know are already on, but + // this will make bugs more apparent. + groups &= ~already_on; + + if (!groups) { + DEBUG_PRINTF("no new groups to set, skipping\n"); + return; + } + + program.add_before_end(make_unique(groups)); +} + +static +void addLookaround(lookaround_info &lookarounds, + const vector> &look, + u32 &look_index, u32 &reach_index) { + // Check the cache. 
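// The cache below is a plain memo keyed by the entire lookaround program:
// identical programs share one pair of table entries. Shape of the lookup,
// with hypothetical aliases standing in for the real key/value types:
using LookKey = vector<vector<LookEntry>>;
using LookIdx = pair<size_t, size_t>; // (look table idx, reach table idx)
static
const LookIdx *findCachedLook(const map<LookKey, LookIdx> &cache,
                              const LookKey &key) {
    auto it = cache.find(key);
    return it == cache.end() ? nullptr : &it->second;
}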
+ auto it = lookarounds.cache.find(look); + if (it != lookarounds.cache.end()) { + look_index = verify_u32(it->second.first); + reach_index = verify_u32(it->second.second); + DEBUG_PRINTF("reusing look at idx %u\n", look_index); + DEBUG_PRINTF("reusing reach at idx %u\n", reach_index); + return; + } + + size_t look_idx = lookarounds.lookTableSize; + size_t reach_idx = lookarounds.reachTableSize; + + if (look.size() == 1) { + lookarounds.lookTableSize += look.front().size(); + lookarounds.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN; + } else { + lookarounds.lookTableSize += look.size(); + lookarounds.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN; + } + + lookarounds.cache.emplace(look, make_pair(look_idx, reach_idx)); + lookarounds.table.emplace_back(look); + + DEBUG_PRINTF("adding look at idx %zu\n", look_idx); + DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx); + look_index = verify_u32(look_idx); + reach_index = verify_u32(reach_idx); +} + +static +bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) { + size_t reach_size = cr.count(); + assert(reach_size > 0); + // check whether entry_size is some power of 2. + if ((reach_size - 1) & reach_size) { + return false; + } + make_and_cmp_mask(cr, &andmask, &cmpmask); + if ((1 << popcount32((u8)(~andmask))) ^ reach_size) { + return false; + } + return true; +} + +static +bool checkReachWithFlip(const CharReach &cr, u8 &andmask, + u8 &cmpmask, u8 &flip) { + if (checkReachMask(cr, andmask, cmpmask)) { + flip = 0; + return true; + } + if (checkReachMask(~cr, andmask, cmpmask)) { + flip = 1; + return true; + } + return false; +} + +static +bool makeRoleByte(const vector &look, RoseProgram &program) { + if (look.size() == 1) { + const auto &entry = look[0]; + u8 andmask_u8, cmpmask_u8; + u8 flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) { + return false; + } + s32 checkbyte_offset = verify_s32(entry.offset); + DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(andmask_u8, cmpmask_u8, flip, + checkbyte_offset, end_inst); + program.add_before_end(move(ri)); + return true; + } + return false; +} + +static +bool makeRoleMask(const vector &look, RoseProgram &program) { + if (look.back().offset < look.front().offset + 8) { + s32 base_offset = verify_s32(look.front().offset); + u64a and_mask = 0; + u64a cmp_mask = 0; + u64a neg_mask = 0; + for (const auto &entry : look) { + u8 andmask_u8, cmpmask_u8, flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, + cmpmask_u8, flip)) { + return false; + } + DEBUG_PRINTF("entry offset %d\n", entry.offset); + u32 shift = (entry.offset - base_offset) << 3; + and_mask |= (u64a)andmask_u8 << shift; + cmp_mask |= (u64a)cmpmask_u8 << shift; + if (flip) { + neg_mask |= 0xffLLU << shift; + } + } + DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n", + and_mask, cmp_mask); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); + return true; + } + return false; +} + +static UNUSED +string convertMaskstoString(u8 *p, int byte_len) { + string s; + for (int i = 0; i < byte_len; i++) { + u8 hi = *p >> 4; + u8 lo = *p & 0xf; + s += (char)(hi + (hi < 10 ? 48 : 87)); + s += (char)(lo + (lo < 10 ? 
48 : 87)); + p++; + } + return s; +} + +static +bool makeRoleMask32(const vector &look, + RoseProgram &program) { + if (look.back().offset >= look.front().offset + 32) { + return false; + } + s32 base_offset = verify_s32(look.front().offset); + array and_mask, cmp_mask; + and_mask.fill(0); + cmp_mask.fill(0); + u32 neg_mask = 0; + for (const auto &entry : look) { + u8 andmask_u8, cmpmask_u8, flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, + cmpmask_u8, flip)) { + return false; + } + u32 shift = entry.offset - base_offset; + assert(shift < 32); + and_mask[shift] = andmask_u8; + cmp_mask[shift] = cmpmask_u8; + if (flip) { + neg_mask |= 1 << shift; + } + } + + DEBUG_PRINTF("and_mask %s\n", + convertMaskstoString(and_mask.data(), 32).c_str()); + DEBUG_PRINTF("cmp_mask %s\n", + convertMaskstoString(cmp_mask.data(), 32).c_str()); + DEBUG_PRINTF("neg_mask %08x\n", neg_mask); + DEBUG_PRINTF("base_offset %d\n", base_offset); + + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); + return true; +} + +// Sorting by the size of every bucket. +// Used in map, cmpNibble>. +struct cmpNibble { + bool operator()(const u32 data1, const u32 data2) const{ + u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16); + u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16); + return std::tie(size1, data1) < std::tie(size2, data2); + } +}; + +// Insert all pairs of bucket and offset into buckets. +static really_inline +void getAllBuckets(const vector &look, + map, cmpNibble> &buckets, u64a &neg_mask) { + s32 base_offset = verify_s32(look.front().offset); + for (const auto &entry : look) { + CharReach cr = entry.reach; + // Flip heavy character classes to save buckets. + if (cr.count() > 128 ) { + cr.flip(); + } else { + neg_mask ^= 1ULL << (entry.offset - base_offset); + } + map lo2hi; + // We treat Ascii Table as a 16x16 grid. + // Push every row in cr into lo2hi and mark the row number. + for (size_t i = cr.find_first(); i != CharReach::npos;) { + u8 it_hi = i >> 4; + u16 low_encode = 0; + while (i != CharReach::npos && (i >> 4) == it_hi) { + low_encode |= 1 << (i & 0xf); + i = cr.find_next(i); + } + lo2hi[low_encode] |= 1 << it_hi; + } + for (const auto &it : lo2hi) { + u32 hi_lo = (it.second << 16) | it.first; + buckets[hi_lo].push_back(entry.offset); + } + } +} + +// Once we have a new bucket, we'll try to combine it with all old buckets. +static really_inline +void nibUpdate(map &nib, u32 hi_lo) { + u16 hi = hi_lo >> 16; + u16 lo = hi_lo & 0xffff; + for (const auto pairs : nib) { + u32 old = pairs.first; + if ((old >> 16) == hi || (old & 0xffff) == lo) { + if (!nib[old | hi_lo]) { + nib[old | hi_lo] = nib[old] | nib[hi_lo]; + } + } + } +} + +static really_inline +void nibMaskUpdate(array &mask, u32 data, u8 bit_index) { + for (u8 index = 0; data > 0; data >>= 1, index++) { + if (data & 1) { + // 0 ~ 7 bucket in first 16 bytes, + // 8 ~ 15 bucket in second 16 bytes. + if (bit_index >= 8) { + mask[index + 16] |= 1 << (bit_index - 8); + } else { + mask[index] |= 1 << bit_index; + } + } + } +} + +static +bool getShuftiMasks(const vector &look, array &hi_mask, + array &lo_mask, u8 *bucket_select_hi, + u8 *bucket_select_lo, u64a &neg_mask, + u8 &bit_idx, size_t len) { + map nib; // map every bucket to its bucket number. 
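// The 16x16 grid in isolation: a byte's high nibble picks a row and its low
// nibble a column, so a character class becomes a handful of (row set,
// column set) buckets. Worked example for {'a','b'} = {0x61, 0x62}, which
// lands in row 6 with columns 1 and 2:
static
void nibbleGridExample() {
    u16 low_encode = (1 << (0x61 & 0xf)) | (1 << (0x62 & 0xf)); // 0x0006
    u16 hi_rows = 1 << (0x61 >> 4);                             // 0x0040
    assert(low_encode == 0x0006 && hi_rows == 0x0040);
    (void)low_encode;
    (void)hi_rows;
}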
+ map, cmpNibble> bucket2offsets; + s32 base_offset = look.front().offset; + + bit_idx = 0; + neg_mask = ~0ULL; + + getAllBuckets(look, bucket2offsets, neg_mask); + + for (const auto &it : bucket2offsets) { + u32 hi_lo = it.first; + // New bucket. + if (!nib[hi_lo]) { + if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) { + return false; + } + nib[hi_lo] = 1 << bit_idx; + + nibUpdate(nib, hi_lo); + nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx); + nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx); + bit_idx++; + } + + DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]); + + // Update bucket_select_mask. + u8 nib_hi = nib[hi_lo] >> 8; + u8 nib_lo = nib[hi_lo] & 0xff; + for (const auto offset : it.second) { + bucket_select_hi[offset - base_offset] |= nib_hi; + bucket_select_lo[offset - base_offset] |= nib_lo; + } + } + return true; +} + +static +unique_ptr +makeCheckShufti16x8(u32 offset_range, u8 bucket_idx, + const array &hi_mask, const array &lo_mask, + const array &bucket_select_mask, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 16 || bucket_idx > 8) { + return nullptr; + } + array nib_mask; + array bucket_select_mask_16; + copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); + copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); + copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16, + bucket_select_mask_16.begin()); + return make_unique + (nib_mask, bucket_select_mask_16, + neg_mask & 0xffff, base_offset, end_inst); +} + +static +unique_ptr +makeCheckShufti32x8(u32 offset_range, u8 bucket_idx, + const array &hi_mask, const array &lo_mask, + const array &bucket_select_mask, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 32 || bucket_idx > 8) { + return nullptr; + } + + array hi_mask_16; + array lo_mask_16; + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); + return make_unique + (hi_mask_16, lo_mask_16, bucket_select_mask, + neg_mask, base_offset, end_inst); +} + +static +unique_ptr +makeCheckShufti16x16(u32 offset_range, u8 bucket_idx, + const array &hi_mask, const array &lo_mask, + const array &bucket_select_mask_lo, + const array &bucket_select_mask_hi, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 16 || bucket_idx > 16) { + return nullptr; + } + + array bucket_select_mask_32; + copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16, + bucket_select_mask_32.begin()); + copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16, + bucket_select_mask_32.begin() + 16); + return make_unique + (hi_mask, lo_mask, bucket_select_mask_32, + neg_mask & 0xffff, base_offset, end_inst); +} +static +unique_ptr +makeCheckShufti32x16(u32 offset_range, u8 bucket_idx, + const array &hi_mask, const array &lo_mask, + const array &bucket_select_mask_lo, + const array &bucket_select_mask_hi, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 32 || bucket_idx > 16) { + return nullptr; + } + + return make_unique + (hi_mask, lo_mask, bucket_select_mask_hi, + bucket_select_mask_lo, neg_mask, base_offset, end_inst); +} + +static +bool makeRoleShufti(const vector &look, RoseProgram &program) { + + s32 base_offset = verify_s32(look.front().offset); + if (look.back().offset >= base_offset + 32) { + return false; + } + + u8 bucket_idx = 0; // number of buckets + u64a neg_mask_64; + array hi_mask; + 
array lo_mask; + array bucket_select_hi; + array bucket_select_lo; + hi_mask.fill(0); + lo_mask.fill(0); + bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. + bucket_select_lo.fill(0); + + if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(), + bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) { + return false; + } + u32 neg_mask = (u32)neg_mask_64; + + DEBUG_PRINTF("hi_mask %s\n", + convertMaskstoString(hi_mask.data(), 32).c_str()); + DEBUG_PRINTF("lo_mask %s\n", + convertMaskstoString(lo_mask.data(), 32).c_str()); + DEBUG_PRINTF("bucket_select_hi %s\n", + convertMaskstoString(bucket_select_hi.data(), 32).c_str()); + DEBUG_PRINTF("bucket_select_lo %s\n", + convertMaskstoString(bucket_select_lo.data(), 32).c_str()); + + const auto *end_inst = program.end_instruction(); + s32 offset_range = look.back().offset - base_offset + 1; + + auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, neg_mask, base_offset, + end_inst); + if (!ri) { + ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, neg_mask, base_offset, + end_inst); + } + if (!ri) { + ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, bucket_select_hi, + neg_mask, base_offset, end_inst); + } + if (!ri) { + ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, bucket_select_hi, + neg_mask, base_offset, end_inst); + } + assert(ri); + program.add_before_end(move(ri)); + + return true; +} + +/** + * Builds a lookaround instruction, or an appropriate specialization if one is + * available. + */ +static +void makeLookaroundInstruction(lookaround_info &lookarounds, + const vector &look, + RoseProgram &program) { + assert(!look.empty()); + + if (makeRoleByte(look, program)) { + return; + } + + if (look.size() == 1) { + s8 offset = look.begin()->offset; + u32 look_idx, reach_idx; + vector> lookaround; + lookaround.emplace_back(look); + addLookaround(lookarounds, lookaround, look_idx, reach_idx); + // We don't need look_idx here. 
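+        // (The single-entry check instruction carries the offset itself,
+        // so only the reach table index is needed at runtime.)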
+ auto ri = make_unique(offset, reach_idx, + program.end_instruction()); + program.add_before_end(move(ri)); + return; + } + + if (makeRoleMask(look, program)) { + return; + } + + if (makeRoleMask32(look, program)) { + return; + } + + if (makeRoleShufti(look, program)) { + return; + } + + u32 look_idx, reach_idx; + vector> lookaround; + lookaround.emplace_back(look); + addLookaround(lookarounds, lookaround, look_idx, reach_idx); + u32 look_count = verify_u32(look.size()); + + auto ri = make_unique(look_idx, reach_idx, + look_count, + program.end_instruction()); + program.add_before_end(move(ri)); +} + +static +void makeCheckLitMaskInstruction(const RoseBuildImpl &build, + lookaround_info &lookarounds, u32 lit_id, + RoseProgram &program) { + const auto &info = build.literal_info.at(lit_id); + if (!info.requires_benefits) { + return; + } + + vector look; + + const ue2_literal &s = build.literals.right.at(lit_id).s; + DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id, + dumpString(s).c_str()); + assert(s.length() <= MAX_MASK2_WIDTH); + s32 i = 0 - s.length(); + for (const auto &e : s) { + if (!e.nocase) { + look.emplace_back(verify_s8(i), e); + } + i++; + } + + assert(!look.empty()); + makeLookaroundInstruction(lookarounds, look, program); +} + +static +void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id, + const vector &lit_edges, + u32 floatingMinLiteralMatchOffset, + RoseProgram &prog) { + if (lit_edges.empty()) { + return; + } + + if (floatingMinLiteralMatchOffset == 0) { + return; + } + + RoseVertex v = target(lit_edges.front(), build.g); + if (!build.isFloating(v)) { + return; + } + + const auto &lit = build.literals.right.at(lit_id); + size_t min_len = lit.elength(); + u32 min_offset = findMinOffset(build, lit_id); + DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len, + min_offset, floatingMinLiteralMatchOffset); + + // If we can't match before the min offset, we don't need the check. + if (min_len >= floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("no need for check, min is %u\n", + floatingMinLiteralMatchOffset); + return; + } + + assert(min_offset >= floatingMinLiteralMatchOffset); + assert(min_offset < UINT32_MAX); + + DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); + const auto *end = prog.end_instruction(); + prog.add_before_end(make_unique(min_offset, end)); +} + +static +void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id, + RoseProgram &prog) { + const auto &info = build.literal_info.at(lit_id); + + if (!info.group_mask) { + return; + } + prog.add_before_end(make_unique(info.group_mask)); +} + +static +bool hasDelayedLiteral(const RoseBuildImpl &build, + const vector &lit_edges) { + auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1); + for (const auto &e : lit_edges) { + auto v = target(e, build.g); + const auto &lits = build.g[v].literals; + if (any_of(begin(lits), end(lits), is_delayed)) { + return true; + } + } + return false; +} + +static +RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, + lookaround_info &lookarounds, + ProgramBuild &prog_build, u32 lit_id, + const vector &lit_edges, + bool is_anchored_replay_program) { + RoseProgram program; + + // Check long literal info. + if (!build.isDelayed(lit_id)) { + makeCheckLiteralInstruction(build.literals.right.at(lit_id), + prog_build.longLitLengthThreshold, + program, build.cc); + } + + // Check lit mask. + makeCheckLitMaskInstruction(build, lookarounds, lit_id, program); + + // Check literal groups. 
This is an optimisation that we only perform for
+    // delayed literals, as their groups may be switched off; ordinarily, we
+    // can trust the HWLM matcher.
+    if (hasDelayedLiteral(build, lit_edges)) {
+        makeGroupCheckInstruction(build, lit_id, program);
+    }
+
+    // Add instructions for pushing delayed matches, if there are any.
+    makePushDelayedInstructions(build.literals, prog_build,
+                                build.literal_info.at(lit_id).delayed_ids,
+                                program);
+
+    // Add pre-check for early literals in the floating table.
+    makeCheckLitEarlyInstruction(build, lit_id, lit_edges,
+                                 prog_build.floatingMinLiteralMatchOffset,
+                                 program);
+
+    /* Check if we are able to deliver matches from the anchored table now */
+    if (!is_anchored_replay_program) {
+        makeAnchoredLiteralDelay(build, prog_build, lit_id, program);
+    }
+
+    return program;
+}
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+static UNUSED
+string dumpMultiLook(const vector<LookEntry> &looks) {
+    ostringstream oss;
+    for (auto it = looks.begin(); it != looks.end(); ++it) {
+        if (it != looks.begin()) {
+            oss << ", ";
+        }
+        oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}";
+    }
+    return oss.str();
+}
+#endif
+
+static
+bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
+                             RoseProgram &program) {
+    if (multi_look.empty()) {
+        return false;
+    }
+
+    // find the base offset
+    assert(!multi_look[0].empty());
+    s32 base_offset = multi_look[0].front().offset;
+    s32 last_start = base_offset;
+    s32 end_offset = multi_look[0].back().offset;
+    size_t multi_len = 0;
+
+    for (const auto &look : multi_look) {
+        assert(look.size() > 0);
+        multi_len += look.size();
+
+        LIMIT_TO_AT_MOST(&base_offset, look.front().offset);
+        ENSURE_AT_LEAST(&last_start, look.front().offset);
+        ENSURE_AT_LEAST(&end_offset, look.back().offset);
+    }
+
+    assert(last_start < 0);
+
+    if (end_offset - base_offset >= MULTIPATH_MAX_LEN) {
+        return false;
+    }
+
+    if (multi_len <= 16) {
+        multi_len = 16;
+    } else if (multi_len <= 32) {
+        multi_len = 32;
+    } else if (multi_len <= 64) {
+        multi_len = 64;
+    } else {
+        DEBUG_PRINTF("too long for multi-path\n");
+        return false;
+    }
+
+    vector<LookEntry> linear_look;
+    array<u8, 64> data_select_mask;
+    data_select_mask.fill(0);
+    u64a hi_bits_mask = 0;
+    u64a lo_bits_mask = 0;
+
+    for (const auto &look : multi_look) {
+        assert(linear_look.size() < 64);
+        lo_bits_mask |= 1LLU << linear_look.size();
+        for (const auto &entry : look) {
+            assert(entry.offset - base_offset < MULTIPATH_MAX_LEN);
+            data_select_mask[linear_look.size()] =
+                verify_u8(entry.offset - base_offset);
+            linear_look.emplace_back(verify_s8(linear_look.size()),
+                                     entry.reach);
+        }
+        hi_bits_mask |= 1LLU << (linear_look.size() - 1);
+    }
+
+    u8 bit_index = 0; // number of buckets
+    u64a neg_mask;
+    array<u8, 32> hi_mask;
+    array<u8, 32> lo_mask;
+    array<u8, 64> bucket_select_hi;
+    array<u8, 64> bucket_select_lo;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    bucket_select_hi.fill(0);
+    bucket_select_lo.fill(0);
+
+    if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(),
+                        bucket_select_lo.data(), neg_mask, bit_index,
+                        multi_len)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("hi_mask %s\n",
+                 convertMaskstoString(hi_mask.data(), 16).c_str());
+    DEBUG_PRINTF("lo_mask %s\n",
+                 convertMaskstoString(lo_mask.data(), 16).c_str());
+    DEBUG_PRINTF("bucket_select_hi %s\n",
+                 convertMaskstoString(bucket_select_hi.data(), 64).c_str());
+    DEBUG_PRINTF("bucket_select_lo %s\n",
+                 convertMaskstoString(bucket_select_lo.data(), 64).c_str());
+    DEBUG_PRINTF("data_select_mask %s\n",
+                 convertMaskstoString(data_select_mask.data(),
64).c_str()); + DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask); + DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask); + DEBUG_PRINTF("neg_mask %llx\n", neg_mask); + DEBUG_PRINTF("base_offset %d\n", base_offset); + DEBUG_PRINTF("last_start %d\n", last_start); + + // Since we don't have 16x16 now, just call 32x16 instead. + if (bit_index > 8) { + assert(multi_len <= 32); + multi_len = 32; + } + + const auto *end_inst = program.end_instruction(); + assert(multi_len == 16 || multi_len == 32 || multi_len == 64); + if (multi_len == 16) { + neg_mask &= 0xffff; + assert(!(hi_bits_mask & ~0xffffULL)); + assert(!(lo_bits_mask & ~0xffffULL)); + assert(bit_index <=8); + array nib_mask; + copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin()); + copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16); + + auto ri = make_unique + (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } else if (multi_len == 32) { + neg_mask &= 0xffffffff; + assert(!(hi_bits_mask & ~0xffffffffULL)); + assert(!(lo_bits_mask & ~0xffffffffULL)); + if (bit_index <= 8) { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } else { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } + } else { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } + return true; +} + +static +void makeRoleMultipathLookaround(lookaround_info &lookarounds, + const vector> &multi_look, + RoseProgram &program) { + assert(!multi_look.empty()); + assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); + vector> ordered_look; + set look_offset; + + assert(!multi_look[0].empty()); + s32 last_start = multi_look[0][0].offset; + + // build offset table. 
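+    // For example, paths at offsets {-3,-2,-1} and {-2,-1} give the ordered
+    // offset set {-3,-2,-1}; each row of ordered_look below then holds one
+    // entry per path for that offset (paths without an entry at the offset
+    // keep a default-constructed LookEntry).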
+ for (const auto &look : multi_look) { + assert(look.size() > 0); + last_start = max(last_start, (s32)look.begin()->offset); + + for (const auto &t : look) { + look_offset.insert(t.offset); + } + } + + array start_mask; + if (multi_look.size() < MAX_LOOKAROUND_PATHS) { + start_mask.fill((1 << multi_look.size()) - 1); + } else { + start_mask.fill(0xff); + } + + u32 path_idx = 0; + for (const auto &look : multi_look) { + for (const auto &t : look) { + assert(t.offset >= (int)*look_offset.begin()); + size_t update_offset = t.offset - *look_offset.begin() + 1; + if (update_offset < start_mask.size()) { + start_mask[update_offset] &= ~(1 << path_idx); + } + } + path_idx++; + } + + for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) { + start_mask[i] &= start_mask[i - 1]; + DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]); + } + + assert(look_offset.size() <= MULTIPATH_MAX_LEN); + + assert(last_start < 0); + + for (const auto &offset : look_offset) { + vector multi_entry; + multi_entry.resize(MAX_LOOKAROUND_PATHS); + + for (size_t i = 0; i < multi_look.size(); i++) { + for (const auto &t : multi_look[i]) { + if (t.offset == offset) { + multi_entry[i] = t; + } + } + } + ordered_look.emplace_back(multi_entry); + } + + u32 look_idx, reach_idx; + addLookaround(lookarounds, ordered_look, look_idx, reach_idx); + u32 look_count = verify_u32(ordered_look.size()); + + auto ri = make_unique(look_idx, reach_idx, + look_count, last_start, + start_mask, + program.end_instruction()); + program.add_before_end(move(ri)); +} + +static +void makeRoleLookaround(const RoseBuildImpl &build, + const map &leftfix_info, + lookaround_info &lookarounds, RoseVertex v, + RoseProgram &program) { + if (!build.cc.grey.roseLookaroundMasks) { + return; + } + + vector> looks; + + // Lookaround from leftfix (mandatory). + if (contains(leftfix_info, v) && leftfix_info.at(v).has_lookaround) { + DEBUG_PRINTF("using leftfix lookaround\n"); + looks = leftfix_info.at(v).lookaround; + } + + // We may be able to find more lookaround info (advisory) and merge it + // in. + if (looks.size() <= 1) { + vector look; + vector look_more; + if (!looks.empty()) { + look = move(looks.front()); + } + findLookaroundMasks(build, v, look_more); + mergeLookaround(look, look_more); + if (!look.empty()) { + makeLookaroundInstruction(lookarounds, look, program); + } + return; + } + + if (!makeRoleMultipathShufti(looks, program)) { + assert(looks.size() <= 8); + makeRoleMultipathLookaround(lookarounds, looks, program); + } +} + +static +void makeRoleSuffix(const RoseBuildImpl &build, + const map &suffixes, + const map &engine_info_by_queue, + RoseVertex v, RoseProgram &prog) { + const auto &g = build.g; + if (!g[v].suffix) { + return; + } + assert(contains(suffixes, g[v].suffix)); + u32 queue = suffixes.at(g[v].suffix); + u32 event; + assert(contains(engine_info_by_queue, queue)); + const auto eng_info = engine_info_by_queue.at(queue); + if (isContainerType(eng_info.type)) { + auto tamaProto = g[v].suffix.tamarama.get(); + assert(tamaProto); + event = (u32)MQE_TOP_FIRST + + tamaProto->top_remap.at(make_pair(g[v].index, + g[v].suffix.top)); + assert(event < MQE_INVALID); + } else if (isMultiTopType(eng_info.type)) { + assert(!g[v].suffix.haig); + event = (u32)MQE_TOP_FIRST + g[v].suffix.top; + assert(event < MQE_INVALID); + } else { + // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP + // event. 
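+        // (The multi-top engines above use MQE_TOP_FIRST + top instead:
+        // e.g. tops 0 and 2 become events MQE_TOP_FIRST and
+        // MQE_TOP_FIRST + 2.)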
+ assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); + event = MQE_TOP; + } + + prog.add_before_end(make_unique(queue, event)); +} + +static +void addInfixTriggerInstructions(vector triggers, + RoseProgram &prog) { + // Order, de-dupe and add instructions to the end of program. + sort_and_unique(triggers, [](const TriggerInfo &a, const TriggerInfo &b) { + return tie(a.cancel, a.queue, a.event) < + tie(b.cancel, b.queue, b.event); + }); + for (const auto &ti : triggers) { + prog.add_before_end( + make_unique(ti.cancel, ti.queue, ti.event)); + } +} + +static +void makeRoleInfixTriggers(const RoseBuildImpl &build, + const map &leftfix_info, + const map &engine_info_by_queue, + RoseVertex u, RoseProgram &program) { + const auto &g = build.g; + + vector triggers; + + for (const auto &e : out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (!g[v].left) { + continue; + } + + assert(contains(leftfix_info, v)); + const left_build_info &lbi = leftfix_info.at(v); + if (lbi.has_lookaround) { + continue; + } + + assert(contains(engine_info_by_queue, lbi.queue)); + const auto &eng_info = engine_info_by_queue.at(lbi.queue); + + // DFAs have no TOP_N support, so they get a classic MQE_TOP event. + u32 top; + if (isContainerType(eng_info.type)) { + auto tamaProto = g[v].left.tamarama.get(); + assert(tamaProto); + top = MQE_TOP_FIRST + tamaProto->top_remap.at( + make_pair(g[v].index, g[e].rose_top)); + assert(top < MQE_INVALID); + } else if (!isMultiTopType(eng_info.type)) { + assert(num_tops(g[v].left) == 1); + top = MQE_TOP; + } else { + top = MQE_TOP_FIRST + g[e].rose_top; + assert(top < MQE_INVALID); + } + + triggers.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); + } + + addInfixTriggerInstructions(move(triggers), program); +} + + +/** + * \brief True if the given vertex is a role that can only be switched on at + * EOD. + */ +static +bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { + const RoseGraph &g = tbi.g; + + // All such roles have only (0,0) edges to vertices with the eod_accept + // property, and no other effects (suffixes, ordinary reports, etc, etc). 
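+    // For example, for a pattern like /abc$/ the role for 'abc' is only
+    // useful when the literal ends exactly at EOD: its sole successor is an
+    // eod_accept vertex reached via a (0,0) edge.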
+
+    if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) {
+        return false;
+    }
+
+    for (const auto &e : out_edges_range(v, g)) {
+        RoseVertex w = target(e, g);
+        if (!g[w].eod_accept) {
+            return false;
+        }
+        assert(!g[w].reports.empty());
+        assert(g[w].literals.empty());
+
+        if (g[e].minBound || g[e].maxBound) {
+            return false;
+        }
+    }
+
+    /* There is no point in enforcing this check at runtime if this role is
+     * only fired by the eod event literal */
+    if (tbi.eod_event_literal_id != MO_INVALID_IDX &&
+        g[v].literals.size() == 1 &&
+        *g[v].literals.begin() == tbi.eod_event_literal_id) {
+        return false;
+    }
+
+    return true;
+}
+
+static
+void addCheckOnlyEodInstruction(RoseProgram &prog) {
+    DEBUG_PRINTF("only at eod\n");
+    const auto *end_inst = prog.end_instruction();
+    prog.add_before_end(make_unique<RoseInstrCheckOnlyEod>(end_inst));
+}
+
+static
+void makeRoleEagerEodReports(const RoseBuildImpl &build,
+                             const map<RoseVertex, left_build_info> &leftfix_info,
+                             bool needs_catchup, RoseVertex v,
+                             RoseProgram &program) {
+    RoseProgram eod_program;
+
+    for (const auto &e : out_edges_range(v, build.g)) {
+        if (canEagerlyReportAtEod(build, e)) {
+            RoseProgram block;
+            makeRoleReports(build, leftfix_info, needs_catchup,
+                            target(e, build.g), block);
+            eod_program.add_block(move(block));
+        }
+    }
+
+    if (eod_program.empty()) {
+        return;
+    }
+
+    if (!onlyAtEod(build, v)) {
+        // The rest of our program wasn't EOD anchored, so we need to guard
+        // these reports with a check.
+        addCheckOnlyEodInstruction(program);
+    }
+
+    program.add_before_end(move(eod_program));
+}
+
+/* Makes a program for a role/vertex given a specific pred/in_edge. */
+static
+RoseProgram makeRoleProgram(const RoseBuildImpl &build,
+                            const map<RoseVertex, left_build_info> &leftfix_info,
+                            const map<suffix_id, u32> &suffixes,
+                            const map<u32, engine_info> &engine_info_by_queue,
+                            lookaround_info &lookarounds,
+                            unordered_map<RoseVertex, u32> roleStateIndices,
+                            ProgramBuild &prog_build, const RoseEdge &e) {
+    const RoseGraph &g = build.g;
+    auto v = target(e, g);
+
+    RoseProgram program;
+
+    // First, add program instructions that enforce preconditions without
+    // effects.
+
+    if (onlyAtEod(build, v)) {
+        addCheckOnlyEodInstruction(program);
+    }
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        makeRoleCheckBounds(build, v, e, program);
+    }
+
+    // This role program may be triggered by different predecessors, with
+    // different offset bounds. We must ensure we put this check/set operation
+    // after the bounds check to deal with this case.
+    if (in_degree(v, g) > 1) {
+        assert(!build.isRootSuccessor(v));
+        makeRoleCheckNotHandled(prog_build, v, program);
+    }
+
+    makeRoleLookaround(build, leftfix_info, lookarounds, v, program);
+    makeRoleCheckLeftfix(build, leftfix_info, v, program);
+
+    // Next, we can add program instructions that have effects. This must be
+    // done as a series of blocks, as some of them (like reports) are
+    // escapable.
+
+    RoseProgram effects_block;
+
+    RoseProgram reports_block;
+    makeRoleReports(build, leftfix_info, prog_build.needs_catchup, v,
+                    reports_block);
+    effects_block.add_block(move(reports_block));
+
+    RoseProgram infix_block;
+    makeRoleInfixTriggers(build, leftfix_info, engine_info_by_queue, v,
+                          infix_block);
+    effects_block.add_block(move(infix_block));
+
+    // Note: SET_GROUPS instruction must be after infix triggers, as an infix
+    // going dead may switch off groups.
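+    // (i.e. an infix triggered here may prove dead and squash some groups;
+    // running SET_GROUPS afterwards ensures that squash cannot switch off
+    // groups this role has just enabled.)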
+ RoseProgram groups_block; + makeRoleGroups(build.g, prog_build, v, groups_block); + effects_block.add_block(move(groups_block)); + + RoseProgram suffix_block; + makeRoleSuffix(build, suffixes, engine_info_by_queue, v, suffix_block); + effects_block.add_block(move(suffix_block)); + + RoseProgram state_block; + makeRoleSetState(roleStateIndices, v, state_block); + effects_block.add_block(move(state_block)); + + // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if + // the program doesn't have one already). + RoseProgram eod_block; + makeRoleEagerEodReports(build, leftfix_info, prog_build.needs_catchup, v, + eod_block); + effects_block.add_block(move(eod_block)); + + /* a 'ghost role' may do nothing if we know that its groups are already set + * - in this case we can avoid producing a program at all. */ + if (effects_block.empty()) { + return {}; + } + + program.add_before_end(move(effects_block)); + return program; +} + +static +void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id, + RoseProgram &prog) { + const auto &info = build.literal_info.at(lit_id); + if (!info.squash_group) { + return; + } + + DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask); + assert(info.group_mask); + /* Note: group_mask is negated. */ + prog.add_before_end(make_unique(~info.group_mask)); +} + +RoseProgram assembleProgramBlocks(vector &&blocks) { + DEBUG_PRINTF("%zu blocks before dedupe\n", blocks.size()); + + sort(blocks.begin(), blocks.end(), + [](const RoseProgram &a, const RoseProgram &b) { + RoseProgramHash hasher; + return hasher(a) < hasher(b); + }); + + blocks.erase(unique(blocks.begin(), blocks.end(), RoseProgramEquivalence()), + blocks.end()); + + DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size()); + + RoseProgram prog; + for (auto &block : blocks) { + /* If we have multiple blocks from different literals and any of them + * squash groups, we will have to add a CLEAR_WORK_DONE instruction to + * each literal program block to clear the work_done flags so that it's + * only set if a state has been. */ + if (!prog.empty() && reads_work_done_flag(block)) { + RoseProgram clear_block; + clear_block.add_before_end(make_unique()); + prog.add_block(move(clear_block)); + } + + prog.add_block(move(block)); + } + + return prog; +} + +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, + const map &leftfix_info, + const map &suffixes, + const map &engine_info_by_queue, + lookaround_info &lookarounds, + unordered_map roleStateIndices, + ProgramBuild &prog_build, u32 lit_id, + const vector &lit_edges, + bool is_anchored_replay_program) { + const auto &g = build.g; + + DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size()); + + // Construct initial program up front, as its early checks must be able + // to jump to end and terminate processing for this literal. + auto lit_program = makeLitInitialProgram(build, lookarounds, prog_build, + lit_id, lit_edges, + is_anchored_replay_program); + + RoseProgram role_programs; + + // Predecessor state id -> program block. + map pred_blocks; + + // Construct sparse iter sub-programs. + for (const auto &e : lit_edges) { + const auto &u = source(e, g); + if (build.isAnyStart(u)) { + continue; // Root roles are not handled with sparse iterator. 
+        }
+        DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
+        assert(contains(roleStateIndices, u));
+        u32 pred_state = roleStateIndices.at(u);
+        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
+                                         engine_info_by_queue, lookarounds,
+                                         roleStateIndices, prog_build, e);
+        if (!role_prog.empty()) {
+            pred_blocks[pred_state].add_block(move(role_prog));
+        }
+    }
+
+    // Add blocks to deal with non-root edges (triggered by sparse iterator or
+    // mmbit_isset checks).
+    addPredBlocks(pred_blocks, roleStateIndices.size(), role_programs);
+
+    // Add blocks to handle root roles.
+    for (const auto &e : lit_edges) {
+        const auto &u = source(e, g);
+        if (!build.isAnyStart(u)) {
+            continue;
+        }
+        DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
+        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
+                                         engine_info_by_queue, lookarounds,
+                                         roleStateIndices, prog_build, e);
+        role_programs.add_block(move(role_prog));
+    }
+
+    if (lit_id == build.eod_event_literal_id) {
+        /* Note: does not require the lit initial program */
+        assert(build.eod_event_literal_id != MO_INVALID_IDX);
+        return role_programs;
+    }
+
+    /* Instructions to run even if a role program bails out */
+    RoseProgram unconditional_block;
+
+    // Literal may squash groups.
+    makeGroupSquashInstruction(build, lit_id, unconditional_block);
+
+    role_programs.add_block(move(unconditional_block));
+    lit_program.add_before_end(move(role_programs));
+
+    return lit_program;
+}
+
+RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
+                                    lookaround_info &lookarounds,
+                                    ProgramBuild &prog_build,
+                                    const vector<u32> &lit_ids) {
+    assert(!lit_ids.empty());
+    assert(build.cc.streaming);
+
+    vector<RoseProgram> blocks;
+
+    for (const auto &lit_id : lit_ids) {
+        DEBUG_PRINTF("lit_id=%u\n", lit_id);
+        const auto &info = build.literal_info.at(lit_id);
+        if (info.delayed_ids.empty()) {
+            continue; // No delayed IDs, no work to do.
+        }
+
+        RoseProgram prog;
+        if (!build.isDelayed(lit_id)) {
+            makeCheckLiteralInstruction(build.literals.right.at(lit_id),
+                                        prog_build.longLitLengthThreshold,
+                                        prog, build.cc);
+        }
+
+        makeCheckLitMaskInstruction(build, lookarounds, lit_id, prog);
+        makePushDelayedInstructions(build.literals, prog_build,
+                                    build.literal_info.at(lit_id).delayed_ids,
+                                    prog);
+        blocks.push_back(move(prog));
+    }
+
+    return assembleProgramBlocks(move(blocks));
+}
+
+RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
+                                 ProgramBuild &prog_build, const RoseEdge &e,
+                                 const bool multiple_preds) {
+    const RoseGraph &g = build.g;
+    const RoseVertex v = target(e, g);
+
+    RoseProgram program;
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        makeRoleCheckBounds(build, v, e, program);
+    }
+
+    if (multiple_preds) {
+        // Only necessary when there is more than one pred.
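+        // (The first pred program to run for this role at a given offset
+        // sets the role's "handled" key; CHECK_NOT_HANDLED makes programs
+        // for the other preds skip the duplicate work.)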
+ makeRoleCheckNotHandled(prog_build, v, program); + } + + makeCatchup(build.rm, prog_build.needs_catchup, g[v].reports, program); + + const bool has_som = false; + RoseProgram report_block; + for (const auto &id : g[v].reports) { + makeReport(build, id, has_som, report_block); + } + program.add_before_end(move(report_block)); + + return program; +} + +static +void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup, + ReportID id, RoseProgram &program) { + if (!needs_mpv_catchup) { + return; + } + + const Report &report = rm.getReport(id); + if (report.type == INTERNAL_ROSE_CHAIN) { + return; + } + + program.add_before_end(make_unique()); +} + +RoseProgram makeReportProgram(const RoseBuildImpl &build, + bool needs_mpv_catchup, ReportID id) { + RoseProgram prog; + + makeCatchupMpv(build.rm, needs_mpv_catchup, id, prog); + + const bool has_som = false; + makeReport(build, id, has_som, prog); + + return prog; +} + +RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, + const set &reports) { + // Note: no CATCHUP instruction is necessary in the boundary case, as we + // should always be caught up (and may not even have the resources in + // scratch to support it). + + const bool has_som = false; + RoseProgram prog; + for (const auto &id : reports) { + makeReport(build, id, has_som, prog); + } + + return prog; +} + +static +void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, + RoseProgram &program) { + // Prepend an instruction to check the pred state is on. + const auto *end_inst = pred_block.end_instruction(); + pred_block.insert(begin(pred_block), + make_unique(pred_state, end_inst)); + program.add_block(move(pred_block)); +} + +static +void addPredBlocksAny(map &pred_blocks, u32 num_states, + RoseProgram &program) { + RoseProgram sparse_program; + + vector keys; + for (const u32 &key : pred_blocks | map_keys) { + keys.push_back(key); + } + + const RoseInstruction *end_inst = sparse_program.end_instruction(); + auto ri = make_unique(num_states, keys, end_inst); + sparse_program.add_before_end(move(ri)); + + RoseProgram &block = pred_blocks.begin()->second; + + /* we no longer need the check handled instruction as all the pred-role + * blocks are being collapsed together */ + stripCheckHandledInstruction(block); + + sparse_program.add_before_end(move(block)); + program.add_block(move(sparse_program)); +} + +static +void addPredBlocksMulti(map &pred_blocks, + u32 num_states, RoseProgram &program) { + assert(!pred_blocks.empty()); + + RoseProgram sparse_program; + const RoseInstruction *end_inst = sparse_program.end_instruction(); + vector> jump_table; + + // BEGIN instruction. + auto ri_begin = make_unique(num_states, end_inst); + RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); + sparse_program.add_before_end(move(ri_begin)); + + // NEXT instructions, one per pred program. + u32 prev_key = pred_blocks.begin()->first; + for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) { + auto ri = make_unique(prev_key, begin_inst, + end_inst); + sparse_program.add_before_end(move(ri)); + prev_key = it->first; + } + + // Splice in each pred program after its BEGIN/NEXT. + auto out_it = begin(sparse_program); + for (auto &m : pred_blocks) { + u32 key = m.first; + RoseProgram &flat_prog = m.second; + assert(!flat_prog.empty()); + const size_t block_len = flat_prog.size() - 1; // without INSTR_END. 
+ + assert(dynamic_cast(out_it->get()) || + dynamic_cast(out_it->get())); + out_it = sparse_program.insert(++out_it, move(flat_prog)); + + // Jump table target for this key is the beginning of the block we just + // spliced in. + jump_table.emplace_back(key, out_it->get()); + + assert(distance(begin(sparse_program), out_it) + block_len <= + sparse_program.size()); + advance(out_it, block_len); + } + + // Write the jump table back into the SPARSE_ITER_BEGIN instruction. + begin_inst->jump_table = move(jump_table); + + program.add_block(move(sparse_program)); +} + +void addPredBlocks(map &pred_blocks, u32 num_states, + RoseProgram &program) { + // Trim empty blocks, if any exist. + for (auto it = pred_blocks.begin(); it != pred_blocks.end();) { + if (it->second.empty()) { + it = pred_blocks.erase(it); + } else { + ++it; + } + } + + const size_t num_preds = pred_blocks.size(); + if (num_preds == 0) { + return; + } + + if (num_preds == 1) { + const auto head = pred_blocks.begin(); + addPredBlockSingle(head->first, head->second, program); + return; + } + + // First, see if all our blocks are equivalent, in which case we can + // collapse them down into one. + const auto &blocks = pred_blocks | map_values; + if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) { + return RoseProgramEquivalence()(*begin(blocks), block); + })) { + DEBUG_PRINTF("all blocks equiv\n"); + addPredBlocksAny(pred_blocks, num_states, program); + return; + } + + addPredBlocksMulti(pred_blocks, num_states, program); +} + +void applyFinalSpecialisation(RoseProgram &program) { + assert(!program.empty()); + assert(program.back().code() == ROSE_INSTR_END); + if (program.size() < 2) { + return; + } + + /* Replace the second-to-last instruction (before END) with a one-shot + * specialisation if available. 
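+     * For example, a program ending in {..., REPORT, END} becomes
+     * {..., FINAL_REPORT, END}, allowing the interpreter to fire the report
+     * and return in a single step.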
*/ + auto it = next(program.rbegin()); + if (auto *ri = dynamic_cast(it->get())) { + DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); + program.replace(it, make_unique( + ri->onmatch, ri->offset_adjust)); + } +} + +void recordLongLiterals(vector &longLiterals, + const RoseProgram &program) { + for (const auto &ri : program) { + if (const auto *ri_check = + dynamic_cast(ri.get())) { + DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n", + escapeString(ri_check->literal).c_str()); + longLiterals.emplace_back(ri_check->literal, false); + continue; + } + if (const auto *ri_check = + dynamic_cast(ri.get())) { + DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n", + escapeString(ri_check->literal).c_str()); + longLiterals.emplace_back(ri_check->literal, true); + } + } +} + +void recordResources(RoseResources &resources, const RoseProgram &program) { + for (const auto &ri : program) { + switch (ri->code()) { + case ROSE_INSTR_TRIGGER_SUFFIX: + resources.has_suffixes = true; + break; + case ROSE_INSTR_TRIGGER_INFIX: + case ROSE_INSTR_CHECK_INFIX: + case ROSE_INSTR_CHECK_PREFIX: + case ROSE_INSTR_SOM_LEFTFIX: + resources.has_leftfixes = true; + break; + case ROSE_INSTR_SET_STATE: + case ROSE_INSTR_CHECK_STATE: + case ROSE_INSTR_SPARSE_ITER_BEGIN: + case ROSE_INSTR_SPARSE_ITER_NEXT: + resources.has_states = true; + break; + case ROSE_INSTR_CHECK_GROUPS: + resources.checks_groups = true; + break; + case ROSE_INSTR_PUSH_DELAYED: + resources.has_lit_delay = true; + break; + case ROSE_INSTR_CHECK_LONG_LIT: + case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: + resources.has_lit_check = true; + break; + default: + break; + } + } +} + } // namespace ue2 diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index c25aab61..d6a9e218 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -42,8 +42,10 @@ namespace ue2 { +struct LookEntry; class RoseEngineBlob; class RoseInstruction; +struct RoseResources; /** * \brief Container for a list of program instructions. @@ -145,11 +147,161 @@ public: bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const; }; -/* Removes any CHECK_HANDLED instructions from the given program */ -void stripCheckHandledInstruction(RoseProgram &prog); +/** \brief Data only used during construction of various programs (literal, + * anchored, delay, etc). */ +struct ProgramBuild : noncopyable { + explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh, + bool catchup) + : floatingMinLiteralMatchOffset(fMinLitOffset), + longLitLengthThreshold(longLitThresh), needs_catchup(catchup) { + } -/** Returns true if the program may read the the interpreter's work_done flag */ -bool reads_work_done_flag(const RoseProgram &prog); + /** \brief Minimum offset of a match from the floating table. */ + const u32 floatingMinLiteralMatchOffset; + + /** \brief Long literal length threshold, used in streaming mode. */ + const size_t longLitLengthThreshold; + + /** \brief True if reports need CATCH_UP instructions to catch up suffixes, + * outfixes etc. */ + const bool needs_catchup; + + /** \brief Mapping from vertex to key, for vertices with a + * CHECK_NOT_HANDLED instruction. */ + ue2::unordered_map handledKeys; + + /** \brief Mapping from Rose literal ID to anchored program index. */ + std::map anchored_programs; + + /** \brief Mapping from Rose literal ID to delayed program index. */ + std::map delay_programs; + + /** \brief Mapping from every vertex to the groups that must be on for that + * vertex to be reached. 
*/ + ue2::unordered_map vertex_group_map; + + /** \brief Global bitmap of groups that can be squashed. */ + rose_group squashable_groups = 0; +}; + +void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); +void addSuffixesEodProgram(RoseProgram &program); +void addMatcherEodProgram(RoseProgram &program); + +static constexpr u32 INVALID_QUEUE = ~0U; + +struct left_build_info { + // Constructor for an engine implementation. + left_build_info(u32 q, u32 l, u32 t, rose_group sm, + const std::vector &stops, u32 max_ql, u8 cm_count, + const CharReach &cm_cr); + + // Constructor for a lookaround implementation. + explicit left_build_info(const std::vector> &looks); + + u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */ + u32 lag = 0; + u32 transient = 0; + rose_group squash_mask = ~rose_group{0}; + std::vector stopAlphabet; + u32 max_queuelen = 0; + u8 countingMiracleCount = 0; + CharReach countingMiracleReach; + u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ + bool has_lookaround = false; + + // alternative implementation to the NFA + std::vector> lookaround; +}; + +struct lookaround_info : noncopyable { + /** \brief LookEntry list cache, so that we can reuse the look index and + * reach index for the same lookaround. */ + ue2::unordered_map>, + std::pair> cache; + + /** \brief Lookaround table for Rose roles. */ + std::vector>> table; + + /** \brief Lookaround look table size. */ + size_t lookTableSize = 0; + + /** \brief Lookaround reach table size. + * since single path lookaround and multi-path lookaround have different + * bitvectors range (32 and 256), we need to maintain both look table size + * and reach table size. */ + size_t reachTableSize = 0; +}; + +/** + * \brief Provides a brief summary of properties of an NFA that has already been + * finalised and stored in the blob. + */ +struct engine_info { + engine_info(const NFA *nfa, bool trans); + + enum NFAEngineType type; + bool accepts_eod; + u32 stream_size; + u32 scratch_size; + u32 scratch_align; + bool transient; +}; + +/** + * \brief Consumes list of program blocks corresponding to different literals, + * checks them for duplicates and then concatenates them into one program. + * + * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is + * inserted to prevent the work_done flag being contaminated by early blocks. 
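+ *
+ * A minimal usage sketch (types as declared in this header):
+ *
+ *     std::vector<RoseProgram> blocks; // one block per literal, in order
+ *     // ... build each block ...
+ *     RoseProgram prog = assembleProgramBlocks(std::move(blocks));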
+ */ +RoseProgram assembleProgramBlocks(std::vector &&blocks); + +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, + const std::map &leftfix_info, + const std::map &suffixes, + const std::map &engine_info_by_queue, + lookaround_info &lookarounds, + unordered_map roleStateIndices, + ProgramBuild &prog_build, u32 lit_id, + const std::vector &lit_edges, + bool is_anchored_replay_program); + +RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, + lookaround_info &lookarounds, + ProgramBuild &prog_build, + const std::vector &lit_ids); + +RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, + ProgramBuild &prog_build, const RoseEdge &e, + const bool multiple_preds); + +RoseProgram makeReportProgram(const RoseBuildImpl &build, + bool needs_mpv_catchup, ReportID id); + +RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, + const std::set &reports); + +struct TriggerInfo { + TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {} + bool cancel; + u32 queue; + u32 event; + + bool operator==(const TriggerInfo &b) const { + return cancel == b.cancel && queue == b.queue && event == b.event; + } +}; + +void addPredBlocks(std::map &pred_blocks, u32 num_states, + RoseProgram &program); + +void applyFinalSpecialisation(RoseProgram &program); + +void recordLongLiterals(std::vector &longLiterals, + const RoseProgram &program); + +void recordResources(RoseResources &resources, const RoseProgram &program); } // namespace ue2 diff --git a/src/rose/rose_build_resources.h b/src/rose/rose_build_resources.h new file mode 100644 index 00000000..3edb81b9 --- /dev/null +++ b/src/rose/rose_build_resources.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_RESOURCES_H +#define ROSE_BUILD_RESOURCES_H + +namespace ue2 { + +/** + * \brief Structure tracking which resources are used by this Rose instance at + * runtime. + * + * We use this to control how much initialisation we need to do at the + * beginning of a stream/block at runtime. 
+ */ +struct RoseResources { + bool has_outfixes = false; + bool has_suffixes = false; + bool has_leftfixes = false; + bool has_literals = false; + bool has_states = false; + bool checks_groups = false; + bool has_lit_delay = false; + bool has_lit_check = false; // long literal support + bool has_anchored = false; + bool has_floating = false; + bool has_eod = false; +}; + +} + +#endif From 8a7ac432c00b9e13a4503e3b12eb9882b4e69fa9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 24 Apr 2017 09:26:35 +1000 Subject: [PATCH 277/326] ue2_literal: add hash_value() --- src/util/hash.h | 12 +++++++++++- src/util/ue2string.h | 10 +++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/util/hash.h b/src/util/hash.h index 0b571772..6f76e43d 100644 --- a/src/util/hash.h +++ b/src/util/hash.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #ifndef UTIL_HASH_H #define UTIL_HASH_H +#include #include namespace ue2 { @@ -69,6 +70,15 @@ size_t hash_all(Args&&... args) { return v; } +/** + * \brief Compute the hash of all the elements of any range on which we can + * call std::begin() and std::end(). + */ +template +size_t hash_range(const Range &r) { + return boost::hash_range(std::begin(r), std::end(r)); +} + } // namespace ue2 #endif // UTIL_HASH_H diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 08b6a544..a90d47a3 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,6 +35,7 @@ #include "ue2common.h" #include "util/charreach.h" +#include "util/hash.h" #include #include @@ -206,6 +207,13 @@ private: std::vector nocase; /* for trolling value */ }; +inline +size_t hash_value(const ue2_literal::elem &elem) { + return hash_all(elem.c, elem.nocase); +} + +inline +size_t hash_value(const ue2_literal &lit) { return hash_range(lit); } /// Return a reversed copy of this literal. 
ue2_literal reverse_literal(const ue2_literal &in); From a75b2ba2e5b06ec3c2f522355e92e9b7c9bf4db1 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 24 Apr 2017 09:27:16 +1000 Subject: [PATCH 278/326] rose: remove hasLiteral() --- src/rose/rose_build_compile.cpp | 19 ++++++++++++++----- src/rose/rose_build_impl.h | 2 -- src/rose/rose_build_misc.cpp | 22 ---------------------- 3 files changed, 14 insertions(+), 29 deletions(-) diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index c0096a97..f4c2613e 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -484,7 +484,7 @@ bool checkFloatingKillableByPrefixes(const RoseBuildImpl &tbi) { } static -bool checkEodStealFloating(const RoseBuildImpl &tbi, +bool checkEodStealFloating(const RoseBuildImpl &build, const vector &eodLiteralsForFloating, u32 numFloatingLiterals, size_t shortestFloatingLen) { @@ -498,27 +498,36 @@ bool checkEodStealFloating(const RoseBuildImpl &tbi, return false; } - if (tbi.hasNoFloatingRoots()) { + if (build.hasNoFloatingRoots()) { DEBUG_PRINTF("skipping as floating table is conditional\n"); /* TODO: investigate putting stuff in atable */ return false; } - if (checkFloatingKillableByPrefixes(tbi)) { + if (checkFloatingKillableByPrefixes(build)) { DEBUG_PRINTF("skipping as prefixes may make ftable conditional\n"); return false; } + // Collect a set of all floating literals. + unordered_set floating_lits; + for (auto &m : build.literals) { + const auto &lit = m.left; + if (lit.table == ROSE_FLOATING) { + floating_lits.insert(lit.s); + } + } + DEBUG_PRINTF("%zu are eod literals, %u floating; floating len=%zu\n", eodLiteralsForFloating.size(), numFloatingLiterals, shortestFloatingLen); u32 new_floating_lits = 0; for (u32 eod_id : eodLiteralsForFloating) { - const rose_literal_id &lit = tbi.literals.right.at(eod_id); + const rose_literal_id &lit = build.literals.right.at(eod_id); DEBUG_PRINTF("checking '%s'\n", dumpString(lit.s).c_str()); - if (tbi.hasLiteral(lit.s, ROSE_FLOATING)) { + if (contains(floating_lits, lit.s)) { DEBUG_PRINTF("skip; there is already a floating version\n"); continue; } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index b920e922..f4916604 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -490,8 +490,6 @@ public: const std::vector &cmp, u32 delay, rose_literal_table table); - bool hasLiteral(const ue2_literal &s, rose_literal_table table) const; - u32 getNewLiteralId(void); void removeVertices(const std::vector &dead); diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 51a6ea85..f5c5ce1a 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -486,28 +486,6 @@ u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector &msk, return id; } -bool RoseBuildImpl::hasLiteral(const ue2_literal &s, - rose_literal_table table) const { - DEBUG_PRINTF("looking if %s exists\n", dumpString(s).c_str()); - assert(table != ROSE_ANCHORED); - - for (RoseLiteralMap::left_map::const_iterator it - = literals.left.lower_bound(rose_literal_id(s, table, 0)); - it != literals.left.end(); ++it) { - if (it->first.table != table || it->first.s != s) { - break; - } - const rose_literal_info &info = literal_info[it->second]; - if (!info.vertices.empty()) { - return true; - } - } - - DEBUG_PRINTF("(used) literal not found\n"); - - return false; -} - u32 RoseBuildImpl::getNewLiteralId() { rose_literal_id key(ue2_literal(), ROSE_ANCHORED, 0); u32 numLiterals = 
verify_u32(literals.left.size());

From a75b2ba2e5b06ec3c2f522355e92e9b7c9bf4db1 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 24 Apr 2017 09:27:16 +1000
Subject: [PATCH 279/326] rose: replace RoseLiteralMap use of bimap

This approach is simpler and more efficient for cases with large
numbers of literals.
---
 src/rose/rose_build_add.cpp           | 20 ++++----
 src/rose/rose_build_bytecode.cpp      | 37 +++++++--------
 src/rose/rose_build_castle.cpp        |  6 +--
 src/rose/rose_build_compile.cpp       | 40 ++++++++--------
 src/rose/rose_build_convert.cpp       |  4 +-
 src/rose/rose_build_dedupe.cpp        |  4 +-
 src/rose/rose_build_dump.cpp          | 35 +++++++-------
 src/rose/rose_build_groups.cpp        | 32 ++++++-------
 src/rose/rose_build_impl.h            | 66 +++++++++++++++++++++++++--
 src/rose/rose_build_lookaround.cpp    |  4 +-
 src/rose/rose_build_matchers.cpp      | 15 +++---
 src/rose/rose_build_merge.cpp         | 14 +++---
 src/rose/rose_build_misc.cpp          | 55 +++++++++------------
 src/rose/rose_build_program.cpp       | 14 +++---
 src/rose/rose_build_role_aliasing.cpp |  6 +--
 15 files changed, 198 insertions(+), 154 deletions(-)

diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp
index b53f7c8a..26f88445 100644
--- a/src/rose/rose_build_add.cpp
+++ b/src/rose/rose_build_add.cpp
@@ -295,7 +295,7 @@ void createVertices(RoseBuildImpl *tbi,
         if (bd.som && !g[w].left.haig) {
             /* no prefix - som based on literal start */
             assert(!prefix_graph);
-            g[w].som_adjust = tbi->literals.right.at(literalId).elength();
+            g[w].som_adjust = tbi->literals.at(literalId).elength();
             DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust);
         }
 
@@ -333,7 +333,7 @@ void createVertices(RoseBuildImpl *tbi,
             u32 ghostId = tbi->literal_info[literalId].undelayed_id;
             DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId);
             assert(ghostId != literalId);
-            assert(tbi->literals.right.at(ghostId).delay == 0);
+            assert(tbi->literals.at(ghostId).delay == 0);
 
             // Adjust offsets, removing delay.
             u32 ghost_min = min_offset, ghost_max = max_offset;
@@ -1907,16 +1907,20 @@ void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) {
         return;
     }
 
+    DEBUG_PRINTF("remove last %zu literals\n", lit_ids.size());
+
     // lit_ids should be a contiguous range.
     assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1);
+    assert(*lit_ids.rbegin() == tbi.literals.size() - 1);
 
-    for (const u32 &lit_id : lit_ids) {
-        assert(lit_id < tbi.literal_info.size());
-        assert(tbi.literals.right.at(lit_id).table == ROSE_ANCHORED);
-        assert(tbi.literal_info[lit_id].vertices.empty());
+    assert(all_of_in(lit_ids, [&](u32 lit_id) {
+        return lit_id < tbi.literal_info.size() &&
+               tbi.literals.at(lit_id).table == ROSE_ANCHORED &&
+               tbi.literal_info[lit_id].vertices.empty();
+    }));
 
-        tbi.literals.right.erase(lit_id);
-    }
+    tbi.literals.erase_back(lit_ids.size());
+    assert(tbi.literals.size() == *lit_ids.begin());
 
     // lit_ids should be at the end of tbi.literal_info.
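+    // (The new container can only drop a contiguous tail via erase_back(),
+    // unlike the old bimap's per-id erase; hence the contiguity asserts
+    // above.)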
assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 636af0a6..dfe4ff63 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -746,8 +746,8 @@ void findTriggerSequences(const RoseBuildImpl &tbi, const u32 top = e.first; const set &lit_ids = e.second; - for (u32 id : lit_ids) { - const rose_literal_id &lit = tbi.literals.right.at(id); + for (u32 id : lit_ids) { + const rose_literal_id &lit = tbi.literals.at(id); (*trigger_lits)[top].push_back(as_cr_seq(lit)); } } @@ -905,8 +905,8 @@ u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h, for (RoseVertex v : succs) { u32 lag = rg[v].left.lag; for (u32 lit_id : rg[v].literals) { - u32 delay = build.literals.right.at(lit_id).delay; - const ue2_literal &literal = build.literals.right.at(lit_id).s; + u32 delay = build.literals.at(lit_id).delay; + const ue2_literal &literal = build.literals.at(lit_id).s; assert(lag <= literal.length() + delay); size_t base = literal.length() + delay - lag; if (base >= literal.length()) { @@ -1134,7 +1134,7 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, for (RoseVertex v : succs) { for (auto u : inv_adjacent_vertices_range(v, g)) { for (u32 lit_id : g[u].literals) { - lits.insert(build.literals.right.at(lit_id).s); + lits.insert(build.literals.at(lit_id).s); } } } @@ -1315,7 +1315,7 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build, set lits; for (auto u : inv_adjacent_vertices_range(v, build.g)) { for (u32 lit_id : build.g[u].literals) { - lits.insert(build.literals.right.at(lit_id).s); + lits.insert(build.literals.at(lit_id).s); } } DEBUG_PRINTF("%zu literals\n", lits.size()); @@ -2117,9 +2117,8 @@ u32 RoseBuildImpl::calcHistoryRequired() const { } // Delayed literals contribute to history requirement as well. - for (const auto &e : literals.right) { - const u32 id = e.first; - const auto &lit = e.second; + for (u32 id = 0; id < literals.size(); id++) { + const auto &lit = literals.at(id); if (lit.delay) { // If the literal is delayed _and_ has a mask that is longer than // the literal, we need enough history to match the whole mask as @@ -2716,11 +2715,11 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, } else { left.lagIndex = ROSE_OFFSET_INVALID; } - - DEBUG_PRINTF("rose %u is %s\n", left_index, - left.infix ? "infix" : "prefix"); } + DEBUG_PRINTF("rose %u is %s\n", left_index, + left.infix ? "infix" : "prefix"); + // Update squash mask. 
left.squash_mask &= lbi.squash_mask; @@ -2853,9 +2852,8 @@ vector groupByFragment(const RoseBuildImpl &build) { map frag_info; - for (const auto &m : build.literals.right) { - const u32 lit_id = m.first; - const auto &lit = m.second; + for (u32 lit_id = 0; lit_id < build.literals.size(); lit_id++) { + const auto &lit = build.literals.at(lit_id); const auto &info = build.literal_info.at(lit_id); if (!isUsedLiteral(build, lit_id)) { @@ -2993,7 +2991,7 @@ pair writeAnchoredPrograms(const RoseBuildImpl &build, for (const auto &frag : fragments) { for (const u32 lit_id : frag.lit_ids) { - const auto &lit = build.literals.right.at(lit_id); + const auto &lit = build.literals.at(lit_id); if (lit.table != ROSE_ANCHORED) { continue; @@ -3238,7 +3236,7 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { assert(g[v].min_offset <= g[v].max_offset); for (u32 lit_id : g[v].literals) { - const rose_literal_id &key = build.literals.right.at(lit_id); + const rose_literal_id &key = build.literals.at(lit_id); u32 max_d = g[v].max_offset; u32 min_d = g[v].min_offset; @@ -3371,9 +3369,8 @@ pair floatingCountAndMaxLen(const RoseBuildImpl &build) { size_t num = 0; size_t max_len = 0; - for (const auto &e : build.literals.right) { - const u32 id = e.first; - const rose_literal_id &lit = e.second; + for (u32 id = 0; id < build.literals.size(); id++) { + const rose_literal_id &lit = build.literals.at(id); if (lit.table != ROSE_FLOATING) { continue; diff --git a/src/rose/rose_build_castle.cpp b/src/rose/rose_build_castle.cpp index 7987b0f6..a85a784f 100644 --- a/src/rose/rose_build_castle.cpp +++ b/src/rose/rose_build_castle.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -131,7 +131,7 @@ vector literals_for_vertex(const RoseBuildImpl &tbi, vector rv; for (const u32 id : tbi.g[v].literals) { - rv.push_back(tbi.literals.right.at(id)); + rv.push_back(tbi.literals.at(id)); } return rv; @@ -366,7 +366,7 @@ bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left, /* check each pred literal to see if they all kill previous castle * state */ for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { - const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id); + const rose_literal_id &pred_lit = tbi.literals.at(lit_id); const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); const CharReach &cr = c.reach(); diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index f4c2613e..96241e39 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -119,7 +119,7 @@ void removeLiteralFromGraph(RoseBuildImpl &build, u32 id) { */ static void explodeLiteral(RoseBuildImpl &build, u32 id) { - const auto &lit = build.literals.right.at(id); + const auto &lit = build.literals.at(id); auto &info = build.literal_info[id]; assert(!info.group_mask); // not set yet @@ -139,7 +139,7 @@ void explodeLiteral(RoseBuildImpl &build, u32 id) { DEBUG_PRINTF("adding exploded lit %u: '%s'\n", new_id, dumpString(new_str).c_str()); - const auto &new_lit = build.literals.right.at(new_id); + const auto &new_lit = build.literals.at(new_id); auto &new_info = build.literal_info.at(new_id); insert(&new_info.vertices, info.vertices); for (const auto &v : info.vertices) { @@ -150,7 +150,7 @@ void explodeLiteral(RoseBuildImpl &build, u32 id) 
{ if (!info.delayed_ids.empty()) { flat_set &del_ids = new_info.delayed_ids; for (u32 delay_id : info.delayed_ids) { - const auto &dlit = build.literals.right.at(delay_id); + const auto &dlit = build.literals.at(delay_id); u32 new_delay_id = build.getLiteralId(new_lit.s, new_lit.msk, new_lit.cmp, dlit.delay, dlit.table); @@ -170,9 +170,8 @@ void explodeLiteral(RoseBuildImpl &build, u32 id) { void RoseBuildImpl::handleMixedSensitivity(void) { vector explode; - for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); if (lit.delay) { continue; /* delay id's are virtual-ish */ @@ -420,7 +419,7 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { } } - if (literals.right.at(id).table == ROSE_ANCHORED) { + if (literals.at(id).table == ROSE_ANCHORED) { /* in-edges are irrelevant for anchored region. */ continue; } @@ -439,7 +438,7 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { } DEBUG_PRINTF("literal %u ('%s') is a %s report\n", id, - dumpString(literals.right.at(id).s).c_str(), + dumpString(literals.at(id).s).c_str(), info.vertices.size() > 1 ? "multi-direct" : "direct"); return true; } @@ -511,8 +510,7 @@ bool checkEodStealFloating(const RoseBuildImpl &build, // Collect a set of all floating literals. unordered_set floating_lits; - for (auto &m : build.literals) { - const auto &lit = m.left; + for (auto &lit : build.literals) { if (lit.table == ROSE_FLOATING) { floating_lits.insert(lit.s); } @@ -524,7 +522,7 @@ bool checkEodStealFloating(const RoseBuildImpl &build, u32 new_floating_lits = 0; for (u32 eod_id : eodLiteralsForFloating) { - const rose_literal_id &lit = build.literals.right.at(eod_id); + const rose_literal_id &lit = build.literals.at(eod_id); DEBUG_PRINTF("checking '%s'\n", dumpString(lit.s).c_str()); if (contains(floating_lits, lit.s)) { @@ -558,12 +556,16 @@ bool checkEodStealFloating(const RoseBuildImpl &build, static void promoteEodToFloating(RoseBuildImpl &tbi, const vector &eodLiterals) { - DEBUG_PRINTF("promoting eod literals to floating table\n"); + DEBUG_PRINTF("promoting %zu eod literals to floating table\n", + eodLiterals.size()); for (u32 eod_id : eodLiterals) { - const rose_literal_id &lit = tbi.literals.right.at(eod_id); + const rose_literal_id &lit = tbi.literals.at(eod_id); + DEBUG_PRINTF("eod_id=%u, lit=%s\n", eod_id, dumpString(lit.s).c_str()); u32 floating_id = tbi.getLiteralId(lit.s, lit.msk, lit.cmp, lit.delay, ROSE_FLOATING); + DEBUG_PRINTF("floating_id=%u, lit=%s\n", floating_id, + dumpString(tbi.literals.at(floating_id).s).c_str()); auto &float_verts = tbi.literal_info[floating_id].vertices; auto &eod_verts = tbi.literal_info[eod_id].vertices; @@ -588,7 +590,7 @@ bool promoteEodToAnchored(RoseBuildImpl &tbi, const vector &eodLiterals) { bool rv = true; for (u32 eod_id : eodLiterals) { - const rose_literal_id &lit = tbi.literals.right.at(eod_id); + const rose_literal_id &lit = tbi.literals.at(eod_id); NGHolder h; add_edge(h.start, h.accept, h); @@ -728,7 +730,7 @@ void stealEodVertices(RoseBuildImpl &tbi) { continue; // skip unused literals } - const rose_literal_id &lit = tbi.literals.right.at(i); + const rose_literal_id &lit = tbi.literals.at(i); if (lit.table == ROSE_EOD_ANCHORED) { if (suitableForAnchored(tbi, lit, info)) { @@ -770,7 +772,7 @@ bool RoseBuildImpl::isDelayed(u32 id) const { bool RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const { for (u32 lit_id : g[v].literals) { - if 
(literals.right.at(lit_id).delay) { + if (literals.at(lit_id).delay) { return true; } } @@ -1096,7 +1098,7 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left, /* check each pred literal to see if they all kill previous graph * state */ for (u32 lit_id : build.g[source(e, build.g)].literals) { - const rose_literal_id &pred_lit = build.literals.right.at(lit_id); + const rose_literal_id &pred_lit = build.literals.at(lit_id); const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); DEBUG_PRINTF("running graph %zu\n", states.size()); @@ -1170,7 +1172,7 @@ void findTopTriggerCancels(RoseBuildImpl &build) { } for (u32 lit_id : pred_lit_ids) { - const rose_literal_id &p_lit = build.literals.right.at(lit_id); + const rose_literal_id &p_lit = build.literals.at(lit_id); if (p_lit.delay || p_lit.table == ROSE_ANCHORED) { goto next_rose; } @@ -1271,7 +1273,7 @@ void countFloatingLiterals(const RoseBuildImpl &tbi, u32 *total_count, u32 *short_count) { *total_count = 0; *short_count = 0; - for (const rose_literal_id &lit : tbi.literals.right | map_values) { + for (const rose_literal_id &lit : tbi.literals) { if (lit.delay) { continue; /* delay id's are virtual-ish */ } diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index 89eac225..0c1f4338 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -235,7 +235,7 @@ void convertFloodProneSuffix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id, static size_t findFloodProneSuffixLen(const RoseBuildImpl &tbi) { size_t numLiterals = 0; - for (const rose_literal_id &lit : tbi.literals.right | map_values) { + for (const rose_literal_id &lit : tbi.literals) { if (lit.delay) { continue; // delay ids are virtual-ish } @@ -293,7 +293,7 @@ void convertFloodProneSuffixes(RoseBuildImpl &tbi) { } u32 lit_id = *g[v].literals.begin(); - const rose_literal_id &lit = tbi.literals.right.at(lit_id); + const rose_literal_id &lit = tbi.literals.at(lit_id); // anchored or delayed literals need thought. 
if (lit.table != ROSE_FLOATING || lit.delay) { diff --git a/src/rose/rose_build_dedupe.cpp b/src/rose/rose_build_dedupe.cpp index dbff7aa7..d3e72313 100644 --- a/src/rose/rose_build_dedupe.cpp +++ b/src/rose/rose_build_dedupe.cpp @@ -311,9 +311,9 @@ bool RoseDedupeAuxImpl::requiresDedupeSupport( } for (auto it = begin(lits); it != end(lits); ++it) { - const auto &lit1 = build.literals.right.at(it->first); + const auto &lit1 = build.literals.at(it->first); for (auto jt = next(it); jt != end(lits); ++jt) { - const auto &lit2 = build.literals.right.at(jt->first); + const auto &lit2 = build.literals.at(jt->first); if (literalsCouldRace(lit1, lit2)) { DEBUG_PRINTF("literals could race\n"); return true; diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index dfbbe116..7fd19d43 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -257,17 +257,13 @@ private: os << "/nofrag "; } - if (contains(build.literals.right, id)) { - const auto &lit = build.literals.right.at(id); - os << '\'' << dotEscapeString(lit.s.get_string()) << '\''; - if (lit.s.any_nocase()) { - os << " (nocase)"; - } - if (lit.delay) { - os << " +" << lit.delay; - } - } else { - os << ""; + const auto &lit = build.literals.at(id); + os << '\'' << dotEscapeString(lit.s.get_string()) << '\''; + if (lit.s.any_nocase()) { + os << " (nocase)"; + } + if (lit.delay) { + os << " +" << lit.delay; } } @@ -358,15 +354,16 @@ void dumpRoseLiterals(const RoseBuildImpl &build, DEBUG_PRINTF("dumping literals\n"); ofstream os(grey.dumpPath + "rose_literals.txt"); - os << "ROSE LITERALS: a total of " << build.literals.right.size() - << " literals and " << num_vertices(g) << " roles." << endl << endl; + os << "ROSE LITERALS: a total of " << build.literals.size() + << " literals and " << num_vertices(g) << " roles." << endl + << endl; - for (const auto &e : build.literals.right) { - u32 id = e.first; - const ue2_literal &s = e.second.s; + for (u32 id = 0; id < build.literals.size(); id++) { + const auto &lit = build.literals.at(id); + const ue2_literal &s = lit.s; const rose_literal_info &lit_info = build.literal_info[id]; - switch (e.second.table) { + switch (lit.table) { case ROSE_ANCHORED: os << "ANCHORED"; break; @@ -397,8 +394,8 @@ void dumpRoseLiterals(const RoseBuildImpl &build, os << " benefits,"; } - if (e.second.delay) { - os << " delayed "<< e.second.delay << ","; + if (lit.delay) { + os << " delayed "<< lit.delay << ","; } os << " groups 0x" << hex << setw(16) << setfill('0') diff --git a/src/rose/rose_build_groups.cpp b/src/rose/rose_build_groups.cpp index f17e1ee4..c670e603 100644 --- a/src/rose/rose_build_groups.cpp +++ b/src/rose/rose_build_groups.cpp @@ -238,9 +238,8 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { u32 group_always_on = 0; // First pass: handle always on literals. - for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); rose_literal_info &info = literal_info[id]; if (!requires_group_assignment(lit, info)) { @@ -274,9 +273,8 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { priority_queue> pq; // Second pass: the other literals. 
- for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); rose_literal_info &info = literal_info[id]; if (!requires_group_assignment(lit, info)) { @@ -290,7 +288,7 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { while (!pq.empty()) { u32 id = get<2>(pq.top()); pq.pop(); - UNUSED const rose_literal_id &lit = literals.right.at(id); + UNUSED const rose_literal_id &lit = literals.at(id); DEBUG_PRINTF("assigning groups to lit %u (v %zu l %zu)\n", id, literal_info[id].vertices.size(), lit.s.length()); @@ -361,9 +359,8 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { } } /* assign delayed literals to the same group as their parent */ - for (const auto &e : literals.right) { - u32 id = e.first; - const rose_literal_id &lit = e.second; + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); if (!lit.delay) { continue; @@ -378,7 +375,7 @@ void assignGroupsToLiterals(RoseBuildImpl &build) { } DEBUG_PRINTF("populate group to literal mapping\n"); - for (const u32 id : literals.right | map_keys) { + for (u32 id = 0; id < literals.size(); id++) { rose_group groups = literal_info[id].group_mask; while (groups) { u32 group_id = findAndClearLSB_64(&groups); @@ -561,10 +558,10 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, const rose_literal_info &lit_info = build.literal_info.at(id); DEBUG_PRINTF("checking if %u '%s' is a group squasher %016llx\n", id, - dumpString(build.literals.right.at(id).s).c_str(), - lit_info.group_mask); + dumpString(build.literals.at(id).s).c_str(), + lit_info.group_mask); - if (build.literals.right.at(id).table == ROSE_EVENT) { + if (build.literals.at(id).table == ROSE_EVENT) { DEBUG_PRINTF("event literal\n"); return false; } @@ -693,9 +690,10 @@ bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, void findGroupSquashers(RoseBuildImpl &build) { rose_group forbidden_squash_group = build.boundary_group_mask; - for (const auto &e : build.literals.right) { - if (e.second.delay) { - forbidden_squash_group |= build.literal_info[e.first].group_mask; + for (u32 id = 0; id < build.literals.size(); id++) { + const auto &lit = build.literals.at(id); + if (lit.delay) { + forbidden_squash_group |= build.literal_info[id].group_mask; } } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index f4916604..13f1cfc9 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -39,16 +39,17 @@ #include "nfagraph/ng_holder.h" #include "nfagraph/ng_revacc.h" #include "util/bytecode_ptr.h" +#include "util/hash.h" #include "util/order_check.h" #include "util/queue_index_factory.h" #include "util/ue2_containers.h" +#include "util/ue2string.h" +#include "util/verify_types.h" #include #include #include #include -#include -#include #include struct RoseEngine; @@ -300,6 +301,11 @@ struct rose_literal_id { } return MAX(mask_len, s.length()) + delay; } + + bool operator==(const rose_literal_id &b) const { + return s == b.s && msk == b.msk && cmp == b.cmp && table == b.table && + delay == b.delay && distinctiveness == b.distinctiveness; + } }; static inline @@ -313,8 +319,60 @@ bool operator<(const rose_literal_id &a, const rose_literal_id &b) { return 0; } -// Literals are stored in a map from (string, nocase) -> ID -typedef boost::bimap RoseLiteralMap; +inline +size_t hash_value(const rose_literal_id &lit) { + return 
hash_all(lit.s, lit.msk, lit.cmp, lit.table, lit.delay, + lit.distinctiveness); +} + +class RoseLiteralMap { + /** + * \brief Main storage for literals. + * + * Note that this cannot be a vector, as the present code relies on + * iterator stability when iterating over this list and adding to it inside + * the loop. + */ + std::deque lits; + + /** \brief Quick-lookup index from literal -> index in lits. */ + unordered_map lits_index; + +public: + std::pair insert(const rose_literal_id &lit) { + auto it = lits_index.find(lit); + if (it != lits_index.end()) { + return {it->second, false}; + } + u32 id = verify_u32(lits.size()); + lits.push_back(lit); + lits_index.emplace(lit, id); + return {id, true}; + } + + // Erase the last num elements. + void erase_back(size_t num) { + assert(num <= lits.size()); + for (size_t i = 0; i < num; i++) { + lits_index.erase(lits.back()); + lits.pop_back(); + } + assert(lits.size() == lits_index.size()); + } + + const rose_literal_id &at(u32 id) const { + assert(id < lits.size()); + return lits.at(id); + } + + using const_iterator = decltype(lits)::const_iterator; + const_iterator begin() const { return lits.begin(); } + const_iterator end() const { return lits.end(); } + + size_t size() const { + return lits.size(); + } +}; struct simple_anchored_info { simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit) diff --git a/src/rose/rose_build_lookaround.cpp b/src/rose/rose_build_lookaround.cpp index 07ab7c59..a46a1aeb 100644 --- a/src/rose/rose_build_lookaround.cpp +++ b/src/rose/rose_build_lookaround.cpp @@ -447,7 +447,7 @@ static void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v, set &flood_reach) { for (u32 lit_id : tbi.g[v].literals) { - const ue2_literal &s = tbi.literals.right.at(lit_id).s; + const ue2_literal &s = tbi.literals.at(lit_id).s; if (s.empty()) { continue; } @@ -491,7 +491,7 @@ map findLiteralReach(const RoseBuildImpl &build, map look; for (u32 lit_id : build.g[v].literals) { - const rose_literal_id &lit = build.literals.right.at(lit_id); + const rose_literal_id &lit = build.literals.at(lit_id); auto lit_look = findLiteralReach(lit); if (first) { diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 64a1c919..682a87c3 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -211,7 +211,7 @@ bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id, } u32 u_lit_id = *(g[u].literals.begin()); - const rose_literal_id &u_id = build.literals.right.at(u_lit_id); + const rose_literal_id &u_id = build.literals.at(u_lit_id); DEBUG_PRINTF("u has lit: %s\n", escapeString(u_id.s).c_str()); // Number of characters to take from the back of u's literal. 
@@ -346,9 +346,8 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { } vector candidates; - for (const auto &e : build.literals.right) { - const u32 id = e.first; - const auto &lit = e.second; + for (u32 id = 0; id < build.literals.size(); id++) { + const auto &lit = build.literals.at(id); if (lit.delay || build.isDelayed(id)) { continue; @@ -377,7 +376,7 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { } for (const u32 &id : candidates) { - const auto &lit = build.literals.right.at(id); + const auto &lit = build.literals.at(id); auto &lit_info = build.literal_info.at(id); vector msk, cmp; @@ -492,7 +491,7 @@ bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, return false; } - size_t len = build.literals.right.at(id).s.length(); + size_t len = build.literals.at(id).s.length(); if (len > max_len) { DEBUG_PRINTF("long literal, requires confirm\n"); return false; @@ -617,7 +616,7 @@ u64a literalMinReportOffset(const RoseBuildImpl &build, // If this literal in the undelayed literal corresponding to some delayed // literals, we must take their minimum offsets into account. for (const u32 &delayed_id : info.delayed_ids) { - const auto &delayed_lit = build.literals.right.at(delayed_id); + const auto &delayed_lit = build.literals.at(delayed_id); const auto &delayed_info = build.literal_info.at(delayed_id); u64a delayed_min_offset = literalMinReportOffset(build, delayed_lit, delayed_info); @@ -682,7 +681,7 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, for (const auto &f : fragments) { for (u32 id : f.lit_ids) { - const rose_literal_id &lit = build.literals.right.at(id); + const rose_literal_id &lit = build.literals.at(id); if (lit.table != table) { continue; /* wrong table */ diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 685d1523..d638e589 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -1054,14 +1054,14 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, vector> ulits; ulits.reserve(tbi.g[u].literals.size()); for (u32 id : tbi.g[u].literals) { - ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag)); + ulits.emplace_back(&tbi.literals.at(id), ulag); } u32 vlag = tbi.g[v].left.lag; vector> vlits; vlits.reserve(tbi.g[v].literals.size()); for (u32 id : tbi.g[v].literals) { - vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag)); + vlits.emplace_back(&tbi.literals.at(id), vlag); } if (!compatibleLiteralsForMerge(ulits, vlits)) { @@ -1130,7 +1130,7 @@ bool checkPredDelays(const RoseBuildImpl &tbi, const deque &v1, vector pred_rose_lits; pred_rose_lits.reserve(pred_lits.size()); for (const auto &p : pred_lits) { - pred_rose_lits.push_back(&tbi.literals.right.at(p)); + pred_rose_lits.push_back(&tbi.literals.at(p)); } for (auto v : v2) { @@ -1140,7 +1140,7 @@ bool checkPredDelays(const RoseBuildImpl &tbi, const deque &v1, } for (const u32 vlit : tbi.g[v].literals) { - const rose_literal_id &vl = tbi.literals.right.at(vlit); + const rose_literal_id &vl = tbi.literals.at(vlit); assert(!vl.delay); // this should never have got this far? for (const auto &ul : pred_rose_lits) { assert(!ul->delay); // this should never have got this far? 
@@ -1195,7 +1195,7 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, u32 ulag = tbi.g[a].left.lag; for (u32 id : tbi.g[a].literals) { - ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag)); + ulits.emplace_back(&tbi.literals.at(id), ulag); } } @@ -1207,7 +1207,7 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, u32 vlag = tbi.g[a].left.lag; for (u32 id : tbi.g[a].literals) { - vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag)); + vlits.emplace_back(&tbi.literals.at(id), vlag); } } @@ -2730,7 +2730,7 @@ u32 allowedSquashDistance(const CharReach &cr, u32 min_width, /* TODO: inspect further back in the pattern */ for (u32 lit_id : g[tv].literals) { - const rose_literal_id &lit = tbi.literals.right.at(lit_id); + const rose_literal_id &lit = tbi.literals.at(lit_id); if (lit.delay) { return 0; /* TODO: better */ } diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index f5c5ce1a..01be11ef 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -154,14 +154,12 @@ bool isInTable(const RoseBuildImpl &tbi, RoseVertex v, // All literals for a given vertex will be in the same table, so we need // only inspect the first one. - const auto lit_table = tbi.literals.right.at(*lit_ids.begin()).table; + const auto lit_table = tbi.literals.at(*lit_ids.begin()).table; -#ifndef NDEBUG // Verify that all literals for this vertex are in the same table. - for (auto lit_id : lit_ids) { - assert(tbi.literals.right.at(lit_id).table == lit_table); - } -#endif + assert(all_of_in(lit_ids, [&](u32 lit_id) { + return tbi.literals.at(lit_id).table == lit_table; + })); return lit_table == table; } @@ -211,7 +209,7 @@ size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const { size_t maxlen = 0; for (const auto &lit_id : lit_ids) { - maxlen = max(maxlen, literals.right.at(lit_id).elength()); + maxlen = max(maxlen, literals.at(lit_id).elength()); } return maxlen; @@ -224,7 +222,7 @@ size_t RoseBuildImpl::minLiteralLen(RoseVertex v) const { size_t minlen = ROSE_BOUND_INF; for (const auto &lit_id : lit_ids) { - minlen = min(minlen, literals.right.at(lit_id).elength()); + minlen = min(minlen, literals.at(lit_id).elength()); } return minlen; @@ -287,12 +285,11 @@ size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b) { static const rose_literal_id &getOverlapLiteral(const RoseBuildImpl &tbi, u32 literal_id) { - map::const_iterator it = - tbi.anchoredLitSuffix.find(literal_id); + auto it = tbi.anchoredLitSuffix.find(literal_id); if (it != tbi.anchoredLitSuffix.end()) { return it->second; } - return tbi.literals.right.at(literal_id); + return tbi.literals.at(literal_id); } ue2_literal findNonOverlappingTail(const set &lits, @@ -368,16 +365,14 @@ u32 RoseBuildImpl::calcSuccMaxBound(RoseVertex u) const { u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table) { - DEBUG_PRINTF("getting id for %s\n", dumpString(s).c_str()); + DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(), + table); assert(table != ROSE_ANCHORED); rose_literal_id key(s, table, delay); - u32 numLiterals = verify_u32(literals.left.size()); - RoseLiteralMap::iterator it; - bool inserted; - tie(it, inserted) - = literals.insert(RoseLiteralMap::value_type(key, numLiterals)); - u32 id = it->right; + auto m = literals.insert(key); + u32 id = m.first; + bool inserted = m.second; if (inserted) { literal_info.push_back(rose_literal_info()); @@ -457,19 +452,17 @@ rose_literal_id::rose_literal_id(const ue2_literal &s_in, u32 
RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector &msk, const vector &cmp, u32 delay, rose_literal_table table) { - DEBUG_PRINTF("getting id for %s\n", dumpString(s).c_str()); + DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(), + table); assert(table != ROSE_ANCHORED); rose_literal_id key(s, msk, cmp, table, delay); - u32 numLiterals = verify_u32(literals.left.size()); /* ue2_literals are always uppercased if nocase and must have an * alpha char */ - RoseLiteralMap::iterator it; - bool inserted; - tie(it, inserted) = literals.insert( - RoseLiteralMap::value_type(key, numLiterals)); - u32 id = it->right; + auto m = literals.insert(key); + u32 id = m.first; + bool inserted = m.second; if (inserted) { literal_info.push_back(rose_literal_info()); @@ -488,16 +481,12 @@ u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector &msk, u32 RoseBuildImpl::getNewLiteralId() { rose_literal_id key(ue2_literal(), ROSE_ANCHORED, 0); - u32 numLiterals = verify_u32(literals.left.size()); + u32 numLiterals = verify_u32(literals.size()); key.distinctiveness = numLiterals; - RoseLiteralMap::iterator it; - bool inserted; - tie(it, inserted) - = literals.insert(RoseLiteralMap::value_type(key, numLiterals)); - u32 id = it->right; - - assert(inserted); + auto m = literals.insert(key); + assert(m.second); + u32 id = m.first; literal_info.push_back(rose_literal_info()); assert(literal_info.size() == id + 1); diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 8d0306ae..6dfe3507 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -349,7 +349,7 @@ void makeAnchoredLiteralDelay(const RoseBuildImpl &build, const ProgramBuild &prog_build, u32 lit_id, RoseProgram &program) { // Only relevant for literals in the anchored table. 
- const rose_literal_id &lit = build.literals.right.at(lit_id); + const rose_literal_id &lit = build.literals.at(lit_id); if (lit.table != ROSE_ANCHORED) { return; } @@ -686,7 +686,7 @@ void makePushDelayedInstructions(const RoseLiteralMap &literals, DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); assert(contains(prog_build.delay_programs, delayed_lit_id)); u32 delay_id = prog_build.delay_programs.at(delayed_lit_id); - const auto &delay_lit = literals.right.at(delayed_lit_id); + const auto &delay_lit = literals.at(delayed_lit_id); delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); } @@ -1335,7 +1335,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, vector look; - const ue2_literal &s = build.literals.right.at(lit_id).s; + const ue2_literal &s = build.literals.at(lit_id).s; DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id, dumpString(s).c_str()); assert(s.length() <= MAX_MASK2_WIDTH); @@ -1369,7 +1369,7 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id, return; } - const auto &lit = build.literals.right.at(lit_id); + const auto &lit = build.literals.at(lit_id); size_t min_len = lit.elength(); u32 min_offset = findMinOffset(build, lit_id); DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len, @@ -1404,7 +1404,7 @@ void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id, static bool hasDelayedLiteral(const RoseBuildImpl &build, const vector &lit_edges) { - auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1); + auto is_delayed = [&build](u32 lit_id) { return build.isDelayed(lit_id); }; for (const auto &e : lit_edges) { auto v = target(e, build.g); const auto &lits = build.g[v].literals; @@ -1425,7 +1425,7 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, // Check long literal info. if (!build.isDelayed(lit_id)) { - makeCheckLiteralInstruction(build.literals.right.at(lit_id), + makeCheckLiteralInstruction(build.literals.at(lit_id), prog_build.longLitLengthThreshold, program, build.cc); } @@ -2121,7 +2121,7 @@ RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, RoseProgram prog; if (!build.isDelayed(lit_id)) { - makeCheckLiteralInstruction(build.literals.right.at(lit_id), + makeCheckLiteralInstruction(build.literals.at(lit_id), prog_build.longLitLengthThreshold, prog, build.cc); } diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index f8174d74..0e78ec7d 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -328,9 +328,9 @@ bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &build) { // Otherwise, all the literals involved must have the same length. 
     for (u32 a_id : lits_a) {
-        const rose_literal_id &la = build.literals.right.at(a_id);
+        const rose_literal_id &la = build.literals.at(a_id);
         for (u32 b_id : lits_b) {
-            const rose_literal_id &lb = build.literals.right.at(b_id);
+            const rose_literal_id &lb = build.literals.at(b_id);

             if (la.elength() != lb.elength()) {
                 DEBUG_PRINTF("bad merge %zu!=%zu '%s', '%s'\n", la.elength(),
@@ -1483,7 +1483,7 @@ void splitByLiteralTable(const RoseBuildImpl &build,
     auto make_split_key = [&](RoseVertex v) {
         const auto &lits = g[v].literals;
         assert(!lits.empty());
-        return build.literals.right.at(*lits.begin()).table;
+        return build.literals.at(*lits.begin()).table;
     };
     splitAndFilterBuckets(buckets, make_split_key);
 }

From b30e5021f1bc3f8bbc127764fec1ca205e2f177c Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Thu, 27 Apr 2017 13:58:55 +1000
Subject: [PATCH 280/326] Strengthen assert to indicate that q->cur is less
 than q->end

Coverity CID-167665
---
 src/rose/match.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/rose/match.c b/src/rose/match.c
index 488ee5d5..daf81eac 100644
--- a/src/rose/match.c
+++ b/src/rose/match.c
@@ -150,7 +150,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
     }

     if (top_squash_distance) {
-        assert(q->cur != q->end);
+        assert(q->cur < q->end);
         struct mq_item *last = &q->items[q->end - 1];
         if (last->type == event
             && last->location >= loc - (s64a)top_squash_distance) {

From 097d73c7ff33679859eec73f5216fc9713ac4780 Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Thu, 27 Apr 2017 15:42:02 +1000
Subject: [PATCH 281/326] pass roleStateIndices by reference
---
 src/rose/rose_build_program.cpp | 18 +++++++++---------
 src/rose/rose_build_program.h   |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
index 6dfe3507..92eeff63 100644
--- a/src/rose/rose_build_program.cpp
+++ b/src/rose/rose_build_program.cpp
@@ -1903,7 +1903,7 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build,
                             const map<suffix_id, u32> &suffixes,
                             const map<u32, engine_info> &engine_info_by_queue,
                             lookaround_info &lookarounds,
-                            unordered_map<RoseVertex, u32> roleStateIndices,
+                            const unordered_map<RoseVertex, u32> &roleStateIndices,
                             ProgramBuild &prog_build, const RoseEdge &e) {
     const RoseGraph &g = build.g;
     auto v = target(e, g);
@@ -2026,14 +2026,14 @@ RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks) {
 }

 RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
-                               const map<RoseVertex, left_build_info> &leftfix_info,
-                               const map<suffix_id, u32> &suffixes,
-                               const map<u32, engine_info> &engine_info_by_queue,
-                               lookaround_info &lookarounds,
-                               unordered_map<RoseVertex, u32> roleStateIndices,
-                               ProgramBuild &prog_build, u32 lit_id,
-                               const vector<RoseEdge> &lit_edges,
-                               bool is_anchored_replay_program) {
+                               const map<RoseVertex, left_build_info> &leftfix_info,
+                               const map<suffix_id, u32> &suffixes,
+                               const map<u32, engine_info> &engine_info_by_queue,
+                               lookaround_info &lookarounds,
+                               const unordered_map<RoseVertex, u32> &roleStateIndices,
+                               ProgramBuild &prog_build, u32 lit_id,
+                               const vector<RoseEdge> &lit_edges,
+                               bool is_anchored_replay_program) {
     const auto &g = build.g;

     DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size());
diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h
index d6a9e218..d8e542b8 100644
--- a/src/rose/rose_build_program.h
+++ b/src/rose/rose_build_program.h
@@ -262,7 +262,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
                                const std::map<suffix_id, u32> &suffixes,
                                const std::map<u32, engine_info> &engine_info_by_queue,
                                lookaround_info &lookarounds,
-                               unordered_map<RoseVertex, u32> roleStateIndices,
+                               const unordered_map<RoseVertex, u32> &roleStateIndices,
                                ProgramBuild &prog_build, u32 lit_id,
                                const std::vector<RoseEdge> &lit_edges,
                                bool is_anchored_replay_program);
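The contract of the RoseLiteralMap container introduced earlier in this series (replacing the boost::bimap) is easy to miss in the diff: insert() deduplicates and hands back a dense, stable literal id, and at() is the reverse lookup. A minimal, self-contained sketch of that contract, using std::string in place of rose_literal_id and a hypothetical LitMap class purely for illustration:

    #include <cassert>
    #include <deque>
    #include <string>
    #include <unordered_map>
    #include <utility>

    // Simplified stand-in for RoseLiteralMap: ids are dense and stable, and
    // inserting a duplicate returns the existing id with inserted == false.
    class LitMap {
        std::deque<std::string> lits;                   // deque keeps refs stable
        std::unordered_map<std::string, unsigned> lits_index; // literal -> id
    public:
        std::pair<unsigned, bool> insert(const std::string &lit) {
            auto it = lits_index.find(lit);
            if (it != lits_index.end()) {
                return {it->second, false};             // already present
            }
            unsigned id = static_cast<unsigned>(lits.size());
            lits.push_back(lit);
            lits_index.emplace(lit, id);
            return {id, true};
        }
        const std::string &at(unsigned id) const { return lits.at(id); }
    };

    int main() {
        LitMap m;
        assert(m.insert("foo") == std::make_pair(0u, true));  // new literal
        assert(m.insert("bar") == std::make_pair(1u, true));
        assert(m.insert("foo") == std::make_pair(0u, false)); // deduplicated
        assert(m.at(1) == "bar");
        return 0;
    }

The real class uses a std::deque rather than a std::vector for the reason given in its doc comment: callers iterate over the list while appending to it, so references must stay valid across insertions. That is also why the patch adds operator== and hash_value() for rose_literal_id, which the lookup index needs.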
From 16a00074c695e36d468d2701512f7adfec8b4b70 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Wed, 26 Apr 2017 17:22:22 +1000
Subject: [PATCH 282/326] verify_types: throw on failure

In release builds, we would like a verify_u32 (etc) failure to be more
than just an assertion.
---
 src/util/verify_types.h | 55 ++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/src/util/verify_types.h b/src/util/verify_types.h
index 98c24c99..148b4377 100644
--- a/src/util/verify_types.h
+++ b/src/util/verify_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -30,45 +30,50 @@
 #define UTIL_VERIFY_TYPES

 #include "ue2common.h"
+#include "util/compile_error.h"

 #include <cassert>

 namespace ue2 {

-template<typename Int_T>
-static UNUSED u8 verify_u8(Int_T val) {
-    assert(val == (Int_T)((u8)val)); // there and back again
-    return (u8)(val);
+template<typename To_T, typename From_T>
+To_T verify_cast(From_T val) {
+    To_T conv_val = static_cast<To_T>(val);
+    if (static_cast<From_T>(conv_val) != val) {
+        assert(0);
+        throw ResourceLimitError();
+    }
+    return conv_val;
 }

-template<typename Int_T>
-static UNUSED s8 verify_s8(Int_T val) {
-    assert(val == (Int_T)((s8)val)); // there and back again
-    return (s8)(val);
+template<typename T>
+s8 verify_s8(T val) {
+    return verify_cast<s8>(val);
 }

-template<typename Int_T>
-static UNUSED s16 verify_s16(Int_T val) {
-    assert(val == (Int_T)((s16)val)); // there and back again
-    return (s16)(val);
+template<typename T>
+u8 verify_u8(T val) {
+    return verify_cast<u8>(val);
 }

-template<typename Int_T>
-static UNUSED u16 verify_u16(Int_T val) {
-    assert(val == (Int_T)((u16)val)); // there and back again
-    return (u16)(val);
+template<typename T>
+s16 verify_s16(T val) {
+    return verify_cast<s16>(val);
 }

-template<typename Int_T>
-static UNUSED s32 verify_s32(Int_T val) {
-    assert(val == (Int_T)((s32)val)); // there and back again
-    return (s32)(val);
+template<typename T>
+u16 verify_u16(T val) {
+    return verify_cast<u16>(val);
 }

-template<typename Int_T>
-static UNUSED u32 verify_u32(Int_T val) {
-    assert(val == (Int_T)((u32)val)); // there and back again
-    return (u32)(val);
+template<typename T>
+s32 verify_s32(T val) {
+    return verify_cast<s32>(val);
+}
+
+template<typename T>
+u32 verify_u32(T val) {
+    return verify_cast<u32>(val);
 }

 } // namespace ue2

From 1a04d1330e680c5d6c2be4a7a82facc955f19e06 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 1 May 2017 14:21:51 +1000
Subject: [PATCH 283/326] verify_types: add type static assertions
---
 src/util/verify_types.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/util/verify_types.h b/src/util/verify_types.h
index 148b4377..5833d5ec 100644
--- a/src/util/verify_types.h
+++ b/src/util/verify_types.h
@@ -33,16 +33,25 @@
 #include "util/compile_error.h"

 #include <cassert>
+#include <type_traits>

 namespace ue2 {

 template<typename To_T, typename From_T>
 To_T verify_cast(From_T val) {
+    static_assert(std::is_integral<To_T>::value,
+                  "Output type must be integral.");
+    static_assert(std::is_integral<From_T>::value ||
+                      std::is_enum<From_T>::value ||
+                      std::is_convertible<From_T, To_T>::value,
+                  "Must be integral or enum type, or convertible to output.");
+
     To_T conv_val = static_cast<To_T>(val);
     if (static_cast<From_T>(conv_val) != val) {
         assert(0);
         throw ResourceLimitError();
     }
+
     return conv_val;
 }
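To see the effect of the two patches above in one place: verify_cast (and the verify_* wrappers) round-trip the value through the target type and throw where it no longer compares equal, rather than only asserting. A hedged usage sketch follows; it assumes a translation unit inside the library tree, since these are internal headers, and note that in a debug build the assert(0) fires before the throw:

    #include "util/verify_types.h"  // internal header: verify_u8, verify_u32, ...
    #include "util/compile_error.h" // internal header: ResourceLimitError

    #include <cstdio>

    int main() {
        u32 ok = ue2::verify_u32(42ULL); // fits in u32: returns 42
        std::printf("ok=%u\n", ok);

        try {
            // 1000 does not round-trip through u8 (it would truncate to
            // 232), so this now throws ResourceLimitError in release
            // builds instead of only tripping a debug assertion.
            u8 bad = ue2::verify_u8(1000);
            std::printf("bad=%u\n", (unsigned)bad); // not reached
        } catch (const ue2::ResourceLimitError &) {
            std::printf("caught ResourceLimitError\n");
        }
        return 0;
    }

The static_asserts added in PATCH 283 complement this by turning misuse with non-integral types into a compile-time error rather than a runtime surprise.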
From 15c8a7bd98639ad3a4820f196692cc9aa82f0686 Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Mon, 1 May 2017 16:09:10 +1000
Subject: [PATCH 284/326] rose: rework storage of extra lookaround information

- remove explicit lookaround table from bytecode
- make the RoseInstr responsible for adding required info to blob
---
 CMakeLists.txt                       |   1 +
 src/rose/program_runtime.h           |  16 ++--
 src/rose/rose_build_bytecode.cpp     |  74 +----------------
 src/rose/rose_build_dump.cpp         |  19 ++---
 src/rose/rose_build_engine_blob.cpp  | 117 +++++++++++++++++++++++++++
 src/rose/rose_build_engine_blob.h    |  18 +++++
 src/rose/rose_build_instructions.cpp |  37 +++++++--
 src/rose/rose_build_instructions.h   |  63 ++++++---------
 src/rose/rose_build_program.cpp      | 116 +++++---------------------
 src/rose/rose_build_program.h        |  21 -----
 src/rose/rose_internal.h             |   4 -
 src/rose/rose_program.h              |   8 +-
 12 files changed, 231 insertions(+), 263 deletions(-)
 create mode 100644 src/rose/rose_build_engine_blob.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc42c659..4f5d661f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -948,6 +948,7 @@ SET (hs_SRCS
     src/rose/rose_build_convert.cpp
     src/rose/rose_build_convert.h
     src/rose/rose_build_dedupe.cpp
+    src/rose/rose_build_engine_blob.cpp
     src/rose/rose_build_engine_blob.h
     src/rose/rose_build_exclusive.cpp
     src/rose/rose_build_exclusive.h
diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h
index dac8345e..c67a4acb 100644
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -1031,8 +1031,7 @@ int roseCheckSingleLookaround(const struct RoseEngine *t,
         return 0;
     }

-    const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset;
-    const u8 *reach = reach_base + lookaroundReachIndex;
+    const u8 *reach = getByOffset(t, lookaroundReachIndex);

     u8 c;
     if (offset >= 0 && offset < (s64a)ci->len) {
@@ -1069,14 +1068,11 @@ int roseCheckLookaround(const struct RoseEngine *t,
     DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
                  ci->buf_offset, ci->buf_offset + ci->len);

-    const u8 *base = (const u8 *)t;
-    const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
-    const s8 *look = look_base + lookaroundLookIndex;
+    const s8 *look = getByOffset(t, lookaroundLookIndex);
     const s8 *look_end = look + lookaroundCount;
     assert(look < look_end);

-    const u8 *reach_base = base + t->lookaroundReachOffset;
-    const u8 *reach = reach_base + lookaroundReachIndex;
+    const u8 *reach = getByOffset(t, lookaroundReachIndex);

     // The following code assumes that the lookaround structures are ordered by
     // increasing offset.
@@ -1166,13 +1162,11 @@ int roseMultipathLookaround(const struct RoseEngine *t,
     DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
                  ci->buf_offset, ci->buf_offset + ci->len);

-    const s8 *look_base = getByOffset(t, t->lookaroundTableOffset);
-    const s8 *look = look_base + multipathLookaroundLookIndex;
+    const s8 *look = getByOffset(t, multipathLookaroundLookIndex);
     const s8 *look_end = look + multipathLookaroundCount;
     assert(look < look_end);

-    const u8 *reach_base = getByOffset(t, t->lookaroundReachOffset);
-    const u8 *reach = reach_base + multipathLookaroundReachIndex;
+    const u8 *reach = getByOffset(t, multipathLookaroundReachIndex);

     const s64a base_offset = (s64a)end - ci->buf_offset;
     DEBUG_PRINTF("base_offset=%lld\n", base_offset);
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index dfe4ff63..02304ae2 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -147,8 +147,6 @@ struct build_context : noncopyable {
     ue2::unordered_map<RoseProgram, u32, RoseProgramHash,
                        RoseProgramEquivalence> program_cache;

-    lookaround_info lookarounds;
-
     /** \brief State indices, for those roles that have them.
      * Each vertex present has a unique state index in the range
      * [0, roleStateIndices.size()).
*/ @@ -2428,70 +2426,6 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, return false; } -static -void writeLookaround(const vector &look_vec, s8 *&look, u8 *&reach) { - for (const auto &le : look_vec) { - *look = verify_s8(le.offset); - const CharReach &cr = le.reach; - - assert(cr.any()); // Should be at least one character! - fill_bitvector(cr, reach); - - ++look; - reach += REACH_BITVECTOR_LEN; - } -} - -static -void writeMultipathLookaround(const vector> &multi_look, - s8 *&look, u8 *&reach) { - for (const auto &m : multi_look) { - u8 u = 0; - assert(m.size() == MAX_LOOKAROUND_PATHS); - for (size_t i = 0; i < m.size(); i++) { - if (m[i].reach.none()) { - u |= (u8)1U << i; - } - } - std::fill_n(reach, MULTI_REACH_BITVECTOR_LEN, u); - - for (size_t i = 0; i < m.size(); i++) { - const CharReach &cr = m[i].reach; - if (cr.none()) { - continue; - } - *look = m[i].offset; - - for (size_t c = cr.find_first(); c != cr.npos; - c = cr.find_next(c)) { - reach[c] |= (u8)1U << i; - } - } - - ++look; - reach += MULTI_REACH_BITVECTOR_LEN; - } -} - -static -void writeLookaroundTables(const lookaround_info &lookarounds, - RoseEngineBlob &engine_blob, RoseEngine &proto) { - vector look_table(lookarounds.lookTableSize, 0); - vector reach_table(lookarounds.reachTableSize, 0); - s8 *look = look_table.data(); - u8 *reach = reach_table.data(); - for (const auto &la : lookarounds.table) { - if (la.size() == 1) { - writeLookaround(la.front(), look, reach); - } else { - writeMultipathLookaround(la, look, reach); - } - } - - proto.lookaroundTableOffset = engine_blob.add_range(look_table); - proto.lookaroundReachOffset = engine_blob.add_range(reach_table); -} - static void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob, RoseEngine &proto) { @@ -2752,7 +2686,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, } return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, - bc.engine_info_by_queue, bc.lookarounds, + bc.engine_info_by_queue, bc.roleStateIndices, prog_build, lit_id, *edges_ptr, is_anchored_replay_program); } @@ -2917,8 +2851,7 @@ void buildLiteralPrograms(const RoseBuildImpl &build, continue; } - auto rebuild_prog = makeDelayRebuildProgram(build, - bc.lookarounds, prog_build, + auto rebuild_prog = makeDelayRebuildProgram(build, prog_build, frag.lit_ids); frag.delay_program_offset = writeProgram(bc, move(rebuild_prog)); } @@ -3181,7 +3114,7 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc, }); auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, - bc.engine_info_by_queue, bc.lookarounds, + bc.engine_info_by_queue, bc.roleStateIndices, prog_build, build.eod_event_literal_id, edge_list, false); @@ -3555,7 +3488,6 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { addSomRevNfas(bc, proto, ssm); - writeLookaroundTables(bc.lookarounds, bc.engine_blob, proto); writeDkeyInfo(rm, bc.engine_blob, proto); writeLeftInfo(bc.engine_blob, proto, leftInfoTable); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 7fd19d43..b527db6c 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -625,12 +625,10 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, assert(ri); const u8 *base = (const u8 *)t; - const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const u8 *reach_base = base + t->lookaroundReachOffset; - const s8 *look = look_base + ri->look_index; + const s8 *look = (const s8 *)base + ri->look_index; const s8 
*look_end = look + ri->count; - const u8 *reach = reach_base + ri->reach_index; + const u8 *reach = base + ri->reach_index; os << " contents:" << endl; @@ -648,12 +646,10 @@ void dumpMultipathLookaround(ofstream &os, const RoseEngine *t, assert(ri); const u8 *base = (const u8 *)t; - const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const u8 *reach_base = base + t->lookaroundReachOffset; - const s8 *look_begin = look_base + ri->look_index; + const s8 *look_begin = (const s8 *)base + ri->look_index; const s8 *look_end = look_begin + ri->count; - const u8 *reach_begin = reach_base + ri->reach_index; + const u8 *reach_begin = base + ri->reach_index; os << " contents:" << endl; @@ -926,10 +922,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { os << " offset " << int{ri->offset} << endl; os << " reach_index " << ri->reach_index << endl; os << " fail_jump " << offset + ri->fail_jump << endl; - const u8 *base = (const u8 *)t; - const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + - ri->reach_index * REACH_BITVECTOR_LEN; + const u8 *reach = (const u8 *)t + ri->reach_index; os << " contents "; describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT); os << endl; @@ -2146,8 +2139,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, handledKeyFatbitSize); DUMP_U32(t, leftOffset); DUMP_U32(t, roseCount); - DUMP_U32(t, lookaroundTableOffset); - DUMP_U32(t, lookaroundReachOffset); DUMP_U32(t, eodProgramOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); diff --git a/src/rose/rose_build_engine_blob.cpp b/src/rose/rose_build_engine_blob.cpp new file mode 100644 index 00000000..d3957207 --- /dev/null +++ b/src/rose/rose_build_engine_blob.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "rose_build_engine_blob.h" + +#include "rose_build_lookaround.h" +#include "util/charreach_util.h" + +using namespace std; + +namespace ue2 { + +u32 lookaround_info::get_offset_of(const vector> &reaches, + RoseEngineBlob &blob) { + assert(reaches.size() != 1); + + // Check the cache. + auto it = multi_cache.find(reaches); + if (it != multi_cache.end()) { + DEBUG_PRINTF("reusing reach at idx %u\n", it->second); + return it->second; + } + + vector raw_reach(reaches.size() * MULTI_REACH_BITVECTOR_LEN); + size_t off = 0; + for (const auto &m : reaches) { + u8 u = 0; + assert(m.size() == MAX_LOOKAROUND_PATHS); + for (size_t i = 0; i < m.size(); i++) { + if (m[i].none()) { + u |= (u8)1U << i; + } + } + fill_n(raw_reach.data() + off, MULTI_REACH_BITVECTOR_LEN, u); + + for (size_t i = 0; i < m.size(); i++) { + const CharReach &cr = m[i]; + if (cr.none()) { + continue; + } + + for (size_t c = cr.find_first(); c != cr.npos; + c = cr.find_next(c)) { + raw_reach[c + off] |= (u8)1U << i; + } + } + + off += MULTI_REACH_BITVECTOR_LEN; + } + + u32 reach_idx = blob.add_range(raw_reach); + DEBUG_PRINTF("adding reach at idx %u\n", reach_idx); + multi_cache.emplace(reaches, reach_idx); + + return reach_idx; +} + +u32 lookaround_info::get_offset_of(const vector &reach, + RoseEngineBlob &blob) { + if (contains(rcache, reach)) { + u32 offset = rcache[reach]; + DEBUG_PRINTF("reusing reach at idx %u\n", offset); + return offset; + } + + vector raw_reach(reach.size() * REACH_BITVECTOR_LEN); + size_t off = 0; + for (const auto &cr : reach) { + assert(cr.any()); // Should be at least one character! + fill_bitvector(cr, raw_reach.data() + off); + off += REACH_BITVECTOR_LEN; + } + + u32 offset = blob.add_range(raw_reach); + rcache.emplace(reach, offset); + return offset; +} + +u32 lookaround_info::get_offset_of(const vector &look, + RoseEngineBlob &blob) { + if (contains(lcache, look)) { + u32 offset = lcache[look]; + DEBUG_PRINTF("reusing look at idx %u\n", offset); + return offset; + } + + u32 offset = blob.add_range(look); + lcache.emplace(look, offset); + return offset; +} + +} // namespace ue2 diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h index 69e8201e..a22f2dff 100644 --- a/src/rose/rose_build_engine_blob.h +++ b/src/rose/rose_build_engine_blob.h @@ -34,6 +34,7 @@ #include "ue2common.h" #include "util/alloc.h" #include "util/bytecode_ptr.h" +#include "util/charreach.h" #include "util/container.h" #include "util/multibit_build.h" #include "util/noncopyable.h" @@ -45,6 +46,21 @@ namespace ue2 { +class RoseEngineBlob; + +struct lookaround_info : noncopyable { + u32 get_offset_of(const std::vector> &look, + RoseEngineBlob &blob); + u32 get_offset_of(const std::vector &reach, + RoseEngineBlob &blob); + u32 get_offset_of(const std::vector &look, RoseEngineBlob &blob); + +private: + unordered_map>, u32> multi_cache; + unordered_map, u32> lcache; + unordered_map, u32> rcache; +}; + class RoseEngineBlob : noncopyable { public: /** \brief Base offset of engine_blob in the Rose engine bytecode. 
*/ @@ -133,6 +149,8 @@ public: copy_bytes((char *)engine + base_offset, blob); } + lookaround_info lookaround_cache; + private: void pad(size_t align) { assert(ISALIGNED_N(base_offset, align)); diff --git a/src/rose/rose_build_instructions.cpp b/src/rose/rose_build_instructions.cpp index f39fbe98..b00c36be 100644 --- a/src/rose/rose_build_instructions.cpp +++ b/src/rose/rose_build_instructions.cpp @@ -118,7 +118,7 @@ void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob, RoseInstrBase::write(dest, blob, offset_map); auto *inst = static_cast(dest); inst->offset = offset; - inst->reach_index = reach_index; + inst->reach_index = blob.lookaround_cache.get_offset_of({reach}, blob); inst->fail_jump = calc_jump(offset_map, this, target); } @@ -126,9 +126,15 @@ void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { RoseInstrBase::write(dest, blob, offset_map); auto *inst = static_cast(dest); - inst->look_index = look_index; - inst->reach_index = reach_index; - inst->count = count; + vector look_offsets; + vector reaches; + for (const auto &le : look) { + look_offsets.push_back(le.offset); + reaches.push_back(le.reach); + } + inst->look_index = blob.lookaround_cache.get_offset_of(look_offsets, blob); + inst->reach_index = blob.lookaround_cache.get_offset_of(reaches, blob); + inst->count = verify_u32(look.size()); inst->fail_jump = calc_jump(offset_map, this, target); } @@ -532,9 +538,26 @@ void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { RoseInstrBase::write(dest, blob, offset_map); auto *inst = static_cast(dest); - inst->look_index = look_index; - inst->reach_index = reach_index; - inst->count = count; + auto &cache = blob.lookaround_cache; + vector look_offsets; + vector> reaches; + for (const auto &vle : multi_look) { + reaches.push_back({}); + bool done_offset = false; + + for (const auto &le : vle) { + reaches.back().push_back(le.reach); + + /* empty reaches don't have valid offsets */ + if (!done_offset && le.reach.any()) { + look_offsets.push_back(le.offset); + done_offset = true; + } + } + } + inst->look_index = cache.get_offset_of(look_offsets, blob); + inst->reach_index = cache.get_offset_of(reaches, blob); + inst->count = verify_u32(multi_look.size()); inst->last_start = last_start; copy(begin(start_mask), end(start_mask), inst->start_mask); inst->fail_jump = calc_jump(offset_map, this, target); diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h index 06d146a5..025f6a67 100644 --- a/src/rose/rose_build_instructions.h +++ b/src/rose/rose_build_instructions.h @@ -37,6 +37,7 @@ #ifndef ROSE_BUILD_INSTRUCTIONS_H #define ROSE_BUILD_INSTRUCTIONS_H +#include "rose_build_lookaround.h" #include "rose_build_program.h" #include "util/verify_types.h" @@ -382,20 +383,19 @@ class RoseInstrCheckSingleLookaround RoseInstrCheckSingleLookaround> { public: s8 offset; - u32 reach_index; + CharReach reach; const RoseInstruction *target; - RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in, + RoseInstrCheckSingleLookaround(s8 offset_in, CharReach reach_in, const RoseInstruction *target_in) - : offset(offset_in), reach_index(reach_index_in), target(target_in) {} + : offset(offset_in), reach(std::move(reach_in)), target(target_in) {} bool operator==(const RoseInstrCheckSingleLookaround &ri) const { - return offset == ri.offset && reach_index == ri.reach_index && - target == ri.target; + return offset == ri.offset && reach == 
ri.reach && target == ri.target; } size_t hash() const override { - return hash_all(static_cast(opcode), offset, reach_index); + return hash_all(static_cast(opcode), offset, reach); } void write(void *dest, RoseEngineBlob &blob, @@ -404,7 +404,7 @@ public: bool equiv_to(const RoseInstrCheckSingleLookaround &ri, const OffsetMap &offsets, const OffsetMap &other_offsets) const { - return offset == ri.offset && reach_index == ri.reach_index && + return offset == ri.offset && reach == ri.reach && offsets.at(target) == other_offsets.at(ri.target); } }; @@ -414,24 +414,19 @@ class RoseInstrCheckLookaround ROSE_STRUCT_CHECK_LOOKAROUND, RoseInstrCheckLookaround> { public: - u32 look_index; - u32 reach_index; - u32 count; + std::vector look; const RoseInstruction *target; - RoseInstrCheckLookaround(u32 look_index_in, u32 reach_index_in, - u32 count_in, const RoseInstruction *target_in) - : look_index(look_index_in), reach_index(reach_index_in), - count(count_in), target(target_in) {} + RoseInstrCheckLookaround(std::vector look_in, + const RoseInstruction *target_in) + : look(std::move(look_in)), target(target_in) {} bool operator==(const RoseInstrCheckLookaround &ri) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && target == ri.target; + return look == ri.look && target == ri.target; } size_t hash() const override { - return hash_all(static_cast(opcode), look_index, reach_index, - count); + return hash_all(static_cast(opcode), look); } void write(void *dest, RoseEngineBlob &blob, @@ -439,9 +434,8 @@ public: bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, const OffsetMap &other_offsets) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && - offsets.at(target) == other_offsets.at(ri.target); + return look == ri.look + && offsets.at(target) == other_offsets.at(ri.target); } }; @@ -1837,30 +1831,26 @@ class RoseInstrMultipathLookaround ROSE_STRUCT_MULTIPATH_LOOKAROUND, RoseInstrMultipathLookaround> { public: - u32 look_index; - u32 reach_index; - u32 count; + std::vector> multi_look; s32 last_start; std::array start_mask; const RoseInstruction *target; - RoseInstrMultipathLookaround(u32 look_index_in, u32 reach_index_in, - u32 count_in, s32 last_start_in, + RoseInstrMultipathLookaround(std::vector> ml, + s32 last_start_in, std::array start_mask_in, const RoseInstruction *target_in) - : look_index(look_index_in), reach_index(reach_index_in), - count(count_in), last_start(last_start_in), + : multi_look(std::move(ml)), last_start(last_start_in), start_mask(std::move(start_mask_in)), target(target_in) {} bool operator==(const RoseInstrMultipathLookaround &ri) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && last_start == ri.last_start && - start_mask == ri.start_mask && target == ri.target; + return multi_look == ri.multi_look && last_start == ri.last_start + && start_mask == ri.start_mask && target == ri.target; } size_t hash() const override { - return hash_all(static_cast(opcode), look_index, reach_index, - count, last_start, start_mask); + return hash_all(static_cast(opcode), multi_look, last_start, + start_mask); } void write(void *dest, RoseEngineBlob &blob, @@ -1869,10 +1859,9 @@ public: bool equiv_to(const RoseInstrMultipathLookaround &ri, const OffsetMap &offsets, const OffsetMap &other_offsets) const { - return look_index == ri.look_index && reach_index == ri.reach_index && - count == ri.count && last_start == 
ri.last_start && - start_mask == ri.start_mask && - offsets.at(target) == other_offsets.at(ri.target); + return multi_look == ri.multi_look && last_start == ri.last_start + && start_mask == ri.start_mask + && offsets.at(target) == other_offsets.at(ri.target); } }; diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index 92eeff63..eb9db5a6 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -28,6 +28,7 @@ #include "rose_build_program.h" +#include "rose_build_engine_blob.h" #include "rose_build_instructions.h" #include "rose_build_lookaround.h" #include "rose_build_resources.h" @@ -39,7 +40,6 @@ #include "util/container.h" #include "util/compile_context.h" #include "util/compile_error.h" -#include "util/dump_charclass.h" #include "util/report_manager.h" #include "util/verify_types.h" @@ -851,40 +851,6 @@ void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build, program.add_before_end(make_unique(groups)); } -static -void addLookaround(lookaround_info &lookarounds, - const vector> &look, - u32 &look_index, u32 &reach_index) { - // Check the cache. - auto it = lookarounds.cache.find(look); - if (it != lookarounds.cache.end()) { - look_index = verify_u32(it->second.first); - reach_index = verify_u32(it->second.second); - DEBUG_PRINTF("reusing look at idx %u\n", look_index); - DEBUG_PRINTF("reusing reach at idx %u\n", reach_index); - return; - } - - size_t look_idx = lookarounds.lookTableSize; - size_t reach_idx = lookarounds.reachTableSize; - - if (look.size() == 1) { - lookarounds.lookTableSize += look.front().size(); - lookarounds.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN; - } else { - lookarounds.lookTableSize += look.size(); - lookarounds.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN; - } - - lookarounds.cache.emplace(look, make_pair(look_idx, reach_idx)); - lookarounds.table.emplace_back(look); - - DEBUG_PRINTF("adding look at idx %zu\n", look_idx); - DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx); - look_index = verify_u32(look_idx); - reach_index = verify_u32(reach_idx); -} - static bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) { size_t reach_size = cr.count(); @@ -1278,8 +1244,7 @@ bool makeRoleShufti(const vector &look, RoseProgram &program) { * available. */ static -void makeLookaroundInstruction(lookaround_info &lookarounds, - const vector &look, +void makeLookaroundInstruction(const vector &look, RoseProgram &program) { assert(!look.empty()); @@ -1289,12 +1254,8 @@ void makeLookaroundInstruction(lookaround_info &lookarounds, if (look.size() == 1) { s8 offset = look.begin()->offset; - u32 look_idx, reach_idx; - vector> lookaround; - lookaround.emplace_back(look); - addLookaround(lookarounds, lookaround, look_idx, reach_idx); - // We don't need look_idx here. 
- auto ri = make_unique(offset, reach_idx, + const CharReach &reach = look.begin()->reach; + auto ri = make_unique(offset, reach, program.end_instruction()); program.add_before_end(move(ri)); return; @@ -1312,21 +1273,13 @@ void makeLookaroundInstruction(lookaround_info &lookarounds, return; } - u32 look_idx, reach_idx; - vector> lookaround; - lookaround.emplace_back(look); - addLookaround(lookarounds, lookaround, look_idx, reach_idx); - u32 look_count = verify_u32(look.size()); - - auto ri = make_unique(look_idx, reach_idx, - look_count, + auto ri = make_unique(look, program.end_instruction()); program.add_before_end(move(ri)); } static -void makeCheckLitMaskInstruction(const RoseBuildImpl &build, - lookaround_info &lookarounds, u32 lit_id, +void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id, RoseProgram &program) { const auto &info = build.literal_info.at(lit_id); if (!info.requires_benefits) { @@ -1348,7 +1301,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, } assert(!look.empty()); - makeLookaroundInstruction(lookarounds, look, program); + makeLookaroundInstruction(look, program); } static @@ -1417,7 +1370,6 @@ bool hasDelayedLiteral(const RoseBuildImpl &build, static RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, - lookaround_info &lookarounds, ProgramBuild &prog_build, u32 lit_id, const vector &lit_edges, bool is_anchored_replay_program) { @@ -1431,7 +1383,7 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, } // Check lit mask. - makeCheckLitMaskInstruction(build, lookarounds, lit_id, program); + makeCheckLitMaskInstruction(build, lit_id, program); // Check literal groups. This is an optimisation that we only perform for // delayed literals, as their groups may be switched off; ordinarily, we @@ -1458,20 +1410,6 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, return program; } -#if defined(DEBUG) || defined(DUMP_SUPPORT) -static UNUSED -string dumpMultiLook(const vector &looks) { - ostringstream oss; - for (auto it = looks.begin(); it != looks.end(); ++it) { - if (it != looks.begin()) { - oss << ", "; - } - oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}"; - } - return oss.str(); -} -#endif - static bool makeRoleMultipathShufti(const vector> &multi_look, RoseProgram &program) { @@ -1612,8 +1550,7 @@ bool makeRoleMultipathShufti(const vector> &multi_look, } static -void makeRoleMultipathLookaround(lookaround_info &lookarounds, - const vector> &multi_look, +void makeRoleMultipathLookaround(const vector> &multi_look, RoseProgram &program) { assert(!multi_look.empty()); assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); @@ -1675,13 +1612,8 @@ void makeRoleMultipathLookaround(lookaround_info &lookarounds, ordered_look.emplace_back(multi_entry); } - u32 look_idx, reach_idx; - addLookaround(lookarounds, ordered_look, look_idx, reach_idx); - u32 look_count = verify_u32(ordered_look.size()); - - auto ri = make_unique(look_idx, reach_idx, - look_count, last_start, - start_mask, + auto ri = make_unique(move(ordered_look), + last_start, start_mask, program.end_instruction()); program.add_before_end(move(ri)); } @@ -1689,8 +1621,7 @@ void makeRoleMultipathLookaround(lookaround_info &lookarounds, static void makeRoleLookaround(const RoseBuildImpl &build, const map &leftfix_info, - lookaround_info &lookarounds, RoseVertex v, - RoseProgram &program) { + RoseVertex v, RoseProgram &program) { if (!build.cc.grey.roseLookaroundMasks) { return; } @@ -1714,14 +1645,14 @@ void 
makeRoleLookaround(const RoseBuildImpl &build, findLookaroundMasks(build, v, look_more); mergeLookaround(look, look_more); if (!look.empty()) { - makeLookaroundInstruction(lookarounds, look, program); + makeLookaroundInstruction(look, program); } return; } if (!makeRoleMultipathShufti(looks, program)) { assert(looks.size() <= 8); - makeRoleMultipathLookaround(lookarounds, looks, program); + makeRoleMultipathLookaround(looks, program); } } @@ -1902,7 +1833,6 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build, const map &leftfix_info, const map &suffixes, const map &engine_info_by_queue, - lookaround_info &lookarounds, const unordered_map &roleStateIndices, ProgramBuild &prog_build, const RoseEdge &e) { const RoseGraph &g = build.g; @@ -1929,7 +1859,7 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build, makeRoleCheckNotHandled(prog_build, v, program); } - makeRoleLookaround(build, leftfix_info, lookarounds, v, program); + makeRoleLookaround(build, leftfix_info, v, program); makeRoleCheckLeftfix(build, leftfix_info, v, program); // Next, we can add program instructions that have effects. This must be @@ -2029,7 +1959,6 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, const map &leftfix_info, const map &suffixes, const map &engine_info_by_queue, - lookaround_info &lookarounds, const unordered_map &roleStateIndices, ProgramBuild &prog_build, u32 lit_id, const vector &lit_edges, @@ -2040,8 +1969,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, // Construct initial program up front, as its early checks must be able // to jump to end and terminate processing for this literal. - auto lit_program = makeLitInitialProgram(build, lookarounds, prog_build, - lit_id, lit_edges, + auto lit_program = makeLitInitialProgram(build, prog_build, lit_id, + lit_edges, is_anchored_replay_program); RoseProgram role_programs; @@ -2060,8 +1989,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, assert(contains(roleStateIndices, u)); u32 pred_state = roleStateIndices.at(u); auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, - engine_info_by_queue, lookarounds, - roleStateIndices, prog_build, e); + engine_info_by_queue, roleStateIndices, + prog_build, e); if (!role_prog.empty()) { pred_blocks[pred_state].add_block(move(role_prog)); } @@ -2080,8 +2009,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index, g[target(e, g)].index); auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, - engine_info_by_queue, lookarounds, - roleStateIndices, prog_build, e); + engine_info_by_queue, roleStateIndices, + prog_build, e); role_programs.add_block(move(role_prog)); } @@ -2104,7 +2033,6 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, } RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, - lookaround_info &lookarounds, ProgramBuild &prog_build, const vector &lit_ids) { assert(!lit_ids.empty()); @@ -2126,7 +2054,7 @@ RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, build.cc); } - makeCheckLitMaskInstruction(build, lookarounds, lit_id, prog); + makeCheckLitMaskInstruction(build, lit_id, prog); makePushDelayedInstructions(build.literals, prog_build, build.literal_info.at(lit_id).delayed_ids, prog); diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index d8e542b8..8758ef64 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -214,25 +214,6 @@ struct left_build_info { std::vector> lookaround; }; -struct 
lookaround_info : noncopyable { - /** \brief LookEntry list cache, so that we can reuse the look index and - * reach index for the same lookaround. */ - ue2::unordered_map>, - std::pair> cache; - - /** \brief Lookaround table for Rose roles. */ - std::vector>> table; - - /** \brief Lookaround look table size. */ - size_t lookTableSize = 0; - - /** \brief Lookaround reach table size. - * since single path lookaround and multi-path lookaround have different - * bitvectors range (32 and 256), we need to maintain both look table size - * and reach table size. */ - size_t reachTableSize = 0; -}; - /** * \brief Provides a brief summary of properties of an NFA that has already been * finalised and stored in the blob. @@ -261,14 +242,12 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, const std::map &leftfix_info, const std::map &suffixes, const std::map &engine_info_by_queue, - lookaround_info &lookarounds, const unordered_map &roleStateIndices, ProgramBuild &prog_build, u32 lit_id, const std::vector &lit_edges, bool is_anchored_replay_program); RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, - lookaround_info &lookarounds, ProgramBuild &prog_build, const std::vector &lit_ids); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 777e7234..57395c9d 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -383,10 +383,6 @@ struct RoseEngine { u32 leftOffset; u32 roseCount; - u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values) - u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32 - * bytes for single-path lookaround and 256 bytes - * for multi-path lookaround) */ u32 eodProgramOffset; //!< EOD program, otherwise 0. diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index cdfe96ac..78b123d5 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -231,8 +231,8 @@ struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND { struct ROSE_STRUCT_CHECK_LOOKAROUND { u8 code; //!< From enum RoseInstructionCode. - u32 look_index; //!< Index for lookaround offset list. - u32 reach_index; //!< Index for lookaround reach bitvectors. + u32 look_index; //!< Offset in bytecode of lookaround offset list. + u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors. u32 count; //!< The count of lookaround entries in one instruction. u32 fail_jump; //!< Jump forward this many bytes on failure. }; @@ -561,8 +561,8 @@ struct ROSE_STRUCT_CLEAR_WORK_DONE { struct ROSE_STRUCT_MULTIPATH_LOOKAROUND { u8 code; //!< From enum RoseInstructionCode. - u32 look_index; //!< Index for lookaround offset list. - u32 reach_index; //!< Index for lookaround reach bitvectors. + u32 look_index; //!< Offset in bytecode of lookaround offset list. + u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors. u32 count; //!< The lookaround byte numbers for each path. s32 last_start; //!< The latest start offset among 8 paths. 
u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize path if left-most From dfe1b8a2afea013b1661d0ef20d1e56db3587959 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 1 May 2017 14:57:05 +1000 Subject: [PATCH 285/326] ng_depth: rename calcDepth functions, return vec --- src/nfagraph/ng_calc_components.cpp | 3 +-- src/nfagraph/ng_depth.cpp | 36 ++++++++++++++----------- src/nfagraph/ng_depth.h | 42 ++++++++++++++--------------- src/nfagraph/ng_equivalence.cpp | 4 +-- src/nfagraph/ng_expr_info.cpp | 3 +-- src/nfagraph/ng_extparam.cpp | 6 ++--- src/nfagraph/ng_fuzzy.cpp | 4 +-- src/nfagraph/ng_puff.cpp | 8 +++--- src/nfagraph/ng_repeat.cpp | 7 +++-- src/nfagraph/ng_som_util.cpp | 4 +-- src/nfagraph/ng_stop.cpp | 9 +++---- src/nfagraph/ng_violet.cpp | 9 +++---- src/rose/rose_build_add.cpp | 3 +-- src/smallwrite/smallwrite_build.cpp | 3 +-- 14 files changed, 65 insertions(+), 76 deletions(-) diff --git a/src/nfagraph/ng_calc_components.cpp b/src/nfagraph/ng_calc_components.cpp index 54221c7b..bfe73eb2 100644 --- a/src/nfagraph/ng_calc_components.cpp +++ b/src/nfagraph/ng_calc_components.cpp @@ -235,8 +235,7 @@ void splitIntoComponents(unique_ptr g, *shell_comp = false; // Compute "shell" head and tail subgraphs. - vector depths; - calcDepths(*g, depths); + auto depths = calcBidiDepths(*g); auto head_shell = findHeadShell(*g, depths, max_head_depth); auto tail_shell = findTailShell(*g, depths, max_tail_depth); for (auto v : head_shell) { diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index 63e0e46b..6c45fa2f 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,7 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief NFA graph vertex depth calculations. 
*/ #include "ng_depth.h" @@ -269,12 +270,11 @@ void calcAndStoreDepth(const Graph &g, } } -void calcDepths(const NGHolder &g, std::vector &depths) { +vector calcDepths(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - depths.clear(); - depths.resize(numVertices); + vector depths(numVertices); vector dMin; vector dMax; @@ -291,14 +291,15 @@ void calcDepths(const NGHolder &g, std::vector &depths) { DEBUG_PRINTF("doing startds\n"); calcAndStoreDepth(g, g.startDs, deadNodes, dMin, dMax, depths, &NFAVertexDepth::fromStartDotStar); + + return depths; } -void calcDepths(const NGHolder &g, std::vector &depths) { +vector calcRevDepths(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - depths.clear(); - depths.resize(numVertices); + vector depths(numVertices); vector dMin; vector dMax; @@ -324,14 +325,15 @@ void calcDepths(const NGHolder &g, std::vector &depths) { calcAndStoreDepth( rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexRevDepth::toAcceptEod); + + return depths; } -void calcDepths(const NGHolder &g, vector &depths) { +vector calcBidiDepths(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - depths.clear(); - depths.resize(numVertices); + vector depths(numVertices); vector dMin; vector dMax; @@ -366,10 +368,11 @@ void calcDepths(const NGHolder &g, vector &depths) { calcAndStoreDepth( rg, g.acceptEod, deadNodes, dMin, dMax, depths, &NFAVertexBidiDepth::toAcceptEod); + + return depths; } -void calcDepthsFrom(const NGHolder &g, const NFAVertex src, - vector &depths) { +vector calcDepthsFrom(const NGHolder &g, const NFAVertex src) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); @@ -379,13 +382,14 @@ void calcDepthsFrom(const NGHolder &g, const NFAVertex src, vector dMin, dMax; calcDepthFromSource(g, src, deadNodes, dMin, dMax); - depths.clear(); - depths.resize(numVertices); + vector depths(numVertices); for (auto v : vertices_range(g)) { - u32 idx = g[v].index; + auto idx = g[v].index; depths.at(idx) = getDepths(idx, dMin, dMax); } + + return depths; } } // namespace ue2 diff --git a/src/nfagraph/ng_depth.h b/src/nfagraph/ng_depth.h index 16231ea1..36cca87e 100644 --- a/src/nfagraph/ng_depth.h +++ b/src/nfagraph/ng_depth.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,23 +26,22 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file +/** + * \file * \brief NFA graph vertex depth calculations. */ -#ifndef STRUCTURAL_ANALYSIS_H -#define STRUCTURAL_ANALYSIS_H +#ifndef NG_DEPTH_H +#define NG_DEPTH_H -#include "nfagraph/ng_holder.h" #include "ue2common.h" +#include "nfagraph/ng_holder.h" #include "util/depth.h" #include namespace ue2 { -class NGHolder; - /** * \brief Encapsulates min/max depths relative to the start and startDs * vertices. @@ -72,28 +71,29 @@ struct NFAVertexBidiDepth { }; /** - * \brief Calculate depths from start and startDs. - * Fills the vector \p depths (indexed by \p vertex_index). + * \brief Calculate depths from start and startDs. Returns them in a vector, + * indexed by vertex index. */ -void calcDepths(const NGHolder &g, std::vector &depths); +std::vector calcDepths(const NGHolder &g); /** - * \brief Calculate depths to accept and acceptEod. 
- * Fills the vector \p depths (indexed by \p vertex_index). + * \brief Calculate depths to accept and acceptEod. Returns them in a vector, + * indexed by vertex index. */ -void calcDepths(const NGHolder &g, std::vector &depths); +std::vector calcRevDepths(const NGHolder &g); /** - * \brief Calculate depths to/from all special vertices. - * Fills the vector \p depths (indexed by \p vertex_index). + * \brief Calculate depths to/from all special vertices. Returns them in a + * vector, indexed by vertex index. */ -void calcDepths(const NGHolder &g, std::vector &depths); +std::vector calcBidiDepths(const NGHolder &g); -/** Calculate the (min, max) depths from the given \p src to every vertex in - * the graph and return them in a vector, indexed by \p vertex_index. */ -void calcDepthsFrom(const NGHolder &g, const NFAVertex src, - std::vector &depths); +/** + * \brief Calculate the (min, max) depths from the given \p src to every vertex + * in the graph and return them in a vector, indexed by \p vertex_index. + */ +std::vector calcDepthsFrom(const NGHolder &g, const NFAVertex src); } // namespace ue2 -#endif +#endif // NG_DEPTH_H diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index b9e2bd0d..f03a6629 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -342,9 +342,9 @@ vector partitionGraph(vector> &infos, vector rdepths; if (eq == LEFT_EQUIVALENCE) { - calcDepths(g, depths); + depths = calcDepths(g); } else { - calcDepths(g, rdepths); + rdepths = calcRevDepths(g); } // partition the graph based on CharReach diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index 9417b674..6a625ddf 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -147,8 +147,7 @@ void fillExpressionInfo(ReportManager &rm, NGHolder &g, removeLeadingVirtualVerticesFromRoot(g, g.start); removeLeadingVirtualVerticesFromRoot(g, g.startDs); - vector depths; - calcDepthsFrom(g, g.start, depths); + auto depths = calcDepthsFrom(g, g.start); DepthMinMax d; diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index 19fa2295..bc7f81ef 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -734,8 +734,7 @@ void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) { const auto &report = rm.getReport(*reports.begin()); - vector depths; - calcDepths(g, depths); + auto depths = calcBidiDepths(g); vector dead; @@ -957,8 +956,7 @@ void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) { */ static void removeUnneededOffsetBounds(NGHolder &g, ReportManager &rm) { - vector depths; - calcDepths(g, depths); + auto depths = calcDepths(g); replaceReports(g, [&](NFAVertex v, ReportID id) { const auto &d = depths.at(g[v].index); diff --git a/src/nfagraph/ng_fuzzy.cpp b/src/nfagraph/ng_fuzzy.cpp index fc468126..2c3d85bd 100644 --- a/src/nfagraph/ng_fuzzy.cpp +++ b/src/nfagraph/ng_fuzzy.cpp @@ -603,9 +603,7 @@ private: // check if we will edit our way into a vacuous pattern static bool will_turn_vacuous(const NGHolder &g, u32 edit_distance) { - vector depths; - - calcDepths(g, depths); + auto depths = calcRevDepths(g); depth min_depth = depth::infinity(); auto idx = g[g.start].index; diff --git a/src/nfagraph/ng_puff.cpp b/src/nfagraph/ng_puff.cpp index 7281471f..984518b0 100644 --- a/src/nfagraph/ng_puff.cpp +++ b/src/nfagraph/ng_puff.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in 
source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -94,8 +94,7 @@ void wireNewAccepts(NGHolder &g, NFAVertex head, static bool isFixedDepth(const NGHolder &g, NFAVertex v) { // If the vertex is reachable from startDs, it can't be fixed depth. - vector depthFromStartDs; - calcDepthsFrom(g, g.startDs, depthFromStartDs); + auto depthFromStartDs = calcDepthsFrom(g, g.startDs); u32 idx = g[v].index; const DepthMinMax &ds = depthFromStartDs.at(idx); @@ -104,8 +103,7 @@ bool isFixedDepth(const NGHolder &g, NFAVertex v) { return false; } - vector depthFromStart; - calcDepthsFrom(g, g.start, depthFromStart); + auto depthFromStart = calcDepthsFrom(g, g.start); /* we can still consider the head of a puff chain as at fixed depth if * it has a self-loop: so we look at all the preds of v (other than v diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index c51618ea..60ad2200 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -118,13 +118,12 @@ struct ReachSubgraph { static void findInitDepths(const NGHolder &g, ue2::unordered_map &depths) { - vector d; - calcDepths(g, d); + auto d = calcDepths(g); for (auto v : vertices_range(g)) { - u32 idx = g[v].index; + size_t idx = g[v].index; assert(idx < d.size()); - depths.insert(make_pair(v, d[idx])); + depths.emplace(v, d[idx]); } } diff --git a/src/nfagraph/ng_som_util.cpp b/src/nfagraph/ng_som_util.cpp index 78a39119..a3b6ee5f 100644 --- a/src/nfagraph/ng_som_util.cpp +++ b/src/nfagraph/ng_som_util.cpp @@ -78,8 +78,8 @@ vector getDistancesFromSOM(const NGHolder &g_orig) { //dumpGraph("som_depth.dot", g); - vector temp_depths; // numbered by vertex index in g - calcDepthsFrom(g, g.start, temp_depths); + // Find depths, indexed by vertex index in g + auto temp_depths = calcDepthsFrom(g, g.start); // Transfer depths, indexed by vertex index in g_orig. vector depths(num_vertices(g_orig)); diff --git a/src/nfagraph/ng_stop.cpp b/src/nfagraph/ng_stop.cpp index e601f541..c335540a 100644 --- a/src/nfagraph/ng_stop.cpp +++ b/src/nfagraph/ng_stop.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -60,10 +60,9 @@ namespace { /** Depths from start, startDs for this graph. */ struct InitDepths { - explicit InitDepths(const NGHolder &g) { - calcDepthsFrom(g, g.start, start); - calcDepthsFrom(g, g.startDs, startDs); - } + explicit InitDepths(const NGHolder &g) + : start(calcDepthsFrom(g, g.start)), + startDs(calcDepthsFrom(g, g.startDs)) {} depth maxDist(const NGHolder &g, NFAVertex v) const { u32 idx = g[v].index; diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index e2825643..4195045c 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -1472,8 +1472,7 @@ void avoidOutfixes(RoseInGraph &vg, bool last_chance, if (last_chance) { /* look for a prefix split as it allows us to accept very weak anchored * literals. 
*/ - vector depths; - calcDepths(h, depths); + auto depths = calcDepths(h); split = findBestPrefixSplit(h, depths, vg, {e}, last_chance, cc); @@ -1973,8 +1972,7 @@ bool improvePrefix(NGHolder &h, RoseInGraph &vg, const vector &ee, renumber_vertices(h); renumber_edges(h); - vector depths; - calcDepths(h, depths); + auto depths = calcDepths(h); /* If the reason the prefix is not transient is due to a very long literal * following, we can make it transient by restricting ourselves to using @@ -2856,8 +2854,7 @@ bool splitForImplementabilty(RoseInGraph &vg, NGHolder &h, unique_ptr split; bool last_chance = true; if (h.kind == NFA_PREFIX) { - vector depths; - calcDepths(h, depths); + auto depths = calcDepths(h); split = findBestPrefixSplit(h, depths, vg, edges, last_chance, cc); } else { diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 26f88445..4c895caf 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1928,8 +1928,7 @@ void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set &lit_ids) { } bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) { - vector vertexDepths; - calcDepthsFrom(h, h.start, vertexDepths); + auto vertexDepths = calcDepthsFrom(h, h.start); map > reportMap; /* NFAVertex -> literal ids */ map depthMap; /* literal id -> min/max depth */ diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 4acfc713..ba2f244d 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -198,8 +198,7 @@ static bool pruneOverlong(NGHolder &g, const depth &max_depth, const ReportManager &rm) { bool modified = false; - std::vector depths; - calcDepths(g, depths); + auto depths = calcDepths(g); for (auto v : vertices_range(g)) { if (is_special(v, g)) { From c17085ba350d4706393114f93e06bdc32b33cba9 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 1 May 2017 15:08:06 +1000 Subject: [PATCH 286/326] ng_depth: modernize findLoopReachable --- src/nfagraph/ng_depth.cpp | 40 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index 6c45fa2f..67a6b27b 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -124,34 +124,35 @@ private: } // namespace -template +template static -void findLoopReachable(const GraphT &g, - const typename GraphT::vertex_descriptor srcVertex, - vector &deadNodes) { - typedef typename GraphT::edge_descriptor EdgeT; - typedef typename GraphT::vertex_descriptor VertexT; - typedef set EdgeSet; +vector findLoopReachable(const Graph &g, + const typename Graph::vertex_descriptor src) { + vector deadNodes(num_vertices(g)); + + using Edge = typename Graph::edge_descriptor; + using Vertex = typename Graph::vertex_descriptor; + using EdgeSet = set; EdgeSet deadEdges; BackEdges be(deadEdges); - depth_first_search(g, visitor(be).root_vertex(srcVertex)); + depth_first_search(g, visitor(be).root_vertex(src)); auto af = make_bad_edge_filter(&deadEdges); auto acyclic_g = make_filtered_graph(g, af); - vector topoOrder; /* actually reverse topological order */ + vector topoOrder; /* actually reverse topological order */ topoOrder.reserve(deadNodes.size()); topological_sort(acyclic_g, back_inserter(topoOrder)); for (const auto &e : deadEdges) { - u32 srcIdx = g[source(e, g)].index; + size_t srcIdx = g[source(e, g)].index; if (srcIdx != NODE_START_DOTSTAR) { deadNodes[srcIdx] = true; } } - for (VertexT v : reverse(topoOrder)) { + for (auto v 
: reverse(topoOrder)) { for (const auto &e : in_edges_range(v, g)) { if (deadNodes[g[source(e, g)].index]) { deadNodes[g[v].index] = true; @@ -159,6 +160,8 @@ void findLoopReachable(const GraphT &g, } } } + + return deadNodes; } template @@ -282,8 +285,7 @@ vector calcDepths(const NGHolder &g) { * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed */ - vector deadNodes(numVertices); - findLoopReachable(g, g.start, deadNodes); + auto deadNodes = findLoopReachable(g, g.start); DEBUG_PRINTF("doing start\n"); calcAndStoreDepth(g, g.start, deadNodes, dMin, dMax, depths, @@ -313,8 +315,7 @@ vector calcRevDepths(const NGHolder &g) { * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed */ - vector deadNodes(numVertices); - findLoopReachable(rg, g.acceptEod, deadNodes); + auto deadNodes = findLoopReachable(rg, g.acceptEod); DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth( @@ -341,8 +342,7 @@ vector calcBidiDepths(const NGHolder &g) { * create a filtered graph for max depth calculations: all nodes/edges * reachable from a loop need to be removed */ - vector deadNodes(numVertices); - findLoopReachable(g, g.start, deadNodes); + auto deadNodes = findLoopReachable(g, g.start); DEBUG_PRINTF("doing start\n"); calcAndStoreDepth( @@ -356,8 +356,7 @@ vector calcBidiDepths(const NGHolder &g) { /* Now go backwards */ typedef reverse_graph RevNFAGraph; const RevNFAGraph rg(g); - deadNodes.assign(numVertices, false); - findLoopReachable(rg, g.acceptEod, deadNodes); + deadNodes = findLoopReachable(rg, g.acceptEod); DEBUG_PRINTF("doing accept\n"); calcAndStoreDepth( @@ -376,8 +375,7 @@ vector calcDepthsFrom(const NGHolder &g, const NFAVertex src) { assert(hasCorrectlyNumberedVertices(g)); const size_t numVertices = num_vertices(g); - vector deadNodes(numVertices); - findLoopReachable(g, g.start, deadNodes); + auto deadNodes = findLoopReachable(g, g.start); vector dMin, dMax; calcDepthFromSource(g, src, deadNodes, dMin, dMax); From 725de51f897fc1737314f829fe5673abfc79c57a Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 1 May 2017 11:33:37 +1000 Subject: [PATCH 287/326] determinisation: cleanups, remove shrinkStateSet --- src/nfagraph/ng_mcclellan.cpp | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index e061084e..9448a0bf 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -379,7 +379,9 @@ public: NFAVertex v = sq.first; u32 vert_id = graph[v].index; squash.set(vert_id); - squash_mask[vert_id] = shrinkStateSet(sq.second); + squash_mask[vert_id] + = Automaton_Traits::copy_states(std::move(sq.second), + numStates); } cr_by_index = populateCR(graph, v_by_index, alpha); @@ -387,21 +389,11 @@ public: dynamic_bitset<> temp(numStates); markToppableStarts(graph, unused, single_trigger, triggers, &temp); - toppable = Automaton_Traits::copy_states(temp, numStates); + toppable = Automaton_Traits::copy_states(std::move(temp), + numStates); } } -private: - // Convert an NFAStateSet (as used by the squash code) into a StateSet - StateSet shrinkStateSet(const NFAStateSet &in) const { - StateSet out = Automaton_Traits::init_states(numStates); - for (size_t i = in.find_first(); i != in.npos && i < out.size(); - i = in.find_next(i)) { - out.set(i); - } - return out; - } - public: void transition(const StateSet &in, StateSet *next) { transition_graph(*this, v_by_index, in, next); 
@@ -475,7 +467,7 @@ struct Big_Traits { return StateSet(num); } - static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) { + static StateSet copy_states(dynamic_bitset<> in, UNUSED u32 num) { assert(in.size() == num); return in; } From 29ad557b9cf1d502abfe7a2b2887ebea636903c3 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 1 May 2017 13:31:09 +1000 Subject: [PATCH 288/326] smwr: more aggressive pruning of overlong paths in NFA --- src/smallwrite/smallwrite_build.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index ba2f244d..43900207 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -198,15 +198,16 @@ static bool pruneOverlong(NGHolder &g, const depth &max_depth, const ReportManager &rm) { bool modified = false; - auto depths = calcDepths(g); + auto depths = calcBidiDepths(g); for (auto v : vertices_range(g)) { if (is_special(v, g)) { continue; } const auto &d = depths.at(g[v].index); - depth min_depth = min(d.fromStart.min, d.fromStartDotStar.min); - if (min_depth > max_depth) { + depth min_match_offset = min(d.fromStart.min, d.fromStartDotStar.min) + + min(d.toAccept.min, d.toAcceptEod.min); + if (min_match_offset > max_depth) { clear_vertex(v, g); modified = true; continue; From 4a417c42e5cfb8a726254853493b13e12ffc2ee3 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 2 May 2017 10:20:59 +1000 Subject: [PATCH 289/326] smallwrite: remove unnecessary assertion The alpha remap array is always big enough to remap characters. Silences Coverity issue CID 167663. --- src/smallwrite/smallwrite_build.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 43900207..bb933cbe 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -698,7 +698,6 @@ unique_ptr buildDfa(LitTrie &trie, bool nocase) { continue; } auto v_state = state_ids.at(v); - assert((u16)trie[v].c < alpha.size()); u16 sym = alpha[trie[v].c]; DEBUG_PRINTF("edge to %u on 0x%02x (sym %u)\n", v_state, trie[v].c, sym); From 923e602601d02f3ef5b2ec5084bf2044da946848 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 2 May 2017 11:54:22 +1000 Subject: [PATCH 290/326] ng_equivalence: use flat_set for VertexInfoSet --- src/nfagraph/ng_equivalence.cpp | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/nfagraph/ng_equivalence.cpp b/src/nfagraph/ng_equivalence.cpp index f03a6629..438e5ea8 100644 --- a/src/nfagraph/ng_equivalence.cpp +++ b/src/nfagraph/ng_equivalence.cpp @@ -63,10 +63,10 @@ class VertexInfo; struct VertexInfoPtrCmp { // for flat_set bool operator()(const VertexInfo *a, const VertexInfo *b) const; - // for unordered_set - size_t operator()(const VertexInfo *a) const; }; +using VertexInfoSet = flat_set; + /** Precalculated (and maintained) information about a vertex. 
*/ class VertexInfo { public: @@ -74,8 +74,8 @@ public: : v(v_in), vert_index(g[v].index), cr(g[v].char_reach), equivalence_class(~0), vertex_flags(g[v].assert_flags) {} - flat_set pred; //!< predecessors of this vertex - flat_set succ; //!< successors of this vertex + VertexInfoSet pred; //!< predecessors of this vertex + VertexInfoSet succ; //!< successors of this vertex NFAVertex v; size_t vert_index; CharReach cr; @@ -86,21 +86,11 @@ public: unsigned vertex_flags; }; -} - -typedef ue2::unordered_set VertexInfoSet; - // compare two vertex info pointers on their vertex index bool VertexInfoPtrCmp::operator()(const VertexInfo *a, const VertexInfo *b) const { return a->vert_index < b->vert_index; } -// provide a "hash" for vertex info pointer by returning its vertex index -size_t VertexInfoPtrCmp::operator()(const VertexInfo *a) const { - return a->vert_index; -} - -namespace { // to avoid traversing infomap each time we need to check the class during // partitioning, we will cache the information pertaining to a particular class From c510b85bf1ca19660ce22c010143405eda20f196 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 3 May 2017 14:47:01 +1000 Subject: [PATCH 291/326] whitespace changes --- src/hs_common.h | 4 ++-- src/nfagraph/ng_som.cpp | 4 ++-- src/ue2common.h | 6 +++--- src/util/make_unique.h | 6 +++--- unit/internal/bitutils.cpp | 30 +++++++++++++++--------------- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/hs_common.h b/src/hs_common.h index b25b1842..fac08253 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -545,7 +545,7 @@ hs_error_t hs_valid_platform(void); * At a minimum, Hyperscan requires Supplemental Streaming SIMD Extensions 3 * (SSSE3). 
*/ -#define HS_ARCH_ERROR (-11) +#define HS_ARCH_ERROR (-11) /** @} */ diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 6481eff7..67438103 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -1734,8 +1734,8 @@ namespace { struct SomRevNfa { SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr n) : sink(s), report(r), nfa(move(n)) {} - SomRevNfa(SomRevNfa&& s) // MSVC2013 needs this for emplace - : sink(s.sink), report(s.report), nfa(move(s.nfa)) {} + SomRevNfa(SomRevNfa &&s) // MSVC2013 needs this for emplace + : sink(s.sink), report(s.report), nfa(move(s.nfa)) {} NFAVertex sink; ReportID report; bytecode_ptr nfa; diff --git a/src/ue2common.h b/src/ue2common.h index e1f03f72..4bec8315 100644 --- a/src/ue2common.h +++ b/src/ue2common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -189,8 +189,8 @@ typedef u32 ReportID; #define unlikely(x) __builtin_expect(!!(x), 0) #endif #else -#define likely(x) (x) -#define unlikely(x) (x) +#define likely(x) (x) +#define unlikely(x) (x) #endif #if !defined(RELEASE_BUILD) || defined(DEBUG) diff --git a/src/util/make_unique.h b/src/util/make_unique.h index 12148af1..651e8c5c 100644 --- a/src/util/make_unique.h +++ b/src/util/make_unique.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,9 +39,9 @@ namespace ue2 { #if defined(USE_STD) - using std::make_unique; +using std::make_unique; #else - using boost::make_unique; +using boost::make_unique; #endif } diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp index 7241c0b8..3f788544 100644 --- a/unit/internal/bitutils.cpp +++ b/unit/internal/bitutils.cpp @@ -36,26 +36,26 @@ // open coded implementations to test against static u32 our_clz(u32 x) { - u32 n; + u32 n; - if (x == 0) return(32); - n = 0; - if (x <= 0x0000FFFF) { n = n + 16; x = x << 16; } - if (x <= 0x00FFFFFF) { n = n + 8; x = x << 8; } - if (x <= 0x0FFFFFFF) { n = n + 4; x = x << 4; } - if (x <= 0x3FFFFFFF) { n = n + 2; x = x << 2; } - if (x <= 0x7FFFFFFF) { n = n + 1; } - return n; + if (x == 0) return(32); + n = 0; + if (x <= 0x0000FFFF) { n = n + 16; x = x << 16; } + if (x <= 0x00FFFFFF) { n = n + 8; x = x << 8; } + if (x <= 0x0FFFFFFF) { n = n + 4; x = x << 4; } + if (x <= 0x3FFFFFFF) { n = n + 2; x = x << 2; } + if (x <= 0x7FFFFFFF) { n = n + 1; } + return n; } static u32 our_clzll(u64a x) { - // Synthesise from 32-bit variant. - u32 high = x >> 32; - if (high) { - return our_clz(high); - } - return 32 + our_clz(x); + // Synthesise from 32-bit variant. 
+ u32 high = x >> 32; + if (high) { + return our_clz(high); + } + return 32 + our_clz(x); } From e12298568e64d9d51b3d3ffcdb7c0639a64a6b14 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Thu, 4 May 2017 10:37:45 +1000 Subject: [PATCH 292/326] hsbench: use boost:crc for db filename --- tools/hsbench/engine_hyperscan.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/hsbench/engine_hyperscan.cpp b/tools/hsbench/engine_hyperscan.cpp index eadc1cc4..9674e5c8 100644 --- a/tools/hsbench/engine_hyperscan.cpp +++ b/tools/hsbench/engine_hyperscan.cpp @@ -36,7 +36,6 @@ #include "huge.h" #include "timer.h" -#include "crc32.h" #include "database.h" #include "hs_compile.h" #include "hs_internal.h" @@ -46,12 +45,15 @@ #include #include +#include #include #include #include #include #include +#include + using namespace std; EngineContext::EngineContext(const hs_database_t *db) { @@ -230,11 +232,13 @@ string dbSettingsHash(const string &filename, u32 mode) { string info = info_oss.str(); - u32 crc = Crc32c_ComputeBuf(0, info.data(), info.size()); + boost::crc_32_type crc; + + crc.process_bytes(info.data(), info.length()); // return STL string with printable version of digest ostringstream oss; - oss << hex << setw(8) << setfill('0') << crc << dec; + oss << hex << setw(8) << setfill('0') << crc.checksum() << dec; return oss.str(); } From 2b1a7da188a99657439461c04091c7db3e1f5695 Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Mon, 8 May 2017 10:51:19 +1000 Subject: [PATCH 293/326] deterministic assembleProgramBlocks() --- src/rose/rose_build_bytecode.cpp | 1 - src/rose/rose_build_program.cpp | 38 ++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 02304ae2..4d0793bf 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2819,7 +2819,6 @@ vector groupByFragment(const RoseBuildImpl &build) { auto &fi = m.second; DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), as_string_list(fi.lit_ids).c_str()); - sort(fi.lit_ids.begin(), fi.lit_ids.end()); /* to match old behaviour */ fragments.emplace_back(frag_id, fi.groups, move(fi.lit_ids)); frag_id++; assert(frag_id == fragments.size()); diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp index eb9db5a6..23a8b959 100644 --- a/src/rose/rose_build_program.cpp +++ b/src/rose/rose_build_program.cpp @@ -1923,17 +1923,37 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id, prog.add_before_end(make_unique(~info.group_mask)); } -RoseProgram assembleProgramBlocks(vector &&blocks) { - DEBUG_PRINTF("%zu blocks before dedupe\n", blocks.size()); +namespace { +struct ProgKey { + ProgKey(const RoseProgram &p) : prog(&p) { } - sort(blocks.begin(), blocks.end(), - [](const RoseProgram &a, const RoseProgram &b) { - RoseProgramHash hasher; - return hasher(a) < hasher(b); - }); + bool operator==(const ProgKey &b) const { + return RoseProgramEquivalence()(*prog, *b.prog); + } - blocks.erase(unique(blocks.begin(), blocks.end(), RoseProgramEquivalence()), - blocks.end()); + friend size_t hash_value(const ProgKey &a) { + return RoseProgramHash()(*a.prog); + } +private: + const RoseProgram *prog; +}; +} + +RoseProgram assembleProgramBlocks(vector &&blocks_in) { + DEBUG_PRINTF("%zu blocks before dedupe\n", blocks_in.size()); + + vector blocks; + blocks.reserve(blocks_in.size()); /* to ensure stable reference for seen */ + + unordered_set 
seen; + for (auto &block : blocks_in) { + if (contains(seen, block)) { + continue; + } + + blocks.push_back(move(block)); + seen.emplace(blocks.back()); + } DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size()); From 423569ec82dab9df013de67c169a9e436a6e67f7 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 31 Mar 2017 10:38:03 +1100 Subject: [PATCH 294/326] De-multiaccel --- CMakeLists.txt | 23 -- src/nfa/accel.c | 219 +------------ src/nfa/accel.h | 61 +--- src/nfa/accel_dump.cpp | 103 +----- src/nfa/accelcompile.cpp | 273 +--------------- src/nfa/accelcompile.h | 30 +- src/nfa/limex_accel.c | 3 - src/nfa/limex_compile.cpp | 59 +--- src/nfa/multiaccel_common.h | 265 ---------------- src/nfa/multiaccel_compilehelper.cpp | 439 -------------------------- src/nfa/multiaccel_compilehelper.h | 75 ----- src/nfa/multiaccel_doubleshift.h | 149 --------- src/nfa/multiaccel_doubleshiftgrab.h | 152 --------- src/nfa/multiaccel_long.h | 145 --------- src/nfa/multiaccel_longgrab.h | 148 --------- src/nfa/multiaccel_shift.h | 145 --------- src/nfa/multiaccel_shiftgrab.h | 148 --------- src/nfa/multishufti.c | 115 ------- src/nfa/multishufti.h | 70 ----- src/nfa/multishufti_avx2.h | 121 ------- src/nfa/multishufti_sse.h | 265 ---------------- src/nfa/multitruffle.c | 111 ------- src/nfa/multitruffle.h | 73 ----- src/nfa/multitruffle_avx2.h | 125 -------- src/nfa/multitruffle_sse.h | 265 ---------------- src/nfa/multivermicelli.c | 109 ------- src/nfa/multivermicelli.h | 62 ---- src/nfa/multivermicelli_avx2.h | 283 ----------------- src/nfa/multivermicelli_sse.h | 452 --------------------------- src/nfa/shufti.c | 96 +++++- src/nfa/shufti_common.h | 146 --------- src/nfa/truffle.c | 110 ++++++- src/nfa/truffle_common.h | 147 --------- src/nfagraph/ng_limex_accel.cpp | 129 -------- src/nfagraph/ng_limex_accel.h | 10 +- unit/CMakeLists.txt | 2 - unit/internal/multiaccel_matcher.cpp | 301 ------------------ unit/internal/multiaccel_shift.cpp | 81 ----- 38 files changed, 217 insertions(+), 5293 deletions(-) delete mode 100644 src/nfa/multiaccel_common.h delete mode 100644 src/nfa/multiaccel_compilehelper.cpp delete mode 100644 src/nfa/multiaccel_compilehelper.h delete mode 100644 src/nfa/multiaccel_doubleshift.h delete mode 100644 src/nfa/multiaccel_doubleshiftgrab.h delete mode 100644 src/nfa/multiaccel_long.h delete mode 100644 src/nfa/multiaccel_longgrab.h delete mode 100644 src/nfa/multiaccel_shift.h delete mode 100644 src/nfa/multiaccel_shiftgrab.h delete mode 100644 src/nfa/multishufti.c delete mode 100644 src/nfa/multishufti.h delete mode 100644 src/nfa/multishufti_avx2.h delete mode 100644 src/nfa/multishufti_sse.h delete mode 100644 src/nfa/multitruffle.c delete mode 100644 src/nfa/multitruffle.h delete mode 100644 src/nfa/multitruffle_avx2.h delete mode 100644 src/nfa/multitruffle_sse.h delete mode 100644 src/nfa/multivermicelli.c delete mode 100644 src/nfa/multivermicelli.h delete mode 100644 src/nfa/multivermicelli_avx2.h delete mode 100644 src/nfa/multivermicelli_sse.h delete mode 100644 src/nfa/shufti_common.h delete mode 100644 src/nfa/truffle_common.h delete mode 100644 unit/internal/multiaccel_matcher.cpp delete mode 100644 unit/internal/multiaccel_shift.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f5d661f..93f3c152 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -557,25 +557,6 @@ set (hs_exec_SRCS src/nfa/mpv.h src/nfa/mpv.c src/nfa/mpv_internal.h - src/nfa/multiaccel_common.h - src/nfa/multiaccel_doubleshift.h - src/nfa/multiaccel_doubleshiftgrab.h - 
src/nfa/multiaccel_long.h - src/nfa/multiaccel_longgrab.h - src/nfa/multiaccel_shift.h - src/nfa/multiaccel_shiftgrab.h - src/nfa/multishufti.c - src/nfa/multishufti_avx2.h - src/nfa/multishufti_sse.h - src/nfa/multishufti.h - src/nfa/multitruffle.c - src/nfa/multitruffle_avx2.h - src/nfa/multitruffle_sse.h - src/nfa/multitruffle.h - src/nfa/multivermicelli.c - src/nfa/multivermicelli.h - src/nfa/multivermicelli_sse.h - src/nfa/multivermicelli_avx2.h src/nfa/nfa_api.h src/nfa/nfa_api_dispatch.c src/nfa/nfa_internal.h @@ -589,13 +570,11 @@ set (hs_exec_SRCS src/nfa/sheng_impl.h src/nfa/sheng_impl4.h src/nfa/sheng_internal.h - src/nfa/shufti_common.h src/nfa/shufti.c src/nfa/shufti.h src/nfa/tamarama.c src/nfa/tamarama.h src/nfa/tamarama_internal.h - src/nfa/truffle_common.h src/nfa/truffle.c src/nfa/truffle.h src/nfa/vermicelli.h @@ -736,8 +715,6 @@ SET (hs_SRCS src/nfa/mpv_internal.h src/nfa/mpvcompile.cpp src/nfa/mpvcompile.h - src/nfa/multiaccel_compilehelper.cpp - src/nfa/multiaccel_compilehelper.h src/nfa/nfa_api.h src/nfa/nfa_api_queue.h src/nfa/nfa_api_util.h diff --git a/src/nfa/accel.c b/src/nfa/accel.c index 99eab11d..2bc60945 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,9 +30,6 @@ #include "shufti.h" #include "truffle.h" #include "vermicelli.h" -#include "multishufti.h" -#include "multitruffle.h" -#include "multivermicelli.h" #include "ue2common.h" const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { @@ -132,220 +129,6 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { rv = c_end; break; - /* multibyte matchers */ - case ACCEL_MLVERM: - DEBUG_PRINTF("accel mlverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MLVERM_NOCASE: - DEBUG_PRINTF("accel mlverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MLGVERM: - DEBUG_PRINTF("accel mlgverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MLGVERM_NOCASE: - DEBUG_PRINTF("accel mlgverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MSVERM: - DEBUG_PRINTF("accel msverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MSVERM_NOCASE: - DEBUG_PRINTF("accel msverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case ACCEL_MSGVERM: - DEBUG_PRINTF("accel msgverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_vermicelliExec(accel->mverm.c, 0, c, c_end, accel->mverm.len); - break; - case ACCEL_MSGVERM_NOCASE: - DEBUG_PRINTF("accel msgverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_vermicelliExec(accel->mverm.c, 1, c, c_end, accel->mverm.len); - break; - case 
ACCEL_MDSVERM: - DEBUG_PRINTF("accel mdsverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_vermicelliExec(accel->mdverm.c, 0, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MDSVERM_NOCASE: - DEBUG_PRINTF("accel mdsverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_vermicelliExec(accel->mdverm.c, 1, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MDSGVERM: - DEBUG_PRINTF("accel mdsgverm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 0, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MDSGVERM_NOCASE: - DEBUG_PRINTF("accel mdsgverm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_vermicelliExec(accel->mdverm.c, 1, c, c_end, - accel->mdverm.len1, accel->mdverm.len2); - break; - case ACCEL_MLSHUFTI: - DEBUG_PRINTF("accel mlshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MLGSHUFTI: - DEBUG_PRINTF("accel mlgshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MSSHUFTI: - DEBUG_PRINTF("accel msshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MSGSHUFTI: - DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_shuftiExec(accel->mshufti.lo, accel->mshufti.hi, c, c_end, - accel->mshufti.len); - break; - case ACCEL_MDSSHUFTI: - DEBUG_PRINTF("accel mdsshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, - accel->mdshufti.len1, accel->mdshufti.len2); - break; - case ACCEL_MDSGSHUFTI: - DEBUG_PRINTF("accel msgshufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_shuftiExec(accel->mdshufti.lo, accel->mdshufti.hi, c, c_end, - accel->mdshufti.len1, accel->mdshufti.len2); - break; - case ACCEL_MLTRUFFLE: - DEBUG_PRINTF("accel mltruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = long_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MLGTRUFFLE: - DEBUG_PRINTF("accel mlgtruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = longgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MSTRUFFLE: - DEBUG_PRINTF("accel mstruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shift_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MSGTRUFFLE: - DEBUG_PRINTF("accel msgtruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shiftgrab_truffleExec(accel->mtruffle.mask1, accel->mtruffle.mask2, - c, c_end, accel->mtruffle.len); - break; - case ACCEL_MDSTRUFFLE: - DEBUG_PRINTF("accel mdstruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshift_truffleExec(accel->mdtruffle.mask1, - accel->mdtruffle.mask2, c, c_end, - accel->mdtruffle.len1, - 
accel->mdtruffle.len2); - break; - case ACCEL_MDSGTRUFFLE: - DEBUG_PRINTF("accel mdsgtruffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = doubleshiftgrab_truffleExec(accel->mdtruffle.mask1, - accel->mdtruffle.mask2, c, c_end, - accel->mdtruffle.len1, - accel->mdtruffle.len2); - break; - default: assert(!"not here"); diff --git a/src/nfa/accel.h b/src/nfa/accel.h index a13563b6..3a03d059 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,36 +61,7 @@ enum AccelType { ACCEL_DSHUFTI, ACCEL_TRUFFLE, ACCEL_RED_TAPE, - /* multibyte vermicellis */ - ACCEL_MLVERM, - ACCEL_MLVERM_NOCASE, - ACCEL_MLGVERM, - ACCEL_MLGVERM_NOCASE, - ACCEL_MSVERM, - ACCEL_MSVERM_NOCASE, - ACCEL_MSGVERM, - ACCEL_MSGVERM_NOCASE, - ACCEL_MDSVERM, - ACCEL_MDSVERM_NOCASE, - ACCEL_MDSGVERM, - ACCEL_MDSGVERM_NOCASE, - /* multibyte shuftis */ - ACCEL_MLSHUFTI, - ACCEL_MLGSHUFTI, - ACCEL_MSSHUFTI, - ACCEL_MSGSHUFTI, - ACCEL_MDSSHUFTI, - ACCEL_MDSGSHUFTI, - /* multibyte truffles */ - ACCEL_MLTRUFFLE, - ACCEL_MLGTRUFFLE, - ACCEL_MSTRUFFLE, - ACCEL_MSGTRUFFLE, - ACCEL_MDSTRUFFLE, - ACCEL_MDSGTRUFFLE, - /* masked dverm */ ACCEL_DVERM_MASKED, - }; /** \brief Structure for accel framework. */ @@ -140,42 +111,12 @@ union AccelAux { m128 lo2; m128 hi2; } dshufti; - struct { - u8 accel_type; - u8 offset; - m128 lo; - m128 hi; - u8 len; - } mshufti; - struct { - u8 accel_type; - u8 offset; - m128 lo; - m128 hi; - u8 len1; - u8 len2; - } mdshufti; struct { u8 accel_type; u8 offset; m128 mask1; m128 mask2; } truffle; - struct { - u8 accel_type; - u8 offset; - m128 mask1; - m128 mask2; - u8 len; - } mtruffle; - struct { - u8 accel_type; - u8 offset; - m128 mask1; - m128 mask2; - u8 len1; - u8 len2; - } mdtruffle; }; /** diff --git a/src/nfa/accel_dump.cpp b/src/nfa/accel_dump.cpp index e99e71a5..0d19fa8c 100644 --- a/src/nfa/accel_dump.cpp +++ b/src/nfa/accel_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -93,54 +93,6 @@ const char *accelName(u8 accel_type) { return "truffle"; case ACCEL_RED_TAPE: return "red tape"; - case ACCEL_MLVERM: - return "multibyte long vermicelli"; - case ACCEL_MLVERM_NOCASE: - return "multibyte long vermicelli nocase"; - case ACCEL_MLGVERM: - return "multibyte long-grab vermicelli"; - case ACCEL_MLGVERM_NOCASE: - return "multibyte long-grab vermicelli nocase"; - case ACCEL_MSVERM: - return "multibyte shift vermicelli"; - case ACCEL_MSVERM_NOCASE: - return "multibyte shift vermicelli nocase"; - case ACCEL_MSGVERM: - return "multibyte shift-grab vermicelli"; - case ACCEL_MSGVERM_NOCASE: - return "multibyte shift-grab vermicelli nocase"; - case ACCEL_MDSVERM: - return "multibyte doubleshift vermicelli"; - case ACCEL_MDSVERM_NOCASE: - return "multibyte doubleshift vermicelli nocase"; - case ACCEL_MDSGVERM: - return "multibyte doubleshift-grab vermicelli"; - case ACCEL_MDSGVERM_NOCASE: - return "multibyte doubleshift-grab vermicelli nocase"; - case ACCEL_MLSHUFTI: - return "multibyte long shufti"; - case ACCEL_MLGSHUFTI: - return "multibyte long-grab shufti"; - case ACCEL_MSSHUFTI: - return "multibyte shift 
shufti"; - case ACCEL_MSGSHUFTI: - return "multibyte shift-grab shufti"; - case ACCEL_MDSSHUFTI: - return "multibyte doubleshift shufti"; - case ACCEL_MDSGSHUFTI: - return "multibyte doubleshift-grab shufti"; - case ACCEL_MLTRUFFLE: - return "multibyte long truffle"; - case ACCEL_MLGTRUFFLE: - return "multibyte long-grab truffle"; - case ACCEL_MSTRUFFLE: - return "multibyte shift truffle"; - case ACCEL_MSGTRUFFLE: - return "multibyte shift-grab truffle"; - case ACCEL_MDSTRUFFLE: - return "multibyte doubleshift truffle"; - case ACCEL_MDSGTRUFFLE: - return "multibyte doubleshift-grab truffle"; default: return "unknown!"; } @@ -283,59 +235,6 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) { (const u8 *)&accel.truffle.mask2); break; } - case ACCEL_MLVERM: - case ACCEL_MLVERM_NOCASE: - case ACCEL_MLGVERM: - case ACCEL_MLGVERM_NOCASE: - case ACCEL_MSVERM: - case ACCEL_MSVERM_NOCASE: - case ACCEL_MSGVERM: - case ACCEL_MSGVERM_NOCASE: - fprintf(f, " [\\x%02hhx] len:%u\n", accel.mverm.c, accel.mverm.len); - break; - case ACCEL_MDSVERM: - case ACCEL_MDSVERM_NOCASE: - case ACCEL_MDSGVERM: - case ACCEL_MDSGVERM_NOCASE: - fprintf(f, " [\\x%02hhx] len1:%u len2:%u\n", accel.mdverm.c, accel.mdverm.len1, - accel.mdverm.len2); - break; - case ACCEL_MLSHUFTI: - case ACCEL_MLGSHUFTI: - case ACCEL_MSSHUFTI: - case ACCEL_MSGSHUFTI: - fprintf(f, " len:%u\n", accel.mshufti.len); - dumpShuftiMasks(f, (const u8 *)&accel.mshufti.lo, - (const u8 *)&accel.mshufti.hi); - dumpShuftiCharReach(f, (const u8 *)&accel.mshufti.lo, - (const u8 *)&accel.mshufti.hi); - break; - case ACCEL_MDSSHUFTI: - case ACCEL_MDSGSHUFTI: - fprintf(f, " len1:%u len2:%u\n", accel.mdshufti.len1, accel.mdshufti.len2); - dumpShuftiMasks(f, (const u8 *)&accel.mdshufti.lo, - (const u8 *)&accel.mdshufti.hi); - dumpShuftiCharReach(f, (const u8 *)&accel.mdshufti.lo, - (const u8 *)&accel.mdshufti.hi); - break; - case ACCEL_MLTRUFFLE: - case ACCEL_MLGTRUFFLE: - case ACCEL_MSTRUFFLE: - case ACCEL_MSGTRUFFLE: - fprintf(f, " len:%u\n", accel.mtruffle.len); - dumpTruffleMasks(f, (const u8 *)&accel.mtruffle.mask1, - (const u8 *)&accel.mtruffle.mask2); - dumpTruffleCharReach(f, (const u8 *)&accel.mtruffle.mask1, - (const u8 *)&accel.mtruffle.mask2); - break; - case ACCEL_MDSTRUFFLE: - case ACCEL_MDSGTRUFFLE: - fprintf(f, " len1:%u len2:%u\n", accel.mdtruffle.len1, accel.mdtruffle.len2); - dumpTruffleMasks(f, (const u8 *)&accel.mdtruffle.mask1, - (const u8 *)&accel.mdtruffle.mask2); - dumpTruffleCharReach(f, (const u8 *)&accel.mdtruffle.mask1, - (const u8 *)&accel.mdtruffle.mask2); - break; default: fprintf(f, "\n"); break; diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 32e569ba..a224410d 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -225,274 +225,6 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_NONE; } -static -void buildAccelMulti(const AccelInfo &info, AccelAux *aux) { - if (info.ma_type == MultibyteAccelInfo::MAT_NONE) { - DEBUG_PRINTF("no multimatch for us :("); - return; - } - - u32 offset = info.multiaccel_offset; - const CharReach &stops = info.multiaccel_stops; - - assert(aux->accel_type == ACCEL_NONE); - if (stops.all()) { - return; - } - - size_t outs = stops.count(); - DEBUG_PRINTF("%zu outs\n", 
outs); - assert(outs && outs < 256); - - switch (info.ma_type) { - case MultibyteAccelInfo::MAT_LONG: - if (outs == 1) { - aux->accel_type = ACCEL_MLVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MLVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_LONGGRAB: - if (outs == 1) { - aux->accel_type = ACCEL_MLGVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MLGVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_SHIFT: - if (outs == 1) { - aux->accel_type = ACCEL_MSVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MSVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_SHIFTGRAB: - if (outs == 1) { - aux->accel_type = ACCEL_MSGVERM; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first(); - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MSGVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mverm.len = info.ma_len1; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_DSHIFT: - if (outs == 1) { - aux->accel_type = ACCEL_MDSVERM; - aux->mdverm.offset = offset; - aux->mdverm.c = stops.find_first(); - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MDSVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - if (outs == 1) { - aux->accel_type = ACCEL_MDSGVERM; - aux->mdverm.offset = offset; - aux->mdverm.c = stops.find_first(); - aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - if (outs == 2 && stops.isCaselessChar()) { - aux->accel_type = ACCEL_MDSGVERM_NOCASE; - aux->mverm.offset = offset; - aux->mverm.c = stops.find_first() & CASE_CLEAR; - 
aux->mdverm.len1 = info.ma_len1; - aux->mdverm.len2 = info.ma_len2; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - break; - default: - // shouldn't happen - assert(0); - return; - } - - DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); - - switch (info.ma_type) { - case MultibyteAccelInfo::MAT_LONG: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MLSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_LONGGRAB: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MLGSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_SHIFT: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MSSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_SHIFTGRAB: - if (shuftiBuildMasks(stops, (u8 *)&aux->mshufti.lo, - (u8 *)&aux->mshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MSGSHUFTI; - aux->mshufti.offset = offset; - aux->mshufti.len = info.ma_len1; - return; - case MultibyteAccelInfo::MAT_DSHIFT: - if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, - (u8 *)&aux->mdshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MDSSHUFTI; - aux->mdshufti.offset = offset; - aux->mdshufti.len1 = info.ma_len1; - aux->mdshufti.len2 = info.ma_len2; - return; - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - if (shuftiBuildMasks(stops, (u8 *)&aux->mdshufti.lo, - (u8 *)&aux->mdshufti.hi) == -1) { - break; - } - aux->accel_type = ACCEL_MDSGSHUFTI; - aux->mdshufti.offset = offset; - aux->mdshufti.len1 = info.ma_len1; - aux->mdshufti.len2 = info.ma_len2; - return; - default: - // shouldn't happen - assert(0); - return; - } - DEBUG_PRINTF("shufti build failed, falling through\n"); - - if (outs <= ACCEL_MAX_STOP_CHAR) { - DEBUG_PRINTF("building Truffle for %zu chars\n", outs); - switch (info.ma_type) { - case MultibyteAccelInfo::MAT_LONG: - aux->accel_type = ACCEL_MLTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_LONGGRAB: - aux->accel_type = ACCEL_MLGTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_SHIFT: - aux->accel_type = ACCEL_MSTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_SHIFTGRAB: - aux->accel_type = ACCEL_MSGTRUFFLE; - aux->mtruffle.offset = offset; - aux->mtruffle.len = info.ma_len1; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_DSHIFT: - aux->accel_type = ACCEL_MDSTRUFFLE; - aux->mdtruffle.offset = offset; - aux->mdtruffle.len1 = info.ma_len1; - aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mdtruffle.mask2); - break; - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - aux->accel_type = ACCEL_MDSGTRUFFLE; - aux->mdtruffle.offset = offset; - 
aux->mdtruffle.len1 = info.ma_len1; - aux->mdtruffle.len2 = info.ma_len2; - truffleBuildMasks(stops, (u8 *)&aux->mtruffle.mask1, - (u8 *)&aux->mdtruffle.mask2); - break; - default: - // shouldn't happen - assert(0); - return; - } - return; - } - - DEBUG_PRINTF("unable to accelerate multibyte case with %zu outs\n", outs); -} - bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { assert(aux->accel_type == ACCEL_NONE); if (info.single_stops.none()) { @@ -500,9 +232,6 @@ bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { aux->accel_type = ACCEL_RED_TAPE; aux->generic.offset = info.single_offset; } - if (aux->accel_type == ACCEL_NONE) { - buildAccelMulti(info, aux); - } if (aux->accel_type == ACCEL_NONE) { buildAccelDouble(info, aux); } diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h index 9b30146c..9bd4ff18 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,30 +37,9 @@ union AccelAux; namespace ue2 { -struct MultibyteAccelInfo { - /* multibyte accel schemes, ordered by strength */ - enum multiaccel_type { - MAT_SHIFT, - MAT_SHIFTGRAB, - MAT_DSHIFT, - MAT_DSHIFTGRAB, - MAT_LONG, - MAT_LONGGRAB, - MAT_MAX, - MAT_NONE = MAT_MAX - }; - CharReach cr; - u32 offset = 0; - u32 len1 = 0; - u32 len2 = 0; - multiaccel_type type = MAT_NONE; -}; - struct AccelInfo { AccelInfo() : single_offset(0U), double_offset(0U), - single_stops(CharReach::dot()), - multiaccel_offset(0), ma_len1(0), ma_len2(0), - ma_type(MultibyteAccelInfo::MAT_NONE) {} + single_stops(CharReach::dot()) {} u32 single_offset; /**< offset correction to apply to single schemes */ u32 double_offset; /**< offset correction to apply to double schemes */ CharReach double_stop1; /**< single-byte accel stop literals for double @@ -68,11 +47,6 @@ struct AccelInfo { flat_set> double_stop2; /**< double-byte accel stop * literals */ CharReach single_stops; /**< escapes for single byte acceleration */ - u32 multiaccel_offset; /**< offset correction to apply to multibyte schemes */ - CharReach multiaccel_stops; /**< escapes for multibyte acceleration */ - u32 ma_len1; /**< multiaccel len1 */ - u32 ma_len2; /**< multiaccel len2 */ - MultibyteAccelInfo::multiaccel_type ma_type; /**< multiaccel type */ }; bool buildAccelAux(const AccelInfo &info, AccelAux *aux); diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index a96dea43..c34216f3 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -39,9 +39,6 @@ #include "nfa_internal.h" #include "shufti.h" #include "truffle.h" -#include "multishufti.h" -#include "multitruffle.h" -#include "multivermicelli.h" #include "ue2common.h" #include "vermicelli.h" #include "util/arch.h" diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 3cdf5de1..7183d4b7 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -93,8 +93,6 @@ struct precalcAccel { CharReach double_cr; flat_set> double_lits; /* double-byte accel stop literals */ u32 double_offset; - - MultibyteAccelInfo ma_info; }; struct limex_accel_info { @@ -358,16 +356,12 @@ void buildReachMapping(const build_info &args, vector &reach, } struct AccelBuild { - AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0), ma_len1(0), - ma_len2(0), ma_type(MultibyteAccelInfo::MAT_NONE) {} + AccelBuild() : 
v(NGHolder::null_vertex()), state(0), offset(0) {} NFAVertex v; u32 state; u32 offset; // offset correction to apply CharReach stop1; // single-byte accel stop literals flat_set> stop2; // double-byte accel stop literals - u32 ma_len1; // multiaccel len1 - u32 ma_len2; // multiaccel len2 - MultibyteAccelInfo::multiaccel_type ma_type; // multiaccel type }; static @@ -382,12 +376,7 @@ void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) { build.stop1 = CharReach::dot(); } else { const precalcAccel &precalc = bi.accel.precalc.at(ss); - unsigned ma_len = precalc.ma_info.len1 + precalc.ma_info.len2; - if (ma_len >= MULTIACCEL_MIN_LEN) { - build.ma_len1 = precalc.ma_info.len1; - build.stop1 = precalc.ma_info.cr; - build.offset = precalc.ma_info.offset; - } else if (precalc.double_lits.empty()) { + if (precalc.double_lits.empty()) { build.stop1 = precalc.single_cr; build.offset = precalc.single_offset; } else { @@ -606,7 +595,6 @@ void fillAccelInfo(build_info &bi) { limex_accel_info &accel = bi.accel; unordered_map &accel_map = accel.accel_map; const map &br_cyclic = bi.br_cyclic; - const CompileContext &cc = bi.cc; const unordered_map &state_ids = bi.state_ids; const u32 num_states = bi.num_states; @@ -663,27 +651,17 @@ void fillAccelInfo(build_info &bi) { DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset, as.double_offset); - // try multibyte acceleration first - MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc); - precalcAccel &pa = accel.precalc[state_set]; - useful |= state_set; - - // if we successfully built a multibyte accel scheme, use that - if (mai.type != MultibyteAccelInfo::MAT_NONE) { - pa.ma_info = mai; - - DEBUG_PRINTF("multibyte acceleration!\n"); - continue; - } - pa.single_offset = as.offset; pa.single_cr = as.cr; + if (as.double_byte.size() != 0) { pa.double_offset = as.double_offset; pa.double_lits = as.double_byte; pa.double_cr = as.double_cr; - }; + } + + useful |= state_set; } for (const auto &m : accel_map) { @@ -700,19 +678,8 @@ void fillAccelInfo(build_info &bi) { state_set.reset(); state_set.set(state_id); - bool is_multi = false; - auto p_it = accel.precalc.find(state_set); - if (p_it != accel.precalc.end()) { - const precalcAccel &pa = p_it->second; - offset = max(pa.double_offset, pa.single_offset); - is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE; - assert(offset <= MAX_ACCEL_DEPTH); - } - accel.accelerable.insert(v); - if (!is_multi) { - findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); - } + findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); } } @@ -954,16 +921,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, if (contains(accel.precalc, effective_states)) { const auto &precalc = accel.precalc.at(effective_states); - if (precalc.ma_info.type != MultibyteAccelInfo::MAT_NONE) { - ainfo.ma_len1 = precalc.ma_info.len1; - ainfo.ma_len2 = precalc.ma_info.len2; - ainfo.multiaccel_offset = precalc.ma_info.offset; - ainfo.multiaccel_stops = precalc.ma_info.cr; - ainfo.ma_type = precalc.ma_info.type; - } else { - ainfo.single_offset = precalc.single_offset; - ainfo.single_stops = precalc.single_cr; - } + ainfo.single_offset = precalc.single_offset; + ainfo.single_stops = precalc.single_cr; } } diff --git a/src/nfa/multiaccel_common.h b/src/nfa/multiaccel_common.h deleted file mode 100644 index 1a13c3b6..00000000 --- a/src/nfa/multiaccel_common.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary 
forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_COMMON_H_ -#define MULTIACCEL_COMMON_H_ - -#include "config.h" -#include "ue2common.h" -#include "util/join.h" -#include "util/bitutils.h" - -/* - * When doing shifting, remember that the total number of shifts should be n-1 - */ -#define VARISHIFT(src, dst, len) \ - do { \ - (dst) &= (src) >> (len); \ - } while (0) -#define STATIC_SHIFT1(x) \ - do { \ - (x) &= (x) >> 1; \ - } while (0) -#define STATIC_SHIFT2(x) \ - do { \ - (x) &= (x) >> 2;\ - } while (0) -#define STATIC_SHIFT4(x) \ - do { \ - (x) &= (x) >> 4; \ - } while (0) -#define STATIC_SHIFT8(x) \ - do { \ - (x) &= (x) >> 8; \ - } while (0) -#define SHIFT1(x) \ - do {} while (0) -#define SHIFT2(x) \ - do { \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT3(x) \ - do { \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT4(x) \ - do { \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT5(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT6(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT7(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT8(x) \ - do { \ - SHIFT4(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT9(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT10(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT11(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT12(x); \ - do { \ - SHIFT8(x);\ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT13(x); \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT14(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT15(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT16(x) \ - do { \ - SHIFT8(x); \ - STATIC_SHIFT8(x); \ - } while (0) -#define SHIFT17(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ 
- } while (0) -#define SHIFT18(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT19(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT20(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT21(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT22(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT23(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT24(x) \ - do { \ - SHIFT16(x); \ - STATIC_SHIFT8(x); \ - } while (0) -#define SHIFT25(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - } while (0) -#define SHIFT26(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT27(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - } while (0) -#define SHIFT28(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT29(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT30(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT31(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT1(x); \ - STATIC_SHIFT2(x); \ - STATIC_SHIFT4(x); \ - } while (0) -#define SHIFT32(x) \ - do { \ - SHIFT24(x); \ - STATIC_SHIFT8(x); \ - } while (0) - -/* - * this function is used by 32-bit multiaccel matchers. 32-bit matchers accept - * a 32-bit integer as a buffer, where low 16 bits is movemask result and - * high 16 bits are "don't care" values. this function is not expected to return - * a result higher than 16. - */ -static really_inline -const u8 *match32(const u8 *buf, const u32 z) { - if (unlikely(z != 0)) { - u32 pos = ctz32(z); - assert(pos < 16); - return buf + pos; - } - return NULL; -} - -/* - * this function is used by 64-bit multiaccel matchers. 64-bit matchers accept - * a 64-bit integer as a buffer, where low 32 bits is movemask result and - * high 32 bits are "don't care" values. this function is not expected to return - * a result higher than 32. - */ -static really_inline -const u8 *match64(const u8 *buf, const u64a z) { - if (unlikely(z != 0)) { - u32 pos = ctz64(z); - assert(pos < 32); - return buf + pos; - } - return NULL; -} - -#endif /* MULTIACCEL_COMMON_H_ */ diff --git a/src/nfa/multiaccel_compilehelper.cpp b/src/nfa/multiaccel_compilehelper.cpp deleted file mode 100644 index 4c1f8101..00000000 --- a/src/nfa/multiaccel_compilehelper.cpp +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "multiaccel_compilehelper.h" - -using namespace std; -using namespace ue2; - -#ifdef DEBUG -static const char* state_to_str[] = { - "FIRST_RUN", - "SECOND_RUN", - "WAITING_FOR_GRAB", - "FIRST_TAIL", - "SECOND_TAIL", - "STOPPED", - "INVALID" -}; -static const char* type_to_str[] = { - "SHIFT", - "SHIFTGRAB", - "DOUBLESHIFT", - "DOUBLESHIFTGRAB", - "LONG", - "LONGGRAB", - "NONE" -}; - -static -void dumpMultiaccelState(const accel_data &d) { - DEBUG_PRINTF("type: %s state: %s len1: %u tlen1: %u len2: %u tlen2: %u\n", - type_to_str[(unsigned) d.type], - state_to_str[(unsigned) d.state], - d.len1, d.tlen1, d.len2, d.tlen2); -} -#endif - -/* stop all the matching. this may render most schemes invalid. */ -static -void stop(accel_data &d) { - switch (d.state) { - case STATE_STOPPED: - case STATE_INVALID: - break; - case STATE_FIRST_TAIL: - case STATE_SECOND_RUN: - /* - * Shift matchers are special case, because they have "tails". - * When shift matcher reaches a mid/endpoint, tail mode is - * activated, which looks for more matches to extend the match. - * - * For example, consider pattern /a{5}ba{3}/. Under normal circumstances, - * long-grab matcher will be picked for this pattern (matching a run of a's, - * followed by a not-a), because doubleshift matcher would be confused by - * consecutive a's and would parse the pattern as a.{0}a.{0}a (two shifts - * by 1) and throw out the rest of the pattern. - * - * With tails, we defer ending the run until we actually run out of - * matching characters, so the above pattern will now be parsed by - * doubleshift matcher as /a.{3}a.{3}a/ (two shifts by 4). - * - * So if we are stopping shift matchers, we should check if we aren't in - * the process of matching first tail or second run. If we are, we can't - * finish the second run as we are stopping, but we can try and split - * the first tail instead to obtain a valid second run. - */ - if ((d.type == MultibyteAccelInfo::MAT_DSHIFT || - d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.tlen1 == 0) { - // can't split an empty void... 
- d.state = STATE_INVALID; - break; - } - d.len2 = 0; - d.state = STATE_STOPPED; - break; - case STATE_SECOND_TAIL: - d.state = STATE_STOPPED; - break; - case STATE_WAITING_FOR_GRAB: - case STATE_FIRST_RUN: - if (d.type == MultibyteAccelInfo::MAT_LONG) { - d.state = STATE_STOPPED; - } else { - d.state = STATE_INVALID; - } - break; - } -} - -static -void validate(accel_data &d, unsigned max_len) { - // try and fit in all our tails - if (d.len1 + d.tlen1 + d.len2 + d.tlen2 < max_len && d.len2 > 0) { - // case 1: everything fits in - d.len1 += d.tlen1; - d.len2 += d.tlen2; - d.tlen1 = 0; - d.tlen2 = 0; - } else if (d.len1 + d.tlen1 + d.len2 < max_len && d.len2 > 0) { - // case 2: everything but the second tail fits in - d.len1 += d.tlen1; - d.tlen1 = 0; - // try going for a partial tail - if (d.tlen2 != 0) { - int new_tlen2 = max_len - 1 - d.len1 - d.len2; - if (new_tlen2 > 0) { - d.len2 += new_tlen2; - } - d.tlen2 = 0; - } - } else if (d.len1 + d.tlen1 < max_len) { - // case 3: first run and its tail fits in - if (d.type == MultibyteAccelInfo::MAT_DSHIFT || - d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) { - // split the tail into a second run - d.len2 = d.tlen1; - } else { - d.len1 += d.tlen1; - d.len2 = 0; - } - d.tlen1 = 0; - d.tlen2 = 0; - } else if (d.len1 < max_len) { - // case 4: nothing but the first run fits in - // try going for a partial tail - if (d.tlen1 != 0) { - int new_tlen1 = max_len - 1 - d.len1; - if (new_tlen1 > 0) { - d.len1 += new_tlen1; - } - d.tlen1 = 0; - } - d.len2 = 0; - d.tlen2 = 0; - } - // if we removed our second run, doubleshift matchers are no longer valid - if ((d.type == MultibyteAccelInfo::MAT_DSHIFT || - d.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) && d.len2 == 0) { - d.state = STATE_INVALID; - } else if ((d.type == MultibyteAccelInfo::MAT_LONG) && d.len1 >= max_len) { - // long matchers can just stop whenever they want to - d.len1 = max_len - 1; - } - - // now, general sanity checks - if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) >= max_len) { - d.state = STATE_INVALID; - } - if ((d.len1 + d.tlen1 + d.len2 + d.tlen2) < MULTIACCEL_MIN_LEN) { - d.state = STATE_INVALID; - } -} - -static -void match(accel_data &d, const CharReach &ref_cr, const CharReach &cur_cr) { - switch (d.type) { - case MultibyteAccelInfo::MAT_LONG: - { - /* - * For long matcher, we want lots of consecutive same-or-subset - * char-reaches - */ - if ((ref_cr & cur_cr) == cur_cr) { - d.len1++; - } else { - d.state = STATE_STOPPED; - } - } - break; - - case MultibyteAccelInfo::MAT_LONGGRAB: - { - /* - * For long-grab matcher, we want lots of consecutive same-or-subset - * char-reaches with a negative match in the end. - */ - if ((ref_cr & cur_cr) == cur_cr) { - d.len1++; - } else if (!(ref_cr & cur_cr).any()) { - /* we grabbed, stop immediately */ - d.state = STATE_STOPPED; - } else { - /* our run-n-grab was interrupted; mark as invalid */ - d.state = STATE_INVALID; - } - } - break; - - case MultibyteAccelInfo::MAT_SHIFTGRAB: - { - /* - * For shift-grab matcher, we want two matches separated by anything; - * however the second vertex *must* be a negative (non-overlapping) match. - * - * Shiftgrab matcher is identical to shift except for presence of grab. - */ - if (d.state == STATE_WAITING_FOR_GRAB) { - if ((ref_cr & cur_cr).any()) { - d.state = STATE_INVALID; - } else { - d.state = STATE_FIRST_RUN; - d.len1++; - } - return; - } - } - /* no break, falling through */ - case MultibyteAccelInfo::MAT_SHIFT: - { - /* - * For shift-matcher, we want two matches separated by anything. 
- */ - if (ref_cr == cur_cr) { - // keep matching tail - switch (d.state) { - case STATE_FIRST_RUN: - d.state = STATE_FIRST_TAIL; - break; - case STATE_FIRST_TAIL: - d.tlen1++; - break; - default: - // shouldn't happen - assert(0); - } - } else { - switch (d.state) { - case STATE_FIRST_RUN: - // simply advance - d.len1++; - break; - case STATE_FIRST_TAIL: - // we found a non-matching char after tail, so stop - d.state = STATE_STOPPED; - break; - default: - // shouldn't happen - assert(0); - } - } - } - break; - - case MultibyteAccelInfo::MAT_DSHIFTGRAB: - { - /* - * For double shift-grab matcher, we want two matches separated by - * either negative matches or dots; however the second vertex *must* - * be a negative match. - * - * Doubleshiftgrab matcher is identical to doubleshift except for - * presence of grab. - */ - if (d.state == STATE_WAITING_FOR_GRAB) { - if ((ref_cr & cur_cr).any()) { - d.state = STATE_INVALID; - } else { - d.state = STATE_FIRST_RUN; - d.len1++; - } - return; - } - } - /* no break, falling through */ - case MultibyteAccelInfo::MAT_DSHIFT: - { - /* - * For double shift matcher, we want three matches, each separated - * by a lot of anything. - * - * Doubleshift matcher is complicated by presence of tails. - */ - if (ref_cr == cur_cr) { - // decide if we are activating second shift or matching tails - switch (d.state) { - case STATE_FIRST_RUN: - d.state = STATE_FIRST_TAIL; - d.len2 = 1; // we're now ready for our second run - break; - case STATE_FIRST_TAIL: - d.tlen1++; - break; - case STATE_SECOND_RUN: - d.state = STATE_SECOND_TAIL; - break; - case STATE_SECOND_TAIL: - d.tlen2++; - break; - default: - // shouldn't happen - assert(0); - } - } else { - switch (d.state) { - case STATE_FIRST_RUN: - d.len1++; - break; - case STATE_FIRST_TAIL: - // start second run - d.state = STATE_SECOND_RUN; - d.len2++; - break; - case STATE_SECOND_RUN: - d.len2++; - break; - case STATE_SECOND_TAIL: - // stop - d.state = STATE_STOPPED; - break; - default: - // shouldn't happen - assert(0); - } - } - } - break; - - default: - // shouldn't happen - assert(0); - break; - } -} - -MultiaccelCompileHelper::MultiaccelCompileHelper(const CharReach &ref_cr, - u32 off, unsigned max_length) - : cr(ref_cr), offset(off), max_len(max_length) { - int accel_num = (int) MultibyteAccelInfo::MAT_MAX; - accels.resize(accel_num); - - // mark everything as valid - for (int i = 0; i < accel_num; i++) { - accel_data &ad = accels[i]; - ad.len1 = 1; - ad.type = (MultibyteAccelInfo::multiaccel_type) i; - - /* for shift-grab matchers, we are waiting for the grab right at the start */ - if (ad.type == MultibyteAccelInfo::MAT_SHIFTGRAB - || ad.type == MultibyteAccelInfo::MAT_DSHIFTGRAB) { - ad.state = STATE_WAITING_FOR_GRAB; - } else { - ad.state = STATE_FIRST_RUN; - } - } -} - -bool MultiaccelCompileHelper::canAdvance() { - for (const accel_data &ad : accels) { - if (ad.state != STATE_STOPPED && ad.state != STATE_INVALID) { - return true; - } - } - return false; -} - -void MultiaccelCompileHelper::advance(const CharReach &cur_cr) { - for (accel_data &ad : accels) { - if (ad.state == STATE_STOPPED || ad.state == STATE_INVALID) { - continue; - } - match(ad, cr, cur_cr); -#ifdef DEBUG - dumpMultiaccelState(ad); -#endif - } -} - -MultibyteAccelInfo MultiaccelCompileHelper::getBestScheme() { - int best_len = 0; - accel_data best; - - DEBUG_PRINTF("Stopping multiaccel compile\n"); - - for (accel_data &ad : accels) { - // stop our matching - stop(ad); - validate(ad, max_len); - -#ifdef DEBUG - dumpMultiaccelState(ad); 
-#endif - - // skip invalid schemes - if (ad.state == STATE_INVALID) { - continue; - } - DEBUG_PRINTF("Marking as viable\n"); - - // TODO: relative strengths of accel schemes? maybe e.g. a shorter - // long match would in some cases be preferable to a longer - // double shift match (for example, depending on length)? - int as_len = ad.len1 + ad.len2; - if (as_len >= best_len) { - DEBUG_PRINTF("Marking as best\n"); - best_len = as_len; - best = ad; - } - } - // if we found at least one accel scheme, return it - if (best.state != STATE_INVALID) { -#ifdef DEBUG - DEBUG_PRINTF("Picked best multiaccel state:\n"); - dumpMultiaccelState(best); -#endif - MultibyteAccelInfo info; - info.cr = cr; - info.offset = offset; - info.len1 = best.len1; - info.len2 = best.len2; - info.type = best.type; - return info; - } - return MultibyteAccelInfo(); -} diff --git a/src/nfa/multiaccel_compilehelper.h b/src/nfa/multiaccel_compilehelper.h deleted file mode 100644 index 27dbe634..00000000 --- a/src/nfa/multiaccel_compilehelper.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef MULTIACCELCOMPILE_H_ -#define MULTIACCELCOMPILE_H_ - -#include "ue2common.h" - -#include "nfagraph/ng_limex_accel.h" - -#include - -namespace ue2 { - -/* accel scheme state machine */ -enum accel_scheme_state { - STATE_FIRST_RUN, - STATE_SECOND_RUN, - STATE_WAITING_FOR_GRAB, - STATE_FIRST_TAIL, - STATE_SECOND_TAIL, - STATE_STOPPED, - STATE_INVALID -}; - -struct accel_data { - MultibyteAccelInfo::multiaccel_type type = MultibyteAccelInfo::MAT_NONE; - accel_scheme_state state = STATE_INVALID; - unsigned len1 = 0; /* length of first run */ - unsigned len2 = 0; /* length of second run, if present */ - unsigned tlen1 = 0; /* first tail length */ - unsigned tlen2 = 0; /* second tail length */ -}; - -class MultiaccelCompileHelper { -private: - const CharReach &cr; - u32 offset; - std::vector accels; - unsigned max_len; -public: - MultiaccelCompileHelper(const CharReach &cr, u32 off, unsigned max_len); - bool canAdvance(); - MultibyteAccelInfo getBestScheme(); - void advance(const ue2::CharReach &cr); -}; - -}; // namespace - -#endif /* MULTIACCELCOMPILE_H_ */ diff --git a/src/nfa/multiaccel_doubleshift.h b/src/nfa/multiaccel_doubleshift.h deleted file mode 100644 index 7ed7534c..00000000 --- a/src/nfa/multiaccel_doubleshift.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef MULTIACCEL_DOUBLESHIFT_H_ -#define MULTIACCEL_DOUBLESHIFT_H_ - -#include "multiaccel_common.h" - -#define DOUBLESHIFT_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(doubleshiftMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\ - if (unlikely(z)) { \ - match_t tmp = z; \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - VARISHIFT(tmp, tmp, len2); \ - VARISHIFT(tmp, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define DOUBLESHIFT_MATCH_32_DEF(n) \ - DOUBLESHIFT_MATCH(n, u32, 32) -#define DOUBLESHIFT_MATCH_64_DEF(n) \ - DOUBLESHIFT_MATCH(n, u64a, 64) -#define DOUBLESHIFT_MATCH_DEF(n) \ - DOUBLESHIFT_MATCH_32_DEF(n) \ - DOUBLESHIFT_MATCH_64_DEF(n) - -DOUBLESHIFT_MATCH_DEF(1) -DOUBLESHIFT_MATCH_DEF(2) -DOUBLESHIFT_MATCH_DEF(3) -DOUBLESHIFT_MATCH_DEF(4) -DOUBLESHIFT_MATCH_DEF(5) -DOUBLESHIFT_MATCH_DEF(6) -DOUBLESHIFT_MATCH_DEF(7) -DOUBLESHIFT_MATCH_DEF(8) -DOUBLESHIFT_MATCH_DEF(9) -DOUBLESHIFT_MATCH_DEF(10) -DOUBLESHIFT_MATCH_DEF(11) -DOUBLESHIFT_MATCH_DEF(12) -DOUBLESHIFT_MATCH_DEF(13) -DOUBLESHIFT_MATCH_DEF(14) -DOUBLESHIFT_MATCH_DEF(15) -DOUBLESHIFT_MATCH_64_DEF(16) -DOUBLESHIFT_MATCH_64_DEF(17) -DOUBLESHIFT_MATCH_64_DEF(18) -DOUBLESHIFT_MATCH_64_DEF(19) -DOUBLESHIFT_MATCH_64_DEF(20) -DOUBLESHIFT_MATCH_64_DEF(21) -DOUBLESHIFT_MATCH_64_DEF(22) -DOUBLESHIFT_MATCH_64_DEF(23) -DOUBLESHIFT_MATCH_64_DEF(24) -DOUBLESHIFT_MATCH_64_DEF(25) -DOUBLESHIFT_MATCH_64_DEF(26) -DOUBLESHIFT_MATCH_64_DEF(27) -DOUBLESHIFT_MATCH_64_DEF(28) -DOUBLESHIFT_MATCH_64_DEF(29) -DOUBLESHIFT_MATCH_64_DEF(30) -DOUBLESHIFT_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*doubleshift_match_funcs_32[])(const u8 *buf, u32 z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftMatch_32_1, - &doubleshiftMatch_32_2, - &doubleshiftMatch_32_3, - &doubleshiftMatch_32_4, - &doubleshiftMatch_32_5, - &doubleshiftMatch_32_6, - &doubleshiftMatch_32_7, - &doubleshiftMatch_32_8, - &doubleshiftMatch_32_9, - &doubleshiftMatch_32_10, - &doubleshiftMatch_32_11, - &doubleshiftMatch_32_12, - &doubleshiftMatch_32_13, - &doubleshiftMatch_32_14, - &doubleshiftMatch_32_15, -}; - -static -const UNUSED u8 * (*doubleshift_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftMatch_64_1, - &doubleshiftMatch_64_2, - &doubleshiftMatch_64_3, - &doubleshiftMatch_64_4, - &doubleshiftMatch_64_5, - &doubleshiftMatch_64_6, - &doubleshiftMatch_64_7, - &doubleshiftMatch_64_8, - &doubleshiftMatch_64_9, - &doubleshiftMatch_64_10, - &doubleshiftMatch_64_11, - &doubleshiftMatch_64_12, - &doubleshiftMatch_64_13, - &doubleshiftMatch_64_14, - &doubleshiftMatch_64_15, - &doubleshiftMatch_64_16, - &doubleshiftMatch_64_17, - &doubleshiftMatch_64_18, - &doubleshiftMatch_64_19, - &doubleshiftMatch_64_20, - &doubleshiftMatch_64_21, - &doubleshiftMatch_64_22, - &doubleshiftMatch_64_23, - &doubleshiftMatch_64_24, - &doubleshiftMatch_64_25, - &doubleshiftMatch_64_26, - &doubleshiftMatch_64_27, - &doubleshiftMatch_64_28, - &doubleshiftMatch_64_29, - &doubleshiftMatch_64_30, - &doubleshiftMatch_64_31, -}; - -#endif /* MULTIACCEL_DOUBLESHIFT_H_ */ diff --git a/src/nfa/multiaccel_doubleshiftgrab.h b/src/nfa/multiaccel_doubleshiftgrab.h deleted file mode 100644 index 51955b4a..00000000 --- a/src/nfa/multiaccel_doubleshiftgrab.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and 
binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_DOUBLESHIFTGRAB_H_ -#define MULTIACCEL_DOUBLESHIFTGRAB_H_ - -#include "multiaccel_common.h" - -#define DOUBLESHIFTGRAB_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(doubleshiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z, u32 len2) {\ - if (unlikely(z)) { \ - match_t neg = ~z; \ - match_t tmp = z; \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - tmp |= ((match_t) (1 << (len + len2)) - 1) << (match_sz / 2); \ - neg |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - VARISHIFT(tmp, tmp, len2); \ - VARISHIFT(neg, z, 1); \ - VARISHIFT(tmp, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define DOUBLESHIFTGRAB_MATCH_32_DEF(n) \ - DOUBLESHIFTGRAB_MATCH(n, u32, 32) -#define DOUBLESHIFTGRAB_MATCH_64_DEF(n) \ - DOUBLESHIFTGRAB_MATCH(n, u64a, 64) -#define DOUBLESHIFTGRAB_MATCH_DEF(n) \ - DOUBLESHIFTGRAB_MATCH_32_DEF(n) \ - DOUBLESHIFTGRAB_MATCH_64_DEF(n) - -DOUBLESHIFTGRAB_MATCH_DEF(1) -DOUBLESHIFTGRAB_MATCH_DEF(2) -DOUBLESHIFTGRAB_MATCH_DEF(3) -DOUBLESHIFTGRAB_MATCH_DEF(4) -DOUBLESHIFTGRAB_MATCH_DEF(5) -DOUBLESHIFTGRAB_MATCH_DEF(6) -DOUBLESHIFTGRAB_MATCH_DEF(7) -DOUBLESHIFTGRAB_MATCH_DEF(8) -DOUBLESHIFTGRAB_MATCH_DEF(9) -DOUBLESHIFTGRAB_MATCH_DEF(10) -DOUBLESHIFTGRAB_MATCH_DEF(11) -DOUBLESHIFTGRAB_MATCH_DEF(12) -DOUBLESHIFTGRAB_MATCH_DEF(13) -DOUBLESHIFTGRAB_MATCH_DEF(14) -DOUBLESHIFTGRAB_MATCH_DEF(15) -DOUBLESHIFTGRAB_MATCH_64_DEF(16) -DOUBLESHIFTGRAB_MATCH_64_DEF(17) -DOUBLESHIFTGRAB_MATCH_64_DEF(18) -DOUBLESHIFTGRAB_MATCH_64_DEF(19) -DOUBLESHIFTGRAB_MATCH_64_DEF(20) -DOUBLESHIFTGRAB_MATCH_64_DEF(21) -DOUBLESHIFTGRAB_MATCH_64_DEF(22) -DOUBLESHIFTGRAB_MATCH_64_DEF(23) -DOUBLESHIFTGRAB_MATCH_64_DEF(24) -DOUBLESHIFTGRAB_MATCH_64_DEF(25) -DOUBLESHIFTGRAB_MATCH_64_DEF(26) -DOUBLESHIFTGRAB_MATCH_64_DEF(27) -DOUBLESHIFTGRAB_MATCH_64_DEF(28) -DOUBLESHIFTGRAB_MATCH_64_DEF(29) -DOUBLESHIFTGRAB_MATCH_64_DEF(30) -DOUBLESHIFTGRAB_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*doubleshiftgrab_match_funcs_32[])(const u8 *buf, u32 
z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftgrabMatch_32_1, - &doubleshiftgrabMatch_32_2, - &doubleshiftgrabMatch_32_3, - &doubleshiftgrabMatch_32_4, - &doubleshiftgrabMatch_32_5, - &doubleshiftgrabMatch_32_6, - &doubleshiftgrabMatch_32_7, - &doubleshiftgrabMatch_32_8, - &doubleshiftgrabMatch_32_9, - &doubleshiftgrabMatch_32_10, - &doubleshiftgrabMatch_32_11, - &doubleshiftgrabMatch_32_12, - &doubleshiftgrabMatch_32_13, - &doubleshiftgrabMatch_32_14, - &doubleshiftgrabMatch_32_15, -}; - -static -const UNUSED u8 * (*doubleshiftgrab_match_funcs_64[])(const u8 *buf, u64a z, u32 len2) = -{ -// skip the first - 0, - &doubleshiftgrabMatch_64_1, - &doubleshiftgrabMatch_64_2, - &doubleshiftgrabMatch_64_3, - &doubleshiftgrabMatch_64_4, - &doubleshiftgrabMatch_64_5, - &doubleshiftgrabMatch_64_6, - &doubleshiftgrabMatch_64_7, - &doubleshiftgrabMatch_64_8, - &doubleshiftgrabMatch_64_9, - &doubleshiftgrabMatch_64_10, - &doubleshiftgrabMatch_64_11, - &doubleshiftgrabMatch_64_12, - &doubleshiftgrabMatch_64_13, - &doubleshiftgrabMatch_64_14, - &doubleshiftgrabMatch_64_15, - &doubleshiftgrabMatch_64_16, - &doubleshiftgrabMatch_64_17, - &doubleshiftgrabMatch_64_18, - &doubleshiftgrabMatch_64_19, - &doubleshiftgrabMatch_64_20, - &doubleshiftgrabMatch_64_21, - &doubleshiftgrabMatch_64_22, - &doubleshiftgrabMatch_64_23, - &doubleshiftgrabMatch_64_24, - &doubleshiftgrabMatch_64_25, - &doubleshiftgrabMatch_64_26, - &doubleshiftgrabMatch_64_27, - &doubleshiftgrabMatch_64_28, - &doubleshiftgrabMatch_64_29, - &doubleshiftgrabMatch_64_30, - &doubleshiftgrabMatch_64_31, -}; - -#endif /* MULTIACCEL_DOUBLESHIFTGRAB_H_ */ diff --git a/src/nfa/multiaccel_long.h b/src/nfa/multiaccel_long.h deleted file mode 100644 index 515f0bc2..00000000 --- a/src/nfa/multiaccel_long.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef MULTIACCEL_LONG_H_ -#define MULTIACCEL_LONG_H_ - -#include "multiaccel_common.h" - -#define LONG_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(longMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \ - if (unlikely(z)) { \ - z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \ - JOIN(SHIFT, len)(z); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define LONG_MATCH_32_DEF(n) \ - LONG_MATCH(n, u32, 32) -#define LONG_MATCH_64_DEF(n) \ - LONG_MATCH(n, u64a, 64) -#define LONG_MATCH_DEF(n) \ - LONG_MATCH_32_DEF(n) \ - LONG_MATCH_64_DEF(n) - -LONG_MATCH_DEF(1) -LONG_MATCH_DEF(2) -LONG_MATCH_DEF(3) -LONG_MATCH_DEF(4) -LONG_MATCH_DEF(5) -LONG_MATCH_DEF(6) -LONG_MATCH_DEF(7) -LONG_MATCH_DEF(8) -LONG_MATCH_DEF(9) -LONG_MATCH_DEF(10) -LONG_MATCH_DEF(11) -LONG_MATCH_DEF(12) -LONG_MATCH_DEF(13) -LONG_MATCH_DEF(14) -LONG_MATCH_DEF(15) -LONG_MATCH_64_DEF(16) -LONG_MATCH_64_DEF(17) -LONG_MATCH_64_DEF(18) -LONG_MATCH_64_DEF(19) -LONG_MATCH_64_DEF(20) -LONG_MATCH_64_DEF(21) -LONG_MATCH_64_DEF(22) -LONG_MATCH_64_DEF(23) -LONG_MATCH_64_DEF(24) -LONG_MATCH_64_DEF(25) -LONG_MATCH_64_DEF(26) -LONG_MATCH_64_DEF(27) -LONG_MATCH_64_DEF(28) -LONG_MATCH_64_DEF(29) -LONG_MATCH_64_DEF(30) -LONG_MATCH_64_DEF(31) - -static -const UNUSED u8 *(*long_match_funcs_32[])(const u8 *buf, u32 z) = -{ - // skip the first three - 0, - &longMatch_32_1, - &longMatch_32_2, - &longMatch_32_3, - &longMatch_32_4, - &longMatch_32_5, - &longMatch_32_6, - &longMatch_32_7, - &longMatch_32_8, - &longMatch_32_9, - &longMatch_32_10, - &longMatch_32_11, - &longMatch_32_12, - &longMatch_32_13, - &longMatch_32_14, - &longMatch_32_15, - }; - -static -const UNUSED u8 *(*long_match_funcs_64[])(const u8 *buf, u64a z) = -{ -// skip the first three - 0, - &longMatch_64_1, - &longMatch_64_2, - &longMatch_64_3, - &longMatch_64_4, - &longMatch_64_5, - &longMatch_64_6, - &longMatch_64_7, - &longMatch_64_8, - &longMatch_64_9, - &longMatch_64_10, - &longMatch_64_11, - &longMatch_64_12, - &longMatch_64_13, - &longMatch_64_14, - &longMatch_64_15, - &longMatch_64_16, - &longMatch_64_17, - &longMatch_64_18, - &longMatch_64_19, - &longMatch_64_20, - &longMatch_64_21, - &longMatch_64_22, - &longMatch_64_23, - &longMatch_64_24, - &longMatch_64_25, - &longMatch_64_26, - &longMatch_64_27, - &longMatch_64_28, - &longMatch_64_29, - &longMatch_64_30, - &longMatch_64_31, -}; - -#endif /* MULTIACCEL_LONG_H_ */ diff --git a/src/nfa/multiaccel_longgrab.h b/src/nfa/multiaccel_longgrab.h deleted file mode 100644 index 09daaf82..00000000 --- a/src/nfa/multiaccel_longgrab.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_LONGGRAB_H_ -#define MULTIACCEL_LONGGRAB_H_ - -#include "multiaccel_common.h" - -#define LONGGRAB_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(longgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) { \ - if (unlikely(z)) { \ - match_t tmp = ~z; \ - tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ - z |= ((match_t) (1 << (len - 1)) - 1) << (match_sz / 2); \ - JOIN(SHIFT, len)(z); \ - VARISHIFT(tmp, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define LONGGRAB_MATCH_32_DEF(n) \ - LONGGRAB_MATCH(n, u32, 32) -#define LONGGRAB_MATCH_64_DEF(n) \ - LONGGRAB_MATCH(n, u64a, 64) -#define LONGGRAB_MATCH_DEF(n) \ - LONGGRAB_MATCH_32_DEF(n) \ - LONGGRAB_MATCH_64_DEF(n) - -LONGGRAB_MATCH_DEF(1) -LONGGRAB_MATCH_DEF(2) -LONGGRAB_MATCH_DEF(3) -LONGGRAB_MATCH_DEF(4) -LONGGRAB_MATCH_DEF(5) -LONGGRAB_MATCH_DEF(6) -LONGGRAB_MATCH_DEF(7) -LONGGRAB_MATCH_DEF(8) -LONGGRAB_MATCH_DEF(9) -LONGGRAB_MATCH_DEF(10) -LONGGRAB_MATCH_DEF(11) -LONGGRAB_MATCH_DEF(12) -LONGGRAB_MATCH_DEF(13) -LONGGRAB_MATCH_DEF(14) -LONGGRAB_MATCH_DEF(15) -LONGGRAB_MATCH_64_DEF(16) -LONGGRAB_MATCH_64_DEF(17) -LONGGRAB_MATCH_64_DEF(18) -LONGGRAB_MATCH_64_DEF(19) -LONGGRAB_MATCH_64_DEF(20) -LONGGRAB_MATCH_64_DEF(21) -LONGGRAB_MATCH_64_DEF(22) -LONGGRAB_MATCH_64_DEF(23) -LONGGRAB_MATCH_64_DEF(24) -LONGGRAB_MATCH_64_DEF(25) -LONGGRAB_MATCH_64_DEF(26) -LONGGRAB_MATCH_64_DEF(27) -LONGGRAB_MATCH_64_DEF(28) -LONGGRAB_MATCH_64_DEF(29) -LONGGRAB_MATCH_64_DEF(30) -LONGGRAB_MATCH_64_DEF(31) - -static -const UNUSED u8 *(*longgrab_match_funcs_32[])(const u8 *buf, u32 z) = -{ -// skip the first three - 0, - &longgrabMatch_32_1, - &longgrabMatch_32_2, - &longgrabMatch_32_3, - &longgrabMatch_32_4, - &longgrabMatch_32_5, - &longgrabMatch_32_6, - &longgrabMatch_32_7, - &longgrabMatch_32_8, - &longgrabMatch_32_9, - &longgrabMatch_32_10, - &longgrabMatch_32_11, - &longgrabMatch_32_12, - &longgrabMatch_32_13, - &longgrabMatch_32_14, - &longgrabMatch_32_15, - }; - -static -const UNUSED u8 *(*longgrab_match_funcs_64[])(const u8 *buf, u64a z) = -{ -// skip the first three - 0, - &longgrabMatch_64_1, - &longgrabMatch_64_2, - &longgrabMatch_64_3, - &longgrabMatch_64_4, - &longgrabMatch_64_5, - &longgrabMatch_64_6, - &longgrabMatch_64_7, - &longgrabMatch_64_8, - &longgrabMatch_64_9, - &longgrabMatch_64_10, - &longgrabMatch_64_11, - &longgrabMatch_64_12, - &longgrabMatch_64_13, - &longgrabMatch_64_14, - &longgrabMatch_64_15, - &longgrabMatch_64_16, - &longgrabMatch_64_17, - &longgrabMatch_64_18, - &longgrabMatch_64_19, - &longgrabMatch_64_20, - &longgrabMatch_64_21, - &longgrabMatch_64_22, - &longgrabMatch_64_23, - &longgrabMatch_64_24, - &longgrabMatch_64_25, - 
&longgrabMatch_64_26, - &longgrabMatch_64_27, - &longgrabMatch_64_28, - &longgrabMatch_64_29, - &longgrabMatch_64_30, - &longgrabMatch_64_31, -}; - -#endif /* MULTIACCEL_LONGGRAB_H_ */ diff --git a/src/nfa/multiaccel_shift.h b/src/nfa/multiaccel_shift.h deleted file mode 100644 index fd362a8b..00000000 --- a/src/nfa/multiaccel_shift.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef MULTIACCEL_SHIFT_H_ -#define MULTIACCEL_SHIFT_H_ - -#include "multiaccel_common.h" - -#define SHIFT_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(shiftMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\ - if (unlikely(z)) { \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define SHIFT_MATCH_32_DEF(n) \ - SHIFT_MATCH(n, u32, 32) -#define SHIFT_MATCH_64_DEF(n) \ - SHIFT_MATCH(n, u64a, 64) -#define SHIFT_MATCH_DEF(n) \ - SHIFT_MATCH_32_DEF(n) \ - SHIFT_MATCH_64_DEF(n) - -SHIFT_MATCH_DEF(1) -SHIFT_MATCH_DEF(2) -SHIFT_MATCH_DEF(3) -SHIFT_MATCH_DEF(4) -SHIFT_MATCH_DEF(5) -SHIFT_MATCH_DEF(6) -SHIFT_MATCH_DEF(7) -SHIFT_MATCH_DEF(8) -SHIFT_MATCH_DEF(9) -SHIFT_MATCH_DEF(10) -SHIFT_MATCH_DEF(11) -SHIFT_MATCH_DEF(12) -SHIFT_MATCH_DEF(13) -SHIFT_MATCH_DEF(14) -SHIFT_MATCH_DEF(15) -SHIFT_MATCH_64_DEF(16) -SHIFT_MATCH_64_DEF(17) -SHIFT_MATCH_64_DEF(18) -SHIFT_MATCH_64_DEF(19) -SHIFT_MATCH_64_DEF(20) -SHIFT_MATCH_64_DEF(21) -SHIFT_MATCH_64_DEF(22) -SHIFT_MATCH_64_DEF(23) -SHIFT_MATCH_64_DEF(24) -SHIFT_MATCH_64_DEF(25) -SHIFT_MATCH_64_DEF(26) -SHIFT_MATCH_64_DEF(27) -SHIFT_MATCH_64_DEF(28) -SHIFT_MATCH_64_DEF(29) -SHIFT_MATCH_64_DEF(30) -SHIFT_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*shift_match_funcs_32[])(const u8 *buf, u32 z) = -{ -// skip the first - 0, - &shiftMatch_32_1, - &shiftMatch_32_2, - &shiftMatch_32_3, - &shiftMatch_32_4, - &shiftMatch_32_5, - &shiftMatch_32_6, - &shiftMatch_32_7, - &shiftMatch_32_8, - &shiftMatch_32_9, - &shiftMatch_32_10, - &shiftMatch_32_11, - &shiftMatch_32_12, - &shiftMatch_32_13, - &shiftMatch_32_14, - &shiftMatch_32_15, -}; - -static -const UNUSED u8 * (*shift_match_funcs_64[])(const u8 *buf, u64a z) = -{ -// skip the first - 0, - &shiftMatch_64_1, - &shiftMatch_64_2, - &shiftMatch_64_3, - &shiftMatch_64_4, - &shiftMatch_64_5, - &shiftMatch_64_6, - &shiftMatch_64_7, - &shiftMatch_64_8, - &shiftMatch_64_9, - &shiftMatch_64_10, - &shiftMatch_64_11, - &shiftMatch_64_12, - &shiftMatch_64_13, - &shiftMatch_64_14, - &shiftMatch_64_15, - &shiftMatch_64_16, - &shiftMatch_64_17, - &shiftMatch_64_18, - &shiftMatch_64_19, - &shiftMatch_64_20, - &shiftMatch_64_21, - &shiftMatch_64_22, - &shiftMatch_64_23, - &shiftMatch_64_24, - &shiftMatch_64_25, - &shiftMatch_64_26, - &shiftMatch_64_27, - &shiftMatch_64_28, - &shiftMatch_64_29, - &shiftMatch_64_30, - &shiftMatch_64_31, -}; - -#endif /* MULTIACCEL_SHIFT_H_ */ diff --git a/src/nfa/multiaccel_shiftgrab.h b/src/nfa/multiaccel_shiftgrab.h deleted file mode 100644 index 032ed086..00000000 --- a/src/nfa/multiaccel_shiftgrab.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTIACCEL_SHIFTGRAB_H_ -#define MULTIACCEL_SHIFTGRAB_H_ - -#include "multiaccel_common.h" - -#define SHIFTGRAB_MATCH(len, match_t, match_sz) \ - static really_inline \ - const u8 * JOIN4(shiftgrabMatch_, match_sz, _, len)(const u8 *buf, match_t z) {\ - if (unlikely(z)) { \ - match_t tmp = ~z; \ - z |= ((match_t) (1 << (len)) - 1) << (match_sz / 2); \ - tmp |= ((match_t) (1 << len) - 1) << (match_sz / 2); \ - VARISHIFT(z, z, len); \ - VARISHIFT(tmp, z, 1); \ - return JOIN(match, match_sz)(buf, z); \ - } \ - return NULL; \ - } - -#define SHIFTGRAB_MATCH_32_DEF(n) \ - SHIFTGRAB_MATCH(n, u32, 32) -#define SHIFTGRAB_MATCH_64_DEF(n) \ - SHIFTGRAB_MATCH(n, u64a, 64) -#define SHIFTGRAB_MATCH_DEF(n) \ - SHIFTGRAB_MATCH_32_DEF(n) \ - SHIFTGRAB_MATCH_64_DEF(n) - -SHIFTGRAB_MATCH_DEF(1) -SHIFTGRAB_MATCH_DEF(2) -SHIFTGRAB_MATCH_DEF(3) -SHIFTGRAB_MATCH_DEF(4) -SHIFTGRAB_MATCH_DEF(5) -SHIFTGRAB_MATCH_DEF(6) -SHIFTGRAB_MATCH_DEF(7) -SHIFTGRAB_MATCH_DEF(8) -SHIFTGRAB_MATCH_DEF(9) -SHIFTGRAB_MATCH_DEF(10) -SHIFTGRAB_MATCH_DEF(11) -SHIFTGRAB_MATCH_DEF(12) -SHIFTGRAB_MATCH_DEF(13) -SHIFTGRAB_MATCH_DEF(14) -SHIFTGRAB_MATCH_DEF(15) -SHIFTGRAB_MATCH_64_DEF(16) -SHIFTGRAB_MATCH_64_DEF(17) -SHIFTGRAB_MATCH_64_DEF(18) -SHIFTGRAB_MATCH_64_DEF(19) -SHIFTGRAB_MATCH_64_DEF(20) -SHIFTGRAB_MATCH_64_DEF(21) -SHIFTGRAB_MATCH_64_DEF(22) -SHIFTGRAB_MATCH_64_DEF(23) -SHIFTGRAB_MATCH_64_DEF(24) -SHIFTGRAB_MATCH_64_DEF(25) -SHIFTGRAB_MATCH_64_DEF(26) -SHIFTGRAB_MATCH_64_DEF(27) -SHIFTGRAB_MATCH_64_DEF(28) -SHIFTGRAB_MATCH_64_DEF(29) -SHIFTGRAB_MATCH_64_DEF(30) -SHIFTGRAB_MATCH_64_DEF(31) - -static -const UNUSED u8 * (*shiftgrab_match_funcs_32[])(const u8 *buf, u32 z) = -{ -// skip the first - 0, - &shiftgrabMatch_32_1, - &shiftgrabMatch_32_2, - &shiftgrabMatch_32_3, - &shiftgrabMatch_32_4, - &shiftgrabMatch_32_5, - &shiftgrabMatch_32_6, - &shiftgrabMatch_32_7, - &shiftgrabMatch_32_8, - &shiftgrabMatch_32_9, - &shiftgrabMatch_32_10, - &shiftgrabMatch_32_11, - &shiftgrabMatch_32_12, - &shiftgrabMatch_32_13, - &shiftgrabMatch_32_14, - &shiftgrabMatch_32_15, -}; - -static -const UNUSED u8 * (*shiftgrab_match_funcs_64[])(const u8 *buf, u64a z) = - { -// skip the first - 0, - &shiftgrabMatch_64_1, - &shiftgrabMatch_64_2, - &shiftgrabMatch_64_3, - &shiftgrabMatch_64_4, - &shiftgrabMatch_64_5, - &shiftgrabMatch_64_6, - &shiftgrabMatch_64_7, - &shiftgrabMatch_64_8, - &shiftgrabMatch_64_9, - &shiftgrabMatch_64_10, - &shiftgrabMatch_64_11, - &shiftgrabMatch_64_12, - &shiftgrabMatch_64_13, - &shiftgrabMatch_64_14, - &shiftgrabMatch_64_15, - &shiftgrabMatch_64_16, - &shiftgrabMatch_64_17, - &shiftgrabMatch_64_18, - &shiftgrabMatch_64_19, - &shiftgrabMatch_64_20, - &shiftgrabMatch_64_21, - &shiftgrabMatch_64_22, - &shiftgrabMatch_64_23, 
- &shiftgrabMatch_64_24, - &shiftgrabMatch_64_25, - &shiftgrabMatch_64_26, - &shiftgrabMatch_64_27, - &shiftgrabMatch_64_28, - &shiftgrabMatch_64_29, - &shiftgrabMatch_64_30, - &shiftgrabMatch_64_31, -}; - -#endif /* MULTIACCEL_SHIFTGRAB_H_ */ diff --git a/src/nfa/multishufti.c b/src/nfa/multishufti.c deleted file mode 100644 index 80a2bcd0..00000000 --- a/src/nfa/multishufti.c +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shufti: character class acceleration. 
- * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#include "config.h" -#include "ue2common.h" -#include "util/arch.h" - -#include "multishufti.h" - -#include "multiaccel_common.h" - -#if !defined(HAVE_AVX2) - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multishufti_sse.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#else - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multishufti_avx2.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#endif diff --git a/src/nfa/multishufti.h b/src/nfa/multishufti.h deleted file mode 100644 index af578483..00000000 --- a/src/nfa/multishufti.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/** \file - * \brief Multishufti: multibyte version of Shufti - * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#ifndef MULTISHUFTI_H -#define MULTISHUFTI_H - -#include "ue2common.h" -#include "util/simd_types.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -const u8 *long_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *longgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *shift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *shiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *doubleshift_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -const u8 *doubleshiftgrab_shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/nfa/multishufti_avx2.h b/src/nfa/multishufti_avx2.h deleted file mode 100644 index 042f5570..00000000 --- a/src/nfa/multishufti_avx2.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "shufti_common.h" - -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 mask_lo, m256 mask_hi, m256 chars, - const u8 *buf, const m256 low4bits, - const m256 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, ~z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi, - const u8 *buf, - const u8 *buf_end, u8 run_len -#ifdef MULTIACCEL_DOUBLE - , u8 run_len2 -#endif - ) { - assert(buf && buf_end); - assert(buf < buf_end); - - // Slow path for small cases. - if (buf_end - buf < 32) { - return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - - const m256 zeroes = zeroes256(); - const m256 low4bits = set32x8(0xf); - const m256 wide_mask_lo = set2x128(mask_lo); - const m256 wide_mask_hi = set2x128(mask_hi); - const u8 *rv; - - size_t min = (size_t)buf % 32; - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (32 - min); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. - const u8 *last_block = buf_end - 32; - while (buf < last_block) { - m256 lchars = load256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, lchars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += 32; - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. - assert(buf <= buf_end && buf >= buf_end - 32); - chars = loadu256(buf_end - 32); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multishufti_sse.h b/src/nfa/multishufti_sse.h deleted file mode 100644 index 0a9b543e..00000000 --- a/src/nfa/multishufti_sse.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "shufti_common.h" - -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -/* Normal SSSE3 shufti */ - -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 mask_lo, m128 mask_hi, m128 chars, - const u8 *buf, const m128 low4bits, - const m128 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - // negate first 16 bits - u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes) ^ 0xFFFF; - return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, shuftiPipeline16)(m128 mask_lo, m128 mask_hi, - const u8 *buf, const u8 *buf_end, - const m128 low4bits, - const m128 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF; - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = block(mask_lo, mask_hi, data, low4bits, zeroes) ^ 0xFFFF; - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, shuftiPipeline32)(m128 mask_lo, m128 mask_hi, - const u8 *buf, const u8 *buf_end, - const m128 low4bits, - const m128 zeroes, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF; - m128 data2 = load128(buf + 16); - u32 z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF; - - // store the results - u32 last_res = z1 | (z2 << 16); - last_buf = buf; - buf += 32; - - - // now, start the pipeline! 
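For reference, every *Pipeline* function deleted in this patch shares the two-stage software-pipelining shape that starts here: the SIMD match mask for block N is computed while the match test for block N-1 is still resolving, hiding the shuffle/movemask latency behind the scalar test. A minimal portable sketch of that shape, with compute_mask and test_mask as hypothetical stand-ins for the SIMD block step and the multiaccel match step (alignment and tail handling are left to the caller, as in the real code):

    #include <stddef.h>
    #include <stdint.h>

    #define BLOCK 16

    /* Toy stand-in for the SIMD block step: bit i of the result is set
     * when buf[i] equals target. The deleted code derives this mask with
     * pshufb/movemask instead. */
    static uint32_t compute_mask(const uint8_t *buf, uint8_t target) {
        uint32_t z = 0;
        for (size_t i = 0; i < BLOCK; i++) {
            z |= (uint32_t)(buf[i] == target) << i;
        }
        return z;
    }

    /* Toy stand-in for the multiaccel match step: report the first set
     * bit. __builtin_ctz assumes GCC/Clang. */
    static const uint8_t *test_mask(const uint8_t *buf, uint32_t z) {
        return z ? buf + __builtin_ctz(z) : NULL;
    }

    /* Two-stage pipeline: start block N, then finish block N-1, so the
     * two stages overlap in flight. Assumes buf_end - buf >= BLOCK. */
    static const uint8_t *pipeline_scan(const uint8_t *buf,
                                        const uint8_t *buf_end,
                                        uint8_t target) {
        const uint8_t *last_buf = buf;           /* prologue: first block */
        uint32_t last_res = compute_mask(buf, target);
        for (buf += BLOCK; buf + BLOCK - 1 < buf_end; buf += BLOCK) {
            uint32_t z = compute_mask(buf, target);             /* start N */
            const uint8_t *ptr = test_mask(last_buf, last_res); /* finish N-1 */
            if (ptr) {
                return ptr;
            }
            last_buf = buf;
            last_res = z;
        }
        return test_mask(last_buf, last_res);    /* epilogue */
    }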
- assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = block(mask_lo, mask_hi, data1, low4bits, zeroes) ^ 0xFFFF; - data2 = load128(buf + 16); - z2 = block(mask_lo, mask_hi, data2, low4bits, zeroes) ^ 0xFFFF; - res = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = res; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - for (; buf + 15 < buf_end; buf += 16) { - m128 chars = load128(buf); - ptr = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, shuftiExec)(m128 mask_lo, m128 mask_hi, - const u8 *buf, - const u8 *buf_end, u8 run_len -#ifdef MULTIACCEL_DOUBLE - , u8 run_len2 -#endif - ) { - assert(buf && buf_end); - assert(buf < buf_end); - - // Slow path for small cases. - if (buf_end - buf < 16) { - return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - - const m128 zeroes = zeroes128(); - const m128 low4bits = _mm_set1_epi8(0xf); - const u8 *rv; - - size_t min = (size_t)buf % 16; - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. - m128 chars = loadu128(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, buf, - low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (16 - min); - - // if we have enough data, run bigger pipeline; otherwise run smaller one - if (buf_end - buf >= 128) { - rv = JOIN(MATCH_ALGO, shuftiPipeline32)(mask_lo, mask_hi, - buf, buf_end, low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } else if (buf_end - buf >= 16){ - rv = JOIN(MATCH_ALGO, shuftiPipeline16)(mask_lo, mask_hi, - buf, buf_end, low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - chars = loadu128(buf_end - 16); - rv = JOIN(MATCH_ALGO, fwdBlock)(mask_lo, mask_hi, chars, - buf_end - 16, low4bits, zeroes, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multitruffle.c b/src/nfa/multitruffle.c deleted file mode 100644 index c333414c..00000000 --- a/src/nfa/multitruffle.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "ue2common.h" -#include "util/arch.h" - -#include "multitruffle.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -#include "multiaccel_common.h" - -#if !defined(HAVE_AVX2) - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multitruffle_sse.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#else - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multitruffle_avx2.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#endif diff --git a/src/nfa/multitruffle.h b/src/nfa/multitruffle.h deleted file mode 100644 index 8703b5ca..00000000 --- a/src/nfa/multitruffle.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MULTITRUFFLE_H -#define MULTITRUFFLE_H - -/** \file - * \brief Multitruffle: multibyte version of Truffle. - * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#include "util/simd_types.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -const u8 *long_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len); - -const u8 *longgrab_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len); - -const u8 *shift_truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len); - -const u8 *shiftgrab_truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *doubleshift_truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -const u8 *doubleshiftgrab_truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -#ifdef __cplusplus -} -#endif - - -#endif /* MULTITRUFFLE_H */ diff --git a/src/nfa/multitruffle_avx2.h b/src/nfa/multitruffle_avx2.h deleted file mode 100644 index e52db5fc..00000000 --- a/src/nfa/multitruffle_avx2.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Matches a byte in a charclass using three shuffles - */ - -#include "config.h" -#include "ue2common.h" -#include "multiaccel_common.h" - -/* - * include "block" function - */ -#include "truffle_common.h" - -/* - * single-byte truffle fwd match function, should only be defined when not - * compiling multiaccel - */ -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, - m256 v, const u8 *buf, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - u64a z = (u64a) block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); - return (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len])(buf, z ^ 0xFFFFFFFF -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("run_len %zu\n", buf_end - buf); - const m256 wide_clear = set2x128(shuf_mask_lo_highclear); - const m256 wide_set = set2x128(shuf_mask_lo_highset); - - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - if (buf_end - buf < 32) { - return truffleMini(wide_clear, wide_set, buf, buf_end); - } - - size_t min = (size_t)buf % 32; - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (32 - min); - - const u8 *last_block = buf_end - 32; - while (buf < last_block) { - m256 lchars = load256(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, lchars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += 32; - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. 
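The mop-up step named in the comment above is worth a note, since every scanner in this patch ends the same way: instead of a scalar cleanup loop, one final unaligned load anchored at buf_end - 32 covers the last 1..32 bytes, overlapping bytes the aligned loop has already cleared. A scalar sketch of that tail handling, with scan_block as a hypothetical stand-in for the 32-byte SIMD step:

    #include <stddef.h>
    #include <stdint.h>

    /* Stand-in for the 32-byte SIMD step: first match in p[0..31]. */
    static const uint8_t *scan_block(const uint8_t *p, uint8_t target) {
        for (size_t i = 0; i < 32; i++) {
            if (p[i] == target) {
                return p + i;
            }
        }
        return NULL;
    }

    /* Aligned main loop plus one overlapping tail block. The overlap is
     * harmless: a match in the re-scanned bytes would already have
     * returned from the loop. Assumes buf_end - buf >= 32, which the
     * deleted truffleExec checks before reaching this point. */
    static const uint8_t *scan(const uint8_t *buf, const uint8_t *buf_end,
                               uint8_t target) {
        const uint8_t *last_block = buf_end - 32;
        while (buf < last_block) {
            const uint8_t *rv = scan_block(buf, target);
            if (rv) {
                return rv;
            }
            buf += 32;
        }
        const uint8_t *rv = scan_block(buf_end - 32, target); /* mop-up */
        return rv ? rv : buf_end;
    }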
- assert(buf <= buf_end && buf >= buf_end - 32); - chars = loadu256(buf_end - 32); - rv = JOIN(MATCH_ALGO, fwdBlock)(wide_clear, wide_set, chars, - buf_end - 32, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multitruffle_sse.h b/src/nfa/multitruffle_sse.h deleted file mode 100644 index b287e4fc..00000000 --- a/src/nfa/multitruffle_sse.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "ue2common.h" -#include "multiaccel_common.h" - -/* - * include "block" function - */ -#include "truffle_common.h" - -/* - * single-byte truffle fwd match function, should only be defined when not - * compiling multiaccel - */ - -static really_inline -const u8 *JOIN(MATCH_ALGO, fwdBlock)(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - m128 v, const u8 *buf, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v) ^ 0xFFFF; - return (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len])(buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, trufflePipeline16)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! 
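All of these pipelines dispatch into the match_funcs tables generated by the multiaccel headers removed earlier in the patch. The trick those tables implement is compact enough to sketch: a position starts a run of len consecutive matching bytes exactly when its bit survives repeatedly ANDing the match mask with a shifted copy of itself, which is the reduction the VARISHIFT-based macros unroll per run length. A plain-C sketch, assuming a 32-bit mask where bit i means byte i matched:

    #include <stdint.h>

    /* After k iterations bit i is set iff bits i..i+k of the original
     * mask were all set, so the survivors mark starts of runs of at
     * least len matching bytes. The deleted macros unroll this per run
     * length rather than looping. */
    static uint32_t run_start_mask(uint32_t z, unsigned len) {
        for (unsigned i = 1; i < len; i++) {
            z &= z >> 1;
        }
        return z;
    }

A caller then turns the lowest surviving bit back into a pointer (buf plus its count of trailing zeroes), which is roughly what the match_32/match_64 helpers behind these tables do.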
- assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data) ^ 0xFFFF; - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, trufflePipeline32)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; - m128 data2 = load128(buf + 16); - u32 z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; - - // store the results - u32 last_res = z1 | (z2 << 16); - last_buf = buf; - buf += 32; - - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data1) ^ 0xFFFF; - data2 = load128(buf + 16); - z2 = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, data2) ^ 0xFFFF; - res = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = res; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - for (; buf + 15 < buf_end; buf += 16) { - m128 chars = load128(buf); - ptr = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, - chars, buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, truffleExec)(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end, const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("run_len %zu\n", buf_end - buf); - - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - if (buf_end - buf < 16) { - return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, buf_end); - } - - size_t min = (size_t)buf % 16; - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. 
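The preconditioning step flagged above is the mirror image of the tail trick: one unaligned block covers the first 16 bytes, then the scan resumes at the next 16-byte boundary, accepting up to 15 bytes of harmless overlap. The arithmetic, sketched:

    #include <stdint.h>

    /* Resume point after the unaligned block over buf[0..15] has been
     * tested. When buf is already aligned this advances by a full 16
     * bytes (no overlap); otherwise the bytes between the boundary and
     * buf + 16 are scanned twice, which is safe because a match in them
     * was already reported. */
    static const uint8_t *align_after_head(const uint8_t *buf) {
        uintptr_t min = (uintptr_t)buf % 16;
        return buf + (16 - min);
    }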
- m128 chars = loadu128(buf); - rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - buf += (16 - min); - - // if we have enough data, run bigger pipeline; otherwise run smaller one - if (buf_end - buf >= 128) { - rv = JOIN(MATCH_ALGO, trufflePipeline32)(shuf_mask_lo_highclear, shuf_mask_lo_highset, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } else if (buf_end - buf >= 16){ - rv = JOIN(MATCH_ALGO, trufflePipeline16)(shuf_mask_lo_highclear, shuf_mask_lo_highset, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(rv)) { - return rv; - } - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - chars = loadu128(buf_end - 16); - rv = JOIN(MATCH_ALGO, fwdBlock)(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, - buf_end - 16, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (rv) { - return rv; - } - - return buf_end; -} diff --git a/src/nfa/multivermicelli.c b/src/nfa/multivermicelli.c deleted file mode 100644 index fe6cbdb5..00000000 --- a/src/nfa/multivermicelli.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "ue2common.h" -#include "util/arch.h" - -#include "multivermicelli.h" - -#include "multiaccel_common.h" - -#if !defined(HAVE_AVX2) - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multivermicelli_sse.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#else - -#define MATCH_ALGO long_ -#include "multiaccel_long.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO longgrab_ -#include "multiaccel_longgrab.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shift_ -#include "multiaccel_shift.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO shiftgrab_ -#include "multiaccel_shiftgrab.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MULTIACCEL_DOUBLE - -#define MATCH_ALGO doubleshift_ -#include "multiaccel_doubleshift.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#define MATCH_ALGO doubleshiftgrab_ -#include "multiaccel_doubleshiftgrab.h" -#include "multivermicelli_avx2.h" -#undef MATCH_ALGO - -#undef MULTIACCEL_DOUBLE - -#endif diff --git a/src/nfa/multivermicelli.h b/src/nfa/multivermicelli.h deleted file mode 100644 index 55f9b1f2..00000000 --- a/src/nfa/multivermicelli.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef MULTIVERMICELLI_H_ -#define MULTIVERMICELLI_H_ - -#ifdef __cplusplus -extern "C" -{ -#endif - -const u8 *long_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *longgrab_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *shift_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *shiftgrab_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len); - -const u8 *doubleshift_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -const u8 *doubleshiftgrab_vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end, const u8 run_len, - const u8 run2_len); - -#ifdef __cplusplus -} -#endif - - -#endif /* MULTIVERMICELLI_H_ */ diff --git a/src/nfa/multivermicelli_avx2.h b/src/nfa/multivermicelli_avx2.h deleted file mode 100644 index 9081aa3f..00000000 --- a/src/nfa/multivermicelli_avx2.h +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -#include "multiaccel_common.h" - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m256 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m256 casemask = set32x8(CASE_CLEAR); - const u8 *ptr; - m256 data = loadu256(buf); - u32 z = movemask256(eq256(chars, and256(casemask, data))); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalign)(m256 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8 *ptr; - - m256 data = loadu256(buf); - u32 z = movemask256(eq256(chars, data)); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -/* - * 32-byte pipeline - */ -static really_inline -const u8 *JOIN(MATCH_ALGO, vermPipeline)(m256 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 32 bytes - m256 data = load256(buf); - u32 z = movemask256(eq256(chars, data)); - last_res = z; - last_buf = buf; - buf += 32; - - // now, start the pipeline! - assert((size_t)buf % 32 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data = load256(buf); - z = movemask256(eq256(chars, data)); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 32); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte caseless pipeline - */ -static really_inline -const u8 *JOIN(MATCH_ALGO, vermPipelineNocase)(m256 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m256 casemask = set32x8(CASE_CLEAR); - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 32 bytes - m256 data = load256(buf); - u32 z = movemask256(eq256(chars, and256(casemask, data))); - last_res = z; - last_buf = buf; - buf += 32; - - - // now, start the pipeline! 
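The nocase prologue above compares against and256(casemask, data) rather than the raw input. The fold behind that is a single AND: clearing the 0x20 bit maps ASCII lower case onto upper case, so one equality test against a pre-uppercased target byte covers both cases. A scalar sketch; note the fold also pairs some punctuation (for instance '{' with '['), which is why this path is only meaningful for alphabetic targets:

    #include <stdint.h>

    #define ASCII_CASE_CLEAR 0xdf /* ~0x20: clears the ASCII case bit */

    /* One AND folds 'a'..'z' onto 'A'..'Z'; the caller supplies target
     * already uppercased, as the deleted nocase paths assume. */
    static int matches_nocase(uint8_t input, uint8_t target_upper) {
        return (input & ASCII_CASE_CLEAR) == target_upper;
    }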
- assert((size_t)buf % 32 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data = load256(buf); - z = movemask256(eq256(chars, and256(casemask, data))); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 32); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - - const u8 *ptr; - - // Handle small scans. - if (buf_end - buf < 32) { - for (; buf < buf_end; buf++) { - char cur = (char)*buf; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur == c) { - break; - } - } - return buf; - } - - m256 chars = set32x8(c); /* nocase already uppercase */ - - uintptr_t min = (uintptr_t)buf % 32; - - if (min) { - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - buf += 32 - min; - } - - if (buf_end - buf >= 32){ - ptr = nocase ? JOIN(MATCH_ALGO, vermPipelineNocase)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermPipeline)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - - // final unaligned scan - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf_end - 32, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf_end - 32, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - - // run our pipeline - return ptr ? ptr : buf_end; -} diff --git a/src/nfa/multivermicelli_sse.h b/src/nfa/multivermicelli_sse.h deleted file mode 100644 index cdacd2c4..00000000 --- a/src/nfa/multivermicelli_sse.h +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -#define VERM_BOUNDARY 16 -#define VERM_TYPE m128 -#define VERM_SET_FN set16x8 - -#include "multiaccel_common.h" - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalignNocase)(m128 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m128 casemask = set16x8(CASE_CLEAR); - const u8 *ptr; - m128 data = loadu128(buf); - u32 z = movemask128(eq128(chars, and128(casemask, data))); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -static really_inline -const u8 *JOIN(MATCH_ALGO, vermUnalign)(m128 chars, - const u8 *buf, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8 *ptr; - - m128 data = loadu128(buf); - u32 z = movemask128(eq128(chars, data)); - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (buf, z -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - return NULL; -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline16)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = movemask128(eq128(chars, data)); - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = movemask128(eq128(chars, data)); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 16-byte pipeline, for smaller scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline16Nocase)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m128 casemask = set16x8(CASE_CLEAR); - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 16 bytes - m128 data = load128(buf); - u32 z = movemask128(eq128(chars, and128(casemask, data))); - last_buf = buf; - last_res = z; - buf += 16; - - // now, start the pipeline! 
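run_len reaches these scanners as a runtime value, but the shift counts inside the match routines want to be compile-time constants, so each run length gets its own macro-generated routine and call sites index a function-pointer table; that is all the (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) expressions amount to. A small sketch of the pattern with two hypothetical entries:

    #include <stddef.h>
    #include <stdint.h>

    typedef const uint8_t *(*match_fn)(const uint8_t *buf, uint32_t z);

    /* Hypothetical stand-ins for two macro-generated specialisations,
     * each handling one fixed run length. __builtin_ctz assumes
     * GCC/Clang. */
    static const uint8_t *match_run_1(const uint8_t *buf, uint32_t z) {
        return z ? buf + __builtin_ctz(z) : NULL;
    }

    static const uint8_t *match_run_2(const uint8_t *buf, uint32_t z) {
        z &= z >> 1; /* keep only starts of runs of two or more */
        return z ? buf + __builtin_ctz(z) : NULL;
    }

    /* Index 0 is unused, matching the "skip the first" convention of
     * the deleted tables. */
    static const match_fn match_funcs_sketch[] = {
        NULL,
        match_run_1,
        match_run_2,
    };

A call site then reduces to match_funcs_sketch[run_len](buf, mask): one indirect call per block, with the run length baked into the callee.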
- assert((size_t)buf % 16 == 0); - for (; buf + 15 < buf_end; buf += 16) { - // scan more data - data = load128(buf); - z = movemask128(eq128(chars, and128(casemask, data))); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_buf = buf; - last_res = z; - } - assert(buf <= buf_end && buf >= buf_end - 16); - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 32)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - return NULL; -} - -/* - * 32-byte pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline32)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - const u8* ptr, *last_buf; - u32 res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = movemask128(eq128(chars, data1)); - m128 data2 = load128(buf + 16); - u32 z2 = movemask128(eq128(chars, data2)); - - // store the results - u32 last_res = z1 | (z2 << VERM_BOUNDARY); - last_buf = buf; - buf += 32; - - - // now, start the pipeline! - assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = movemask128(eq128(chars, data1)); - data2 = load128(buf + 16); - z2 = movemask128(eq128(chars, data2)); - res = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = res; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - if (buf + 15 < buf_end) { - return JOIN(MATCH_ALGO, vermPipeline16)(chars, buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -/* - * 32-byte caseless pipeline, for bigger scans - */ -static -const u8 *JOIN(MATCH_ALGO, vermPipeline32Nocase)(m128 chars, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - m128 casemask = set16x8(CASE_CLEAR); - const u8* ptr, *last_buf; - u32 last_res; - - // pipeline prologue: scan first 32 bytes - m128 data1 = load128(buf); - u32 z1 = movemask128(eq128(chars, and128(casemask, data1))); - m128 data2 = load128(buf + 16); - u32 z2 = movemask128(eq128(chars, and128(casemask, data2))); - u32 z = z1 | (z2 << VERM_BOUNDARY); - - last_res = z; - last_buf = buf; - buf += 32; - - // now, start the pipeline! 
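One more detail of the 32-byte prologue above: the two 16-bit movemask results are packed into a single 32-bit word, z1 | (z2 << 16), before being handed to a 64-bit match function, so dispatch overhead is paid once per 32 bytes instead of once per 16. The packing, spelled out:

    #include <stdint.h>

    /* Bytes 0..15 land in the low half and bytes 16..31 in the high
     * half, so one wider match call covers both 16-byte loads. */
    static uint32_t combine_masks(uint16_t z1, uint16_t z2) {
        return (uint32_t)z1 | ((uint32_t)z2 << 16);
    }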
- assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - // scan more data - data1 = load128(buf); - z1 = movemask128(eq128(chars, and128(casemask, data1))); - data2 = load128(buf + 16); - z2 = movemask128(eq128(chars, and128(casemask, data2))); - z = z1 | (z2 << 16); - - // do a comparison on previous result - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - last_res = z; - last_buf = buf; - } - - // epilogue: compare final results - ptr = (*JOIN4(MATCH_ALGO, match_funcs, _, 64)[run_len]) - (last_buf, last_res -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - - // if we still have some data left, scan it too - if (buf + 15 < buf_end) { - return JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - } - assert(buf <= buf_end && buf >= buf_end - 16); - - return NULL; -} - -const u8 *JOIN(MATCH_ALGO, vermicelliExec)(char c, char nocase, - const u8 *buf, - const u8 *buf_end, - const u8 run_len -#ifdef MULTIACCEL_DOUBLE - , const u8 run_len2 -#endif - ) { - DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - - const u8 *ptr; - - // Handle small scans. - if (buf_end - buf < VERM_BOUNDARY) { - for (; buf < buf_end; buf++) { - char cur = (char)*buf; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur == c) { - break; - } - } - return buf; - } - - VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - - uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; - - if (min) { - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - buf += VERM_BOUNDARY - min; - } - - // if we have enough data, run bigger pipeline; otherwise run smaller one - if (buf_end - buf >= 128) { - ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline32Nocase)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermPipeline32)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } else if (buf_end - buf >= 16){ - ptr = nocase ? JOIN(MATCH_ALGO, vermPipeline16Nocase)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermPipeline16)(chars, - buf, buf_end, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - if (unlikely(ptr)) { - return ptr; - } - } - - // final unaligned scan - ptr = nocase ? JOIN(MATCH_ALGO, vermUnalignNocase)(chars, - buf_end - VERM_BOUNDARY, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ) : JOIN(MATCH_ALGO, vermUnalign)(chars, - buf_end - VERM_BOUNDARY, run_len -#ifdef MULTIACCEL_DOUBLE - , run_len2 -#endif - ); - - // run our pipeline - return ptr ? 
ptr : buf_end; -} diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index f7b4403e..dda5060f 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -39,7 +39,52 @@ #include "util/simd_utils.h" #include "util/unaligned.h" -#include "shufti_common.h" +#ifdef DEBUG +#include + +#define DUMP_MSK(_t) \ +static UNUSED \ +void dumpMsk##_t(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + for (int j = 0; j < 8; j++) { \ + if ((c >> (7-j)) & 0x1) \ + printf("1"); \ + else \ + printf("0"); \ + } \ + printf(" "); \ + } \ +} \ +static UNUSED \ +void dumpMsk##_t##AsChars(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + if (isprint(c)) \ + printf("%c",c); \ + else \ + printf("."); \ + } \ +} + +#endif + +/** \brief Naive byte-by-byte implementation. */ +static really_inline +const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, + const u8 *buf_end) { + assert(buf < buf_end); + + for (; buf < buf_end; ++buf) { + u8 c = *buf; + if (lo[c & 0xf] & hi[c >> 4]) { + break; + } + } + return buf; +} /** \brief Naive byte-by-byte implementation. */ static really_inline @@ -59,6 +104,30 @@ const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, #if !defined(HAVE_AVX2) /* Normal SSSE3 shufti */ +#ifdef DEBUG +DUMP_MSK(128) +#endif + +#define GET_LO_4(chars) and128(chars, low4bits) +#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) + +static really_inline +u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, + const m128 compare) { + m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); + m128 t = and128(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); +#endif + return movemask128(eq128(t, compare)); +} + static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffff)) { @@ -293,6 +362,31 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, #else // AVX2 - 256 wide shuftis +#ifdef DEBUG +DUMP_MSK(256) +#endif + +#define GET_LO_4(chars) and256(chars, low4bits) +#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) + +static really_inline +u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, + const m256 compare) { + m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); + m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); + m256 t = and256(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); +#endif + + return movemask256(eq256(t, compare)); +} + static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffffffff)) { diff --git a/src/nfa/shufti_common.h b/src/nfa/shufti_common.h deleted file mode 100644 index 7048a8b1..00000000 --- a/src/nfa/shufti_common.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided 
that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SHUFTI_COMMON_H_ -#define SHUFTI_COMMON_H_ - -#include "ue2common.h" - -#include "util/arch.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -/* - * Common stuff for all versions of shufti (single, multi and multidouble) - */ - -/** \brief Naive byte-by-byte implementation. */ -static really_inline -const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, - const u8 *buf_end) { - assert(buf < buf_end); - - for (; buf < buf_end; ++buf) { - u8 c = *buf; - if (lo[c & 0xf] & hi[c >> 4]) { - break; - } - } - return buf; -} - -#ifdef DEBUG -#include - -#define DUMP_MSK(_t) \ -static UNUSED \ -void dumpMsk##_t(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - for (int j = 0; j < 8; j++) { \ - if ((c >> (7-j)) & 0x1) \ - printf("1"); \ - else \ - printf("0"); \ - } \ - printf(" "); \ - } \ -} \ -static UNUSED \ -void dumpMsk##_t##AsChars(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - if (isprint(c)) \ - printf("%c",c); \ - else \ - printf("."); \ - } \ -} - -#endif - -#if !defined(HAVE_AVX2) - -#ifdef DEBUG -DUMP_MSK(128) -#endif - -#define GET_LO_4(chars) and128(chars, low4bits) -#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) - -static really_inline -u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, - const m128 compare) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); - m128 t = and128(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); -#endif - return movemask128(eq128(t, compare)); -} - -#else - -#ifdef DEBUG -DUMP_MSK(256) -#endif - -#define GET_LO_4(chars) and256(chars, low4bits) -#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) - 
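/*
 * Editor's note: GET_LO_4/GET_HI_4 split each input byte into its low and
 * high nibble, and block() uses each nibble as a pshufb index into its own
 * mask table; a byte is in the character class only when both lookups
 * agree, hence the AND of c_lo and c_hi. The same helpers are re-added
 * verbatim to shufti.c above, which is why this common header can be
 * deleted.
 */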
-static really_inline -u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, - const m256 compare) { - m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); - m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); - m256 t = and256(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); -#endif - - return movemask256(eq256(t, compare)); -} - -#endif - - -#endif /* SHUFTI_COMMON_H_ */ diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c index 6d82f8e1..331ae6d6 100644 --- a/src/nfa/truffle.c +++ b/src/nfa/truffle.c @@ -37,8 +37,6 @@ #include "util/bitutils.h" #include "util/simd_utils.h" -#include "truffle_common.h" - #if !defined(HAVE_AVX2) static really_inline @@ -52,6 +50,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) { return NULL; // no match } +static really_inline +const u8 *firstMatch(const u8 *buf, u32 z) { + if (unlikely(z != 0xffff)) { + u32 pos = ctz32(~z & 0xffff); + assert(pos < 16); + return buf + pos; + } + + return NULL; // no match +} + +static really_inline +u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { + + m128 highconst = _mm_set1_epi8(0x80); + m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); + + // and now do the real work + m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); + m128 t1 = xor128(v, highconst); + m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); + m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); + m128 shuf3 = pshufb(shuf_mask_hi, t2); + m128 tmp = and128(or128(shuf1, shuf2), shuf3); + m128 tmp2 = eq128(tmp, zeroes128()); + u32 z = movemask128(tmp2); + + return z; +} + +static +const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 16); + + m128 chars = zeroes128(); + memcpy(&chars, buf, len); + + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + // can't be these bytes in z + u32 mask = (0xffff >> (16 - len)) ^ 0xffff; + const u8 *rv = firstMatch(buf, z | mask); + + if (rv) { + return rv; + } else { + return buf_end; + } +} + static really_inline const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v, const u8 *buf) { @@ -125,7 +174,7 @@ const u8 *truffleRevMini(m128 shuf_mask_lo_highclear, m128 chars = zeroes128(); memcpy(&chars, buf, len); - u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF; + u32 mask = (0xffff >> (16 - len)) ^ 0xffff; u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); const u8 *rv = lastMatch(buf, z | mask); @@ -184,6 +233,8 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, #else +// AVX2 + static really_inline const u8 *lastMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffffffff)) { @@ -195,6 +246,57 @@ const u8 *lastMatch(const u8 *buf, u32 z) { return NULL; // no match } +static really_inline +const u8 *firstMatch(const u8 *buf, u32 z) { + if (unlikely(z != 0xffffffff)) { + u32 pos = ctz32(~z); + assert(pos < 32); + return buf + pos; + } + + return NULL; // no match +} + +static really_inline +u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { + + m256 highconst = _mm256_set1_epi8(0x80); + m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201); + + // and now do the real work + m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v); + 
m256 t1 = xor256(v, highconst); + m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1); + m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); + m256 shuf3 = vpshufb(shuf_mask_hi, t2); + m256 tmp = and256(or256(shuf1, shuf2), shuf3); + m256 tmp2 = eq256(tmp, zeroes256()); + u32 z = movemask256(tmp2); + + return z; +} + +static +const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 32); + + m256 chars = zeroes256(); + memcpy(&chars, buf, len); + + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + // can't be these bytes in z + u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; + const u8 *rv = firstMatch(buf, z | mask); + + if (rv) { + return rv; + } else { + return buf_end; + } +} + static really_inline const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v, const u8 *buf) { @@ -266,7 +368,7 @@ const u8 *truffleRevMini(m256 shuf_mask_lo_highclear, m256 chars = zeroes256(); memcpy(&chars, buf, len); - u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF; + u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); const u8 *rv = lastMatch(buf, z | mask); diff --git a/src/nfa/truffle_common.h b/src/nfa/truffle_common.h deleted file mode 100644 index dc9c726c..00000000 --- a/src/nfa/truffle_common.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef TRUFFLE_COMMON_H_ -#define TRUFFLE_COMMON_H_ - -#include "util/arch.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -/* - * Common stuff for all versions of truffle (single, multi and multidouble) - */ -#if !defined(HAVE_AVX2) - -static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffff)) { - u32 pos = ctz32(~z & 0xffff); - assert(pos < 16); - return buf + pos; - } - - return NULL; // no match -} - -static really_inline -u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { - - m128 highconst = _mm_set1_epi8(0x80); - m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); - - // and now do the real work - m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); - m128 t1 = xor128(v, highconst); - m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); - m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); - m128 shuf3 = pshufb(shuf_mask_hi, t2); - m128 tmp = and128(or128(shuf1, shuf2), shuf3); - m128 tmp2 = eq128(tmp, zeroes128()); - u32 z = movemask128(tmp2); - - return z; -} - -static -const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 16); - - m128 chars = zeroes128(); - memcpy(&chars, buf, len); - - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - // can't be these bytes in z - u32 mask = (0xFFFF >> (16 - len)) ^ 0xFFFF; - const u8 *rv = firstMatch(buf, z| mask); - - if (rv) { - return rv; - } else { - return buf_end; - } -} - -#else - -static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffffffff)) { - u32 pos = ctz32(~z); - assert(pos < 32); - return buf + pos; - } - - return NULL; // no match -} - -static really_inline -u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { - - m256 highconst = _mm256_set1_epi8(0x80); - m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201); - - // and now do the real work - m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v); - m256 t1 = xor256(v, highconst); - m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1); - m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); - m256 shuf3 = vpshufb(shuf_mask_hi, t2); - m256 tmp = and256(or256(shuf1, shuf2), shuf3); - m256 tmp2 = eq256(tmp, zeroes256()); - u32 z = movemask256(tmp2); - - return z; -} - -static -const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 32); - - m256 chars = zeroes256(); - memcpy(&chars, buf, len); - - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - // can't be these bytes in z - u32 mask = (0xFFFFFFFF >> (32 - len)) ^ 0xFFFFFFFF; - const u8 *rv = firstMatch(buf, z | mask); - - if (rv) { - return rv; - } else { - return buf_end; - } -} - -#endif - -#endif /* TRUFFLE_COMMON_H_ */ diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 52f1e7d8..beeb4a69 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -37,7 +37,6 @@ #include "ue2common.h" #include "nfa/accel.h" -#include "nfa/multiaccel_compilehelper.h" #include "util/bitutils.h" // for CASE_CLEAR #include "util/charreach.h" @@ -677,134 +676,6 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) { return g.startDs; } -static -NFAVertex find_next(const NFAVertex v, const NGHolder &g) { - NFAVertex res = NGHolder::null_vertex(); - for (NFAVertex u : adjacent_vertices_range(v, g)) { - if (u 
!= v) { - res = u; - break; - } - } - return res; -} - -/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). */ -MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g, - const vector &states, - const CompileContext &cc) { - // For a set of states to be accelerable, we basically have to have only - // one state to accelerate. - if (states.size() != 1) { - DEBUG_PRINTF("can't accelerate multiple states\n"); - return MultibyteAccelInfo(); - } - - // Get our base vertex - NFAVertex v = states[0]; - - // We need the base vertex to be a self-looping dotall leading to exactly - // one vertex. - if (!hasSelfLoop(v, g)) { - DEBUG_PRINTF("base vertex has self-loop\n"); - return MultibyteAccelInfo(); - } - - if (!g[v].char_reach.all()) { - DEBUG_PRINTF("can't accelerate anything but dot\n"); - return MultibyteAccelInfo(); - } - - if (proper_out_degree(v, g) != 1) { - DEBUG_PRINTF("can't accelerate states with multiple successors\n"); - return MultibyteAccelInfo(); - } - - // find our start vertex - NFAVertex cur = find_next(v, g); - if (cur == NGHolder::null_vertex()) { - DEBUG_PRINTF("invalid start vertex\n"); - return MultibyteAccelInfo(); - } - - bool has_offset = false; - u32 offset = 0; - CharReach cr = g[cur].char_reach; - - // if we start with a dot, we have an offset, so defer figuring out the - // real CharReach for this accel scheme - if (cr == CharReach::dot()) { - has_offset = true; - offset = 1; - } - - // figure out our offset - while (has_offset) { - // vertices have to have no self loops - if (hasSelfLoop(cur, g)) { - DEBUG_PRINTF("can't have self-loops\n"); - return MultibyteAccelInfo(); - } - - // we have to have exactly 1 successor to have this acceleration scheme - if (out_degree(cur, g) != 1) { - DEBUG_PRINTF("can't have multiple successors\n"); - return MultibyteAccelInfo(); - } - - cur = *adjacent_vertices(cur, g).first; - - // if we met a special vertex, bail out - if (is_special(cur, g)) { - DEBUG_PRINTF("can't have special vertices\n"); - return MultibyteAccelInfo(); - } - - // now, get the real char reach - if (g[cur].char_reach != CharReach::dot()) { - cr = g[cur].char_reach; - has_offset = false; - } else { - offset++; - } - } - - // now, fire up the compilation machinery - target_t ti = cc.target_info; - unsigned max_len = ti.has_avx2() ? MULTIACCEL_MAX_LEN_AVX2 : MULTIACCEL_MAX_LEN_SSE; - MultiaccelCompileHelper mac(cr, offset, max_len); - - while (mac.canAdvance()) { - // vertices have to have no self loops - if (hasSelfLoop(cur, g)) { - break; - } - - // we have to have exactly 1 successor to have this acceleration scheme - if (out_degree(cur, g) != 1) { - break; - } - - cur = *adjacent_vertices(cur, g).first; - - // if we met a special vertex, bail out - if (is_special(cur, g)) { - break; - } - - mac.advance(g[cur].char_reach); - } - MultibyteAccelInfo mai = mac.getBestScheme(); -#ifdef DEBUG - DEBUG_PRINTF("Multibyte acceleration scheme: type: %u offset: %u lengths: %u,%u\n", - mai.type, mai.offset, mai.len1, mai.len2); - for (size_t c = mai.cr.find_first(); c != CharReach::npos; c = mai.cr.find_next(c)) { - DEBUG_PRINTF("multibyte accel char: %zu\n", c); - } -#endif - return mai; -} - /** \brief Check if vertex \a v is an accelerable state (for a limex NFA). 
*/ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const vector &refined_cr, diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index cb3d1210..f0c98db2 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,9 +51,6 @@ namespace ue2 { #define MAX_MERGED_ACCEL_STOPS 200 #define ACCEL_MAX_STOP_CHAR 24 #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ -#define MULTIACCEL_MIN_LEN 3 -#define MULTIACCEL_MAX_LEN_SSE 15 -#define MULTIACCEL_MAX_LEN_AVX2 31 // forward-declaration of CompileContext struct CompileContext; @@ -84,11 +81,6 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::map &br_cyclic, AccelScheme *as, bool allow_wide); -/** \brief Check if vertex \a v is a multi accelerable state (for a limex NFA). - */ -MultibyteAccelInfo nfaCheckMultiAccel(const NGHolder &g, - const std::vector &verts, - const CompileContext &cc); } // namespace ue2 diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 8b494444..75ee3e65 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -52,8 +52,6 @@ set(unit_internal_SOURCES internal/limex_nfa.cpp internal/masked_move.cpp internal/multi_bit.cpp - internal/multiaccel_matcher.cpp - internal/multiaccel_shift.cpp internal/nfagraph_common.h internal/nfagraph_comp.cpp internal/nfagraph_equivalence.cpp diff --git a/unit/internal/multiaccel_matcher.cpp b/unit/internal/multiaccel_matcher.cpp deleted file mode 100644 index bdf56ff9..00000000 --- a/unit/internal/multiaccel_matcher.cpp +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - - -extern "C" { -#include "nfa/accel.h" // wrapping in extern C to make sure run_accel works -} - -#include "config.h" -#include "src/ue2common.h" - -#include "gtest/gtest.h" -#include "nfagraph/ng_limex_accel.h" -#include "nfa/accelcompile.h" -#include "nfa/multivermicelli.h" -#include "nfa/multishufti.h" -#include "nfa/multitruffle.h" -#include "util/alloc.h" -#include "util/charreach.h" - -#include -#include -#include -#include -#include - -using namespace ue2; -using namespace std; -using namespace testing; - -// test parameters structure -struct MultiaccelTestParam { - string match_pattern; - u32 match_pattern_start_idx; - u32 match_idx; - bool test_all_offsets; - u8 match_len1; - u8 match_len2; - MultibyteAccelInfo::multiaccel_type type; -}; - -// buffer size is constant -static const u32 BUF_SIZE = 200; - -// strings, out of which CharReach will be generated -static const string VERM_CR = "a"; -static const string V_NC_CR = "aA"; -static const string SHUF_CR = "abcdefghijklmnopqrstuvwxyz"; -static const string TRUF_CR = "\x11\x22\x33\x44\x55\x66\x77\x88\x99"; - -// Parameterized test case for multiaccel patterns. -class MultiaccelTest : public TestWithParam { -protected: - virtual void SetUp() { - // set up is deferred until the actual test, since we can't compile - // any accel schemes unless we know CharReach - const MultiaccelTestParam &p = GetParam(); - - // reserve space in our buffer - buffer = (u8 *)aligned_zmalloc(BUF_SIZE); - - // store the index where we expect to see the match. note that it may - // be different from where the match pattern has started since we may - // have a flooded match (i.e. a match preceded by almost-match) or a - // no-match (in which case "match" index is at the end of the buffer). - match_idx = p.match_idx; - - // make note if we need to test all offsets - sometimes we don't, for - // example when testing partial or no-match. - test_all_offsets = p.test_all_offsets; - } - - char getChar(const CharReach &cr) { - assert(cr.count() > 0); - auto dist = uniform_int_distribution(0, cr.count() - 1); - size_t result = cr.find_nth(dist(prng)); - assert(result != CharReach::npos); - return (char)result; - } - - // char generator - char getChar(const CharReach &cr, bool match) { - return getChar(match ? 
cr : ~cr); - } - - // appends a string with matches/unmatches according to input match pattern - void getMatch(u8 *result, u32 start, const string &pattern, - const CharReach &cr) { - for (const auto &c : pattern) { - result[start++] = getChar(cr, c == '1'); - } - } - - // appends non-matching noise of certain lengths - void getNoise(u8 *result, u32 start, u32 len, const CharReach &cr) { - for (unsigned i = 0; i < len; i++) { - result[start + i] = getChar(cr, false); - } - } - - // deferred buffer generation, as we don't know CharReach before we run the test - void GenerateBuffer(const CharReach &cr) { - const MultiaccelTestParam &p = GetParam(); - - // step 1: fill prefix with non-matching noise - u32 start = 0; - getNoise(buffer, start, p.match_pattern_start_idx, cr); - - // step 2: add a match - start += p.match_pattern_start_idx; - getMatch(buffer, start, p.match_pattern, cr); - - // step 3: fill in the rest of the buffer with non-matching noise - start += p.match_pattern.size(); - getNoise(buffer, start, BUF_SIZE - p.match_pattern.size() - - p.match_pattern_start_idx, cr); - } - - // deferred accel scheme generation, as we don't know CharReach before we run the test - void CompileAccelScheme(const CharReach &cr, AccelAux *aux) { - const MultiaccelTestParam &p = GetParam(); - - AccelInfo ai; - ai.single_stops = cr; // dummy CharReach to prevent red tape accel - ai.ma_len1 = p.match_len1; - ai.ma_len2 = p.match_len2; - ai.multiaccel_stops = cr; - ai.ma_type = p.type; - - buildAccelAux(ai, aux); - - // now, verify we've successfully built our accel scheme, *and* that it's - // a multibyte scheme - ASSERT_TRUE(aux->accel_type >= ACCEL_MLVERM && - aux->accel_type <= ACCEL_MDSGTRUFFLE); - } - - virtual void TearDown() { - aligned_free(buffer); - } - - // We want our tests to be deterministic, so we use a PRNG in the test - // fixture. 
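    // Editor's note: a default-constructed mt19937 always starts from the
    // same fixed seed, so every run generates identical buffers and any
    // failure is reproducible.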
- mt19937 prng; - - u32 match_idx; - u8 *buffer; - bool test_all_offsets; -}; - -static -void runTest(const u8 *buffer, AccelAux *aux, unsigned match_idx, - bool test_all_offsets) { - const u8 *start = buffer; - const u8 *end = start + BUF_SIZE; - const u8 *match = start + match_idx; - - // comparing indexes into the buffer is easier to understand than pointers - if (test_all_offsets) { - // run_accel can only scan >15 byte buffers - u32 end_offset = min(match_idx, BUF_SIZE - 15); - - for (unsigned offset = 0; offset < end_offset; offset++) { - const u8 *ptr = run_accel(aux, (start + offset), end); - unsigned idx = ptr - start; - ASSERT_EQ(match_idx, idx); - } - } else { - const u8 *ptr = run_accel(aux, start, end); - unsigned idx = ptr - start; - ASSERT_EQ(match_idx, idx); - } -} - -TEST_P(MultiaccelTest, TestVermicelli) { - AccelAux aux = {0}; - CharReach cr(VERM_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -TEST_P(MultiaccelTest, TestVermicelliNocase) { - AccelAux aux = {0}; - CharReach cr(V_NC_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -TEST_P(MultiaccelTest, TestShufti) { - AccelAux aux = {0}; - CharReach cr(SHUF_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -TEST_P(MultiaccelTest, TestTruffle) { - AccelAux aux = {0}; - CharReach cr(TRUF_CR); - - GenerateBuffer(cr); - - CompileAccelScheme(cr, &aux); - - runTest(buffer, &aux, match_idx, test_all_offsets); -} - -static const MultiaccelTestParam multiaccelTests[] = { - // long matcher - - // full, partial, flooded, nomatch - {"11111", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONG}, - {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONG}, - {"1111011111", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONG}, - {"1111011110", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONG}, - - // long-grab matcher - - // full, partial, flooded, nomatch - {"111110", 180, 180, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - {"111", 197, 197, true, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - {"11111111110", 177, 182, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - {"11110111101", 177, 200, false, 5, 0, MultibyteAccelInfo::MAT_LONGGRAB}, - - // shift matcher - - // full, partial, flooded, nomatch - {"11001", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - {"110", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - {"1001011001", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - {"1101001011", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFT}, - - // shift-grab matcher - - // full, partial, flooded, nomatch - {"10111", 180, 180, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - {"101", 197, 197, true, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - {"1110010111", 177, 182, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - {"1100101100", 177, 200, false, 4, 0, MultibyteAccelInfo::MAT_SHIFTGRAB}, - - // doubleshift matcher - - // full, partial (one and two shifts), flooded, nomatch - {"110111", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"110", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"1101", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"1100100101", 178, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - {"1101001101", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFT}, - - // doubleshift-grab matcher - - // full, partial (one and two 
shifts), flooded, nomatch - {"100101", 180, 180, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"100", 197, 197, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"1011", 196, 196, true, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"11111101101", 177, 182, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, - {"1111110111", 177, 200, false, 3, 2, MultibyteAccelInfo::MAT_DSHIFTGRAB}, -}; - -INSTANTIATE_TEST_CASE_P(Multiaccel, MultiaccelTest, ValuesIn(multiaccelTests)); - -// boring stuff for google test -void PrintTo(const MultiaccelTestParam &p, ::std::ostream *os) { - *os << "MultiaccelTestParam: " << p.match_pattern; -} diff --git a/unit/internal/multiaccel_shift.cpp b/unit/internal/multiaccel_shift.cpp deleted file mode 100644 index d6019870..00000000 --- a/unit/internal/multiaccel_shift.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "src/ue2common.h" - -#include "gtest/gtest.h" -#include "nfa/multiaccel_common.h" - -/* - * Unit tests for the shifters. - * - * This is a bit messy, as shifters are macros, so we're using macros to test - * other macros. 
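 * (Editor's note: each TEST_SHIFT(n) below builds the value (1 << n) - 1,
 * a run of n set bits, applies the JOIN(SHIFT, n) macro to it, and asserts
 * the result is exactly 1, i.e. every shifter collapses an n-bit run down
 * to a single bit.)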
- */ - -#define TEST_SHIFT(n) \ - do { \ - u64a val = ((u64a) 1 << n) - 1; \ - JOIN(SHIFT, n)(val); \ - ASSERT_EQ(val, 1); \ - } while (0) - -TEST(MultiaccelShift, StaticShift) { - TEST_SHIFT(1); - TEST_SHIFT(2); - TEST_SHIFT(3); - TEST_SHIFT(4); - TEST_SHIFT(5); - TEST_SHIFT(6); - TEST_SHIFT(7); - TEST_SHIFT(8); - TEST_SHIFT(10); - TEST_SHIFT(11); - TEST_SHIFT(12); - TEST_SHIFT(13); - TEST_SHIFT(14); - TEST_SHIFT(15); - TEST_SHIFT(16); - TEST_SHIFT(17); - TEST_SHIFT(18); - TEST_SHIFT(19); - TEST_SHIFT(20); - TEST_SHIFT(21); - TEST_SHIFT(22); - TEST_SHIFT(23); - TEST_SHIFT(24); - TEST_SHIFT(25); - TEST_SHIFT(26); - TEST_SHIFT(27); - TEST_SHIFT(28); - TEST_SHIFT(29); - TEST_SHIFT(30); - TEST_SHIFT(31); - TEST_SHIFT(32); -} From a2dc430a38707294710c009415e39552106a72aa Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 27 Sep 2016 15:56:40 +1000 Subject: [PATCH 295/326] Header guards --- cmake/config.h.in | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/config.h.in b/cmake/config.h.in index c7b577c2..5434668e 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -1,5 +1,8 @@ /* used by cmake */ +#ifndef CONFIG_H_ +#define CONFIG_H_ + /* "Define if the build is 32 bit" */ #cmakedefine ARCH_32_BIT @@ -91,3 +94,5 @@ /* define if reverse_graph requires patch for boost 1.62.0 */ #cmakedefine BOOST_REVGRAPH_PATCH + +#endif /* CONFIG_H_ */ From 9acda484feab22bf02eaa152b0f350a889bc23fe Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 28 Mar 2017 11:15:55 +1100 Subject: [PATCH 296/326] debug: use before set --- src/nfa/mcsheng.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/nfa/mcsheng.c b/src/nfa/mcsheng.c index 322cde0a..a5ba2151 100644 --- a/src/nfa/mcsheng.c +++ b/src/nfa/mcsheng.c @@ -177,15 +177,15 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, m->sheng_accel_limit, sheng_stop_limit); #endif -#define SHENG_SINGLE_ITER do { \ - m128 shuffle_mask = masks[*(c++)]; \ - s = pshufb(shuffle_mask, s); \ - u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ - DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr); \ - if (s_gpr_x4 >= sheng_stop_limit_x4) { \ - s_gpr = s_gpr_x4; \ - goto exit; \ - } \ +#define SHENG_SINGLE_ITER do { \ + m128 shuffle_mask = masks[*(c++)]; \ + s = pshufb(shuffle_mask, s); \ + u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ + DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr_x4); \ + if (s_gpr_x4 >= sheng_stop_limit_x4) { \ + s_gpr = s_gpr_x4; \ + goto exit; \ + } \ } while (0) u8 s_gpr; From 85358e0ad08ecdc8a5aad116d33ee9bfcbcb616a Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 11 Apr 2017 14:27:59 +1000 Subject: [PATCH 297/326] hsbench: output max throughput result --- tools/hsbench/main.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index 9eadf6dd..b5506af3 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -597,6 +597,17 @@ void displayPerScanResults(const vector> &threads, printf("\n"); } +static +double fastestResult(const vector> &threads) { + double best = threads[0]->results[0].seconds; + for (const auto &t : threads) { + for (const auto &r : t->results) { + best = min(best, r.seconds); + } + } + return best; +} + static u64a byte_size(const vector &corpus_blocks) { u64a total = 0; @@ -650,8 +661,12 @@ void displayResults(const vector> &threads, double blockRate = (double)totalBlocks / (double)totalSecs; printf("Overall block rate: %'0.2f 
blocks/sec\n", blockRate);
-    printf("Overall throughput: %'0.2Lf Mbit/sec\n",
+    printf("Mean throughput: %'0.2Lf Mbit/sec\n",
            calc_mbps(totalSecs, totalBytes));
+
+    double lowestScanTime = fastestResult(threads);
+    printf("Maximum throughput: %'0.2Lf Mbit/sec\n",
+           calc_mbps(lowestScanTime, bytesPerRun));
     printf("\n");
 
     if (display_per_scan) {

From fedd48489fdb22b46a301e0ab4df2187ef1eb3fb Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Tue, 9 May 2017 11:20:47 +1000
Subject: [PATCH 298/326] Allow the full cpuid flags for fat runtimes

---
 src/util/cpuid_flags.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c
index 8ac0e63c..e9bdf690 100644
--- a/src/util/cpuid_flags.c
+++ b/src/util/cpuid_flags.c
@@ -132,7 +132,7 @@ u64a cpuid_flags(void) {
         cap |= HS_CPU_FEATURES_AVX2;
     }
 
-#if !defined(HAVE_AVX2)
+#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX2)
     cap &= ~HS_CPU_FEATURES_AVX2;
 #endif
 

From 8a56d16d576c4796e30f7b09b1c1a22ec6e2f8b3 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Wed, 20 Jul 2016 11:31:34 +1000
Subject: [PATCH 299/326] avx512: add basic functions to simd_utils

Extends the m512 type to use avx512, and makes the changes required for
limex.
---
 cmake/arch.cmake             |  19 ++++
 cmake/config.h.in            |   3 +
 src/nfa/limex_accel.c        |  14 +--
 src/nfa/limex_shuffle.h      |  13 +++
 src/nfa/nfa_build_util.cpp   |   4 +-
 src/util/simd_types.h        |   7 +-
 src/util/simd_utils.c        |   4 +-
 src/util/simd_utils.h        | 192 +++++++++++++++++++++++++++++------
 src/util/state_compress.c    |  28 +++--
 unit/internal/shuffle.cpp    |  25 ++++-
 unit/internal/simd_utils.cpp |   2 +-
 11 files changed, 258 insertions(+), 53 deletions(-)

diff --git a/cmake/arch.cmake b/cmake/arch.cmake
index e98fbf22..69902f57 100644
--- a/cmake/arch.cmake
+++ b/cmake/arch.cmake
@@ -31,5 +31,24 @@ int main(){
     (void)_mm256_xor_si256(z, z);
 }" HAVE_AVX2)
 
+if (NOT HAVE_AVX2)
+    message(STATUS "Building without AVX2 support")
+endif ()
+
+# and now for AVX512
+CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
+#if !defined(__AVX512BW__)
+#error no avx512bw
+#endif
+
+int main(){
+    __m512i z = _mm512_setzero_si512();
+    (void)_mm512_abs_epi8(z);
+}" HAVE_AVX512)
+
+if (NOT HAVE_AVX512)
+    message(STATUS "Building without AVX512 support")
+endif ()
+
 unset (CMAKE_REQUIRED_FLAGS)
 unset (INTRIN_INC_H)
diff --git a/cmake/config.h.in b/cmake/config.h.in
index 5434668e..6e23f493 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -15,6 +15,9 @@
 /* "Define if building for EM64T" */
 #cmakedefine ARCH_X86_64
 
+/* Define if AVX-512BW available */
+#cmakedefine HAVE_AVX512
+
 /* internal build, switch on dump support.
*/ #cmakedefine DUMP_SUPPORT diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index c34216f3..4834b6a5 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -151,18 +151,20 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex, DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n"); m512 accelPerm = limex->accelPermute; m512 accelComp = limex->accelCompare; -#if !defined(HAVE_AVX2) +#if defined(HAVE_AVX512) + idx = packedExtract512(s, accelPerm, accelComp); +#elif defined(HAVE_AVX2) + u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo); + u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi); + assert((idx1 & idx2) == 0); // should be no shared bits + idx = idx1 | idx2; +#else u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo); u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi); u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo); u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi); assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits idx = idx1 | idx2 | idx3 | idx4; -#else - u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo); - u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi); - assert((idx1 & idx2) == 0); // should be no shared bits - idx = idx1 | idx2; #endif return accelScanWrapper(accelTable, aux, input, idx, i, end); } diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index 5d9b3ef8..4c142a34 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -62,4 +62,17 @@ u32 packedExtract256(m256 s, const m256 permute, const m256 compare) { } #endif // AVX2 +#if defined(HAVE_AVX512) +static really_inline +u32 packedExtract512(m512 s, const m512 permute, const m512 compare) { + // vpshufb doesn't cross lanes, so this is a bit of a cheat + m512 shuffled = pshufb_m512(s, permute); + m512 compared = and512(shuffled, compare); + u64a rv = ~eq512mask(compared, shuffled); + // stitch the lane-wise results back together + rv = rv >> 32 | rv; + return (u32)(((rv >> 16) | rv) & 0xffffU); +} +#endif // AVX512 + #endif // LIMEX_SHUFFLE_H diff --git a/src/nfa/nfa_build_util.cpp b/src/nfa/nfa_build_util.cpp index 3103cd29..9185ccdd 100644 --- a/src/nfa/nfa_build_util.cpp +++ b/src/nfa/nfa_build_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -401,7 +401,7 @@ const char *NFATraits::name = "Sheng"; template<> struct NFATraits { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 32; + static const u32 stateAlign = 64; static const bool fast = true; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; diff --git a/src/util/simd_types.h b/src/util/simd_types.h index 64844dcb..962cad6c 100644 --- a/src/util/simd_types.h +++ b/src/util/simd_types.h @@ -46,9 +46,12 @@ typedef __m256i m256; typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256; #endif -// these should align to 16 and 32 respectively typedef struct {m128 lo; m128 mid; m128 hi;} m384; -typedef struct {m256 lo; m256 hi;} m512; +#if defined(HAVE_AVX512) +typedef __m512i m512; +#else +typedef struct ALIGN_ATTR(64) {m256 lo; m256 hi;} m512; +#endif #endif /* SIMD_TYPES_H */ diff --git a/src/util/simd_utils.c b/src/util/simd_utils.c index 
54b5b4ba..25a81412 100644 --- a/src/util/simd_utils.c +++ b/src/util/simd_utils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,6 +49,7 @@ ALIGN_CL_DIRECTIVE const char vbs_mask_data[] = { /** \brief LUT for the mask1bit functions. */ ALIGN_CL_DIRECTIVE const u8 simd_onebit_masks[] = { + ZEROES_32, ZEROES_32, ZEROES_31, 0x01, ZEROES_32, ZEROES_31, 0x02, ZEROES_32, ZEROES_31, 0x04, ZEROES_32, @@ -57,4 +58,5 @@ ALIGN_CL_DIRECTIVE const u8 simd_onebit_masks[] = { ZEROES_31, 0x20, ZEROES_32, ZEROES_31, 0x40, ZEROES_32, ZEROES_31, 0x80, ZEROES_32, + ZEROES_32, ZEROES_32, }; diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 1f884843..5f4fe921 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -240,7 +240,7 @@ extern const u8 simd_onebit_masks[]; static really_inline m128 mask1bit128(unsigned int n) { assert(n < sizeof(m128) * 8); - u32 mask_idx = ((n % 8) * 64) + 31; + u32 mask_idx = ((n % 8) * 64) + 95; mask_idx -= n / 8; return loadu128(&simd_onebit_masks[mask_idx]); } @@ -290,6 +290,18 @@ m256 vpshufb(m256 a, m256 b) { #endif } +#if defined(HAVE_AVX512) +static really_inline +m512 pshufb_m512(m512 a, m512 b) { + return _mm512_shuffle_epi8(a, b); +} + +static really_inline +m512 maskz_pshufb_m512(__mmask64 k, m512 a, m512 b) { + return _mm512_maskz_shuffle_epi8(k, a, b); +} +#endif + static really_inline m128 variable_byte_shift_m128(m128 in, s32 amount) { assert(amount >= -16 && amount <= 16); @@ -592,7 +604,7 @@ m256 loadbytes256(const void *ptr, unsigned int n) { static really_inline m256 mask1bit256(unsigned int n) { assert(n < sizeof(m256) * 8); - u32 mask_idx = ((n % 8) * 64) + 31; + u32 mask_idx = ((n % 8) * 64) + 95; mask_idx -= n / 8; return loadu256(&simd_onebit_masks[mask_idx]); } @@ -902,41 +914,110 @@ char testbit384(m384 val, unsigned int n) { **** 512-bit Primitives ****/ -static really_inline m512 and512(m512 a, m512 b) { +#define eq512mask(a, b) _mm512_cmpeq_epi8_mask((a), (b)) +#define masked_eq512mask(k, a, b) _mm512_mask_cmpeq_epi8_mask((k), (a), (b)) + +static really_inline +m512 zeroes512(void) { +#if defined(HAVE_AVX512) + return _mm512_setzero_si512(); +#else + m512 rv = {zeroes256(), zeroes256()}; + return rv; +#endif +} + +static really_inline +m512 ones512(void) { +#if defined(HAVE_AVX512) + return _mm512_set1_epi8(0xFF); + //return _mm512_xor_si512(_mm512_setzero_si512(), _mm512_setzero_si512()); +#else + m512 rv = {ones256(), ones256()}; + return rv; +#endif +} + +#if defined(HAVE_AVX512) +static really_inline +m512 set64x8(u8 a) { + return _mm512_set1_epi8(a); +} + +static really_inline +m512 set8x64(u64a a) { + return _mm512_set1_epi64(a); +} + +static really_inline +m512 set4x128(m128 a) { + return _mm512_broadcast_i32x4(a); +} +#endif + +static really_inline +m512 and512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return _mm512_and_si512(a, b); +#else m512 rv; rv.lo = and256(a.lo, b.lo); rv.hi = and256(a.hi, b.hi); return rv; +#endif } -static really_inline m512 or512(m512 a, m512 b) { +static really_inline +m512 or512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return _mm512_or_si512(a, b); +#else m512 rv; rv.lo = or256(a.lo, b.lo); rv.hi = or256(a.hi, b.hi); return rv; +#endif } -static really_inline m512 xor512(m512 a, m512 b) { +static really_inline +m512 xor512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + 
return _mm512_xor_si512(a, b); +#else m512 rv; rv.lo = xor256(a.lo, b.lo); rv.hi = xor256(a.hi, b.hi); return rv; +#endif } -static really_inline m512 not512(m512 a) { +static really_inline +m512 not512(m512 a) { +#if defined(HAVE_AVX512) + return _mm512_xor_si512(a, ones512()); +#else m512 rv; rv.lo = not256(a.lo); rv.hi = not256(a.hi); return rv; +#endif } -static really_inline m512 andnot512(m512 a, m512 b) { +static really_inline +m512 andnot512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return _mm512_andnot_si512(a, b); +#else m512 rv; rv.lo = andnot256(a.lo, b.lo); rv.hi = andnot256(a.hi, b.hi); return rv; +#endif } +#if defined(HAVE_AVX512) +#define lshift64_m512(a, b) _mm512_slli_epi64((a), b) +#else // The shift amount is an immediate static really_really_inline m512 lshift64_m512(m512 a, unsigned b) { @@ -945,29 +1026,37 @@ m512 lshift64_m512(m512 a, unsigned b) { rv.hi = lshift64_m256(a.hi, b); return rv; } +#endif -static really_inline m512 zeroes512(void) { - m512 rv = {zeroes256(), zeroes256()}; - return rv; -} +#if defined(HAVE_AVX512) +#define rshift64_m512(a, b) _mm512_srli_epi64((a), (b)) +#define rshift128_m512(a, count_immed) _mm512_bsrli_epi128(a, count_immed) +#endif -static really_inline m512 ones512(void) { - m512 rv = {ones256(), ones256()}; - return rv; -} +#if !defined(_MM_CMPINT_NE) +#define _MM_CMPINT_NE 0x4 +#endif -static really_inline int diff512(m512 a, m512 b) { +static really_inline +int diff512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return !!_mm512_cmp_epi8_mask(a, b, _MM_CMPINT_NE); +#else return diff256(a.lo, b.lo) || diff256(a.hi, b.hi); +#endif } -static really_inline int isnonzero512(m512 a) { -#if !defined(HAVE_AVX2) +static really_inline +int isnonzero512(m512 a) { +#if defined(HAVE_AVX512) + return diff512(a, zeroes512()); +#elif defined(HAVE_AVX2) + m256 x = or256(a.lo, a.hi); + return !!diff256(x, zeroes256()); +#else m128 x = or128(a.lo.lo, a.lo.hi); m128 y = or128(a.hi.lo, a.hi.hi); return isnonzero128(or128(x, y)); -#else - m256 x = or256(a.lo, a.hi); - return !!diff256(x, zeroes256()); #endif } @@ -975,8 +1064,11 @@ static really_inline int isnonzero512(m512 a) { * "Rich" version of diff512(). Takes two vectors a and b and returns a 16-bit * mask indicating which 32-bit words contain differences. */ -static really_inline u32 diffrich512(m512 a, m512 b) { -#if defined(HAVE_AVX2) +static really_inline +u32 diffrich512(m512 a, m512 b) { +#if defined(HAVE_AVX512) + return _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_NE); +#elif defined(HAVE_AVX2) return diffrich256(a.lo, b.lo) | (diffrich256(a.hi, b.hi) << 8); #else a.lo.lo = _mm_cmpeq_epi32(a.lo.lo, b.lo.lo); @@ -993,22 +1085,32 @@ static really_inline u32 diffrich512(m512 a, m512 b) { * "Rich" version of diffrich(), 64-bit variant. Takes two vectors a and b and * returns a 16-bit mask indicating which 64-bit words contain differences. */ -static really_inline u32 diffrich64_512(m512 a, m512 b) { +static really_inline +u32 diffrich64_512(m512 a, m512 b) { + //TODO: cmp_epi64? 
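    /*
     * Editor's sketch (not part of the patch): the TODO above could be
     * resolved with a direct 64-bit compare plus a BMI2 bit-spread,
     * assuming BMI2 is available alongside AVX512:
     *
     *   __mmask8 k = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_NE);
     *   return _pdep_u32(k, 0x55555555); // bit i of k -> bit 2i
     *
     * As written, the patch derives the result from diffrich512() instead,
     * which avoids the extra instruction-set assumption.
     */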
u32 d = diffrich512(a, b); return (d | (d >> 1)) & 0x55555555; } // aligned load -static really_inline m512 load512(const void *ptr) { +static really_inline +m512 load512(const void *ptr) { +#if defined(HAVE_AVX512) + return _mm512_load_si512(ptr); +#else assert(ISALIGNED_N(ptr, alignof(m256))); m512 rv = { load256(ptr), load256((const char *)ptr + 32) }; return rv; +#endif } // aligned store -static really_inline void store512(void *ptr, m512 a) { - assert(ISALIGNED_N(ptr, alignof(m256))); -#if defined(HAVE_AVX2) +static really_inline +void store512(void *ptr, m512 a) { + assert(ISALIGNED_N(ptr, alignof(m512))); +#if defined(HAVE_AVX512) + return _mm512_store_si512(ptr, a); +#elif defined(HAVE_AVX2) m512 *x = (m512 *)ptr; store256(&x->lo, a.lo); store256(&x->hi, a.hi); @@ -1019,11 +1121,28 @@ static really_inline void store512(void *ptr, m512 a) { } // unaligned load -static really_inline m512 loadu512(const void *ptr) { +static really_inline +m512 loadu512(const void *ptr) { +#if defined(HAVE_AVX512) + return _mm512_loadu_si512(ptr); +#else m512 rv = { loadu256(ptr), loadu256((const char *)ptr + 32) }; return rv; +#endif } +#if defined(HAVE_AVX512) +static really_inline +m512 loadu_maskz_m512(__mmask64 k, const void *ptr) { + return _mm512_maskz_loadu_epi8(k, ptr); +} + +static really_inline +m512 loadu_mask_m512(m512 src, __mmask64 k, const void *ptr) { + return _mm512_mask_loadu_epi8(src, k, ptr); +} +#endif + // packed unaligned store of first N bytes static really_inline void storebytes512(void *ptr, m512 a, unsigned int n) { @@ -1040,6 +1159,14 @@ m512 loadbytes512(const void *ptr, unsigned int n) { return a; } +static really_inline +m512 mask1bit512(unsigned int n) { + assert(n < sizeof(m512) * 8); + u32 mask_idx = ((n % 8) * 64) + 95; + mask_idx -= n / 8; + return loadu512(&simd_onebit_masks[mask_idx]); +} + // switches on bit N in the given vector. 
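// (Editor's note: on AVX512, setbit512/clearbit512 below use mask1bit512
// above. Its index base moved from 31 to 95 because this patch prepends,
// and appends, two extra 32-byte rows of zeroes to simd_onebit_masks in
// simd_utils.c, padding the table so that a full 64-byte unaligned load
// always stays in bounds.)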
static really_inline void setbit512(m512 *ptr, unsigned int n) { @@ -1056,6 +1183,8 @@ void setbit512(m512 *ptr, unsigned int n) { sub = &ptr->hi.hi; } setbit128(sub, n % 128); +#elif defined(HAVE_AVX512) + *ptr = or512(mask1bit512(n), *ptr); #else m256 *sub; if (n < 256) { @@ -1084,6 +1213,8 @@ void clearbit512(m512 *ptr, unsigned int n) { sub = &ptr->hi.hi; } clearbit128(sub, n % 128); +#elif defined(HAVE_AVX512) + *ptr = andnot512(mask1bit512(n), *ptr); #else m256 *sub; if (n < 256) { @@ -1112,6 +1243,9 @@ char testbit512(m512 val, unsigned int n) { sub = val.hi.hi; } return testbit128(sub, n % 128); +#elif defined(HAVE_AVX512) + const m512 mask = mask1bit512(n); + return !!_mm512_test_epi8_mask(mask, val); #else m256 sub; if (n < 256) { diff --git a/src/util/state_compress.c b/src/util/state_compress.c index 87e62429..7238849e 100644 --- a/src/util/state_compress.c +++ b/src/util/state_compress.c @@ -547,16 +547,21 @@ m512 loadcompressed512_32bit(const void *ptr, m512 mvec) { expand32(v[14], m[14]), expand32(v[15], m[15]) }; m512 xvec; -#if !defined(HAVE_AVX2) - xvec.lo.lo = _mm_set_epi32(x[3], x[2], x[1], x[0]); - xvec.lo.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]); - xvec.hi.lo = _mm_set_epi32(x[11], x[10], x[9], x[8]); - xvec.hi.hi = _mm_set_epi32(x[15], x[14], x[13], x[12]); -#else +#if defined(HAVE_AVX512) + xvec = _mm512_set_epi32(x[15], x[14], x[13], x[12], + x[11], x[10], x[9], x[8], + x[7], x[6], x[5], x[4], + x[3], x[2], x[1], x[0]); +#elif defined(HAVE_AVX2) xvec.lo = _mm256_set_epi32(x[7], x[6], x[5], x[4], x[3], x[2], x[1], x[0]); xvec.hi = _mm256_set_epi32(x[15], x[14], x[13], x[12], x[11], x[10], x[9], x[8]); +#else + xvec.lo.lo = _mm_set_epi32(x[3], x[2], x[1], x[0]); + xvec.lo.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]); + xvec.hi.lo = _mm_set_epi32(x[11], x[10], x[9], x[8]); + xvec.hi.hi = _mm_set_epi32(x[15], x[14], x[13], x[12]); #endif return xvec; } @@ -582,14 +587,17 @@ m512 loadcompressed512_64bit(const void *ptr, m512 mvec) { expand64(v[4], m[4]), expand64(v[5], m[5]), expand64(v[6], m[6]), expand64(v[7], m[7]) }; -#if !defined(HAVE_AVX2) +#if defined(HAVE_AVX512) + m512 xvec = _mm512_set_epi64(x[7], x[6], x[5], x[4], + x[3], x[2], x[1], x[0]); +#elif defined(HAVE_AVX2) + m512 xvec = { .lo = _mm256_set_epi64x(x[3], x[2], x[1], x[0]), + .hi = _mm256_set_epi64x(x[7], x[6], x[5], x[4])}; +#else m512 xvec = { .lo = { _mm_set_epi64x(x[1], x[0]), _mm_set_epi64x(x[3], x[2]) }, .hi = { _mm_set_epi64x(x[5], x[4]), _mm_set_epi64x(x[7], x[6]) } }; -#else - m512 xvec = { .lo = _mm256_set_epi64x(x[3], x[2], x[1], x[0]), - .hi = _mm256_set_epi64x(x[7], x[6], x[5], x[4])}; #endif return xvec; } diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp index fcf337f2..b2316bab 100644 --- a/unit/internal/shuffle.cpp +++ b/unit/internal/shuffle.cpp @@ -165,14 +165,15 @@ TEST(Shuffle, PackedExtract64_3) { template static void build_pshufb_masks_onebit(unsigned int bit, T *permute, T *compare) { - static_assert(sizeof(T) == sizeof(m128) || sizeof(T) == sizeof(m256), + static_assert(sizeof(T) == sizeof(m128) || sizeof(T) == sizeof(m256) || + sizeof(T) == sizeof(m512), "should be valid type"); // permute mask has 0x80 in all bytes except the one we care about memset(permute, 0x80, sizeof(*permute)); memset(compare, 0, sizeof(*compare)); char *pmsk = (char *)permute; char *cmsk = (char *)compare; - u8 off = (bit >= 128) ? 0x10 : 0; + u8 off = (bit >= 128) ? (bit >= 256) ? (bit >= 384) ? 
0x30 : 0x20 : 0x10 : 0; pmsk[off] = bit/8; cmsk[off] = ~(1 << (bit % 8)); } @@ -214,4 +215,24 @@ TEST(Shuffle, PackedExtract256_1) { } } #endif + +#if defined(HAVE_AVX512) +TEST(Shuffle, PackedExtract512_1) { + // Try all possible one-bit masks + for (unsigned int i = 0; i < 512; i++) { + // shuffle a single 1 bit to the front + m512 permute, compare; + build_pshufb_masks_onebit(i, &permute, &compare); + EXPECT_EQ(1U, packedExtract512(setbit(i), permute, compare)); + EXPECT_EQ(1U, packedExtract512(ones512(), permute, compare)); + // we should get zero out of these cases + EXPECT_EQ(0U, packedExtract512(zeroes512(), permute, compare)); + EXPECT_EQ(0U, packedExtract512(not512(setbit(i)), permute, compare)); + // we should get zero out of all the other bit positions + for (unsigned int j = 0; (j != i && j < 512); j++) { + EXPECT_EQ(0U, packedExtract512(setbit(j), permute, compare)); + } + } +} +#endif } // namespace diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index dac3722e..0d3926d6 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -590,7 +590,7 @@ TEST(SimdUtilsTest, alignment) { ASSERT_EQ(16, alignof(m128)); ASSERT_EQ(32, alignof(m256)); ASSERT_EQ(16, alignof(m384)); - ASSERT_EQ(32, alignof(m512)); + ASSERT_EQ(64, alignof(m512)); } TEST(SimdUtilsTest, movq) { From 91db20d8eb548dac8ca76e0aaa9ebc858323c7a7 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 4 Oct 2016 11:18:10 +1100 Subject: [PATCH 300/326] avx512: CPU detection and platform hints --- src/compiler/compiler.cpp | 3 ++ src/database.c | 11 +++-- src/database.h | 11 +++++ src/hs.cpp | 5 ++- src/hs_compile.h | 32 +++++++++++++ src/util/arch.h | 4 ++ src/util/cpuid_flags.c | 92 +++++++++++++++++++++++++++++++++----- src/util/cpuid_flags.h | 8 ++-- src/util/target_info.cpp | 14 ++++-- src/util/target_info.h | 4 +- unit/hyperscan/single.cpp | 3 +- unit/internal/database.cpp | 4 ++ 12 files changed, 164 insertions(+), 27 deletions(-) diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 9b726f77..cce89e40 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -313,6 +313,9 @@ platform_t target_to_platform(const target_t &target_info) { if (!target_info.has_avx2()) { p |= HS_PLATFORM_NOAVX2; } + if (!target_info.has_avx512()) { + p |= HS_PLATFORM_NOAVX512; + } return p; } diff --git a/src/database.c b/src/database.c index 61eb021f..c6878d89 100644 --- a/src/database.c +++ b/src/database.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -114,7 +114,8 @@ hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes, static hs_error_t db_check_platform(const u64a p) { if (p != hs_current_platform - && p != hs_current_platform_no_avx2) { + && p != hs_current_platform_no_avx2 + && p != hs_current_platform_no_avx512) { return HS_DB_PLATFORM_ERROR; } // passed all checks @@ -366,7 +367,9 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat, u8 minor = (version >> 16) & 0xff; u8 major = (version >> 24) & 0xff; - const char *avx2 = (plat & HS_PLATFORM_NOAVX2) ? "NOAVX2" : " AVX2"; + const char *features = (plat & HS_PLATFORM_NOAVX512) + ? (plat & HS_PLATFORM_NOAVX2) ? 
"" : "AVX2" + : "AVX512"; const char *mode = NULL; @@ -395,7 +398,7 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat, // that don't have snprintf but have a workalike. int p_len = SNPRINTF_COMPAT( buf, len, "Version: %u.%u.%u Features: %s Mode: %s", - major, minor, release, avx2, mode); + major, minor, release, features, mode); if (p_len < 0) { DEBUG_PRINTF("snprintf output error, returned %d\n", p_len); hs_misc_free(buf); diff --git a/src/database.h b/src/database.h index 9b24abd4..5715ed67 100644 --- a/src/database.h +++ b/src/database.h @@ -54,6 +54,7 @@ extern "C" #define HS_PLATFORM_CPU_MASK 0x3F #define HS_PLATFORM_NOAVX2 (4<<13) +#define HS_PLATFORM_NOAVX512 (8<<13) /** \brief Platform features bitmask. */ typedef u64a platform_t; @@ -62,6 +63,9 @@ static UNUSED const platform_t hs_current_platform = { #if !defined(HAVE_AVX2) HS_PLATFORM_NOAVX2 | +#endif +#if !defined(HAVE_AVX512) + HS_PLATFORM_NOAVX512 | #endif 0, }; @@ -69,6 +73,13 @@ const platform_t hs_current_platform = { static UNUSED const platform_t hs_current_platform_no_avx2 = { HS_PLATFORM_NOAVX2 | + HS_PLATFORM_NOAVX512 | + 0, +}; + +static UNUSED +const platform_t hs_current_platform_no_avx512 = { + HS_PLATFORM_NOAVX512 | 0, }; diff --git a/src/hs.cpp b/src/hs.cpp index b9d3b356..af1c3c6a 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -120,8 +120,9 @@ bool checkMode(unsigned int mode, hs_compile_error **comp_error) { static bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) { -#define HS_TUNE_LAST HS_TUNE_FAMILY_BDW -#define HS_CPU_FEATURES_ALL (HS_CPU_FEATURES_AVX2) + static constexpr u32 HS_TUNE_LAST = HS_TUNE_FAMILY_GLM; + static constexpr u32 HS_CPU_FEATURES_ALL = + HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512; if (!p) { return true; diff --git a/src/hs_compile.h b/src/hs_compile.h index 1e2e0219..0b64e4b3 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -780,6 +780,14 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform); */ #define HS_CPU_FEATURES_AVX2 (1ULL << 2) +/** + * CPU features flag - Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX512) + * + * Setting this flag indicates that the target platform supports AVX512 + * instructions, specifically AVX-512BW. Using AVX512 implies the use of AVX2. + */ +#define HS_CPU_FEATURES_AVX512 (1ULL << 3) + /** @} */ /** @@ -836,6 +844,30 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform); */ #define HS_TUNE_FAMILY_BDW 5 +/** + * Tuning Parameter - Intel(R) microarchitecture code name Skylake + * + * This indicates that the compiled database should be tuned for the + * Skylake microarchitecture. + */ +#define HS_TUNE_FAMILY_SKL 6 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Skylake Server + * + * This indicates that the compiled database should be tuned for the + * Skylake Server microarchitecture. + */ +#define HS_TUNE_FAMILY_SKX 7 + +/** + * Tuning Parameter - Intel(R) microarchitecture code name Goldmont + * + * This indicates that the compiled database should be tuned for the + * Goldmont microarchitecture. 
+ */ +#define HS_TUNE_FAMILY_GLM 8 + /** @} */ /** diff --git a/src/util/arch.h b/src/util/arch.h index 2ed1793a..c78ee9ce 100644 --- a/src/util/arch.h +++ b/src/util/arch.h @@ -53,6 +53,10 @@ #define HAVE_AVX2 #endif +#if defined(__AVX512BW__) +#define HAVE_AVX512 +#endif + /* * ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros */ diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index e9bdf690..d4eaa319 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -56,9 +56,18 @@ #define AVX2 (1 << 5) #define BMI2 (1 << 8) +// Structured Extended Feature Flags Enumeration Leaf EBX values +#define AVX512F (1 << 16) +#define AVX512BW (1 << 30) + // Extended Control Register 0 (XCR0) values #define XCR0_SSE (1 << 1) #define XCR0_AVX (1 << 2) +#define XCR0_OPMASK (1 << 5) // k-regs +#define XCR0_ZMM_Hi256 (1 << 6) // upper 256 bits of ZMM0-ZMM15 +#define XCR0_Hi16_ZMM (1 << 7) // ZMM16-ZMM31 + +#define XCR0_AVX512 (XCR0_OPMASK | XCR0_ZMM_Hi256 | XCR0_Hi16_ZMM) static __inline void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax, @@ -124,6 +133,49 @@ int check_avx2(void) { #endif } +static +int check_avx512(void) { + /* + * For our purposes, having avx512 really means "can we use AVX512BW?" + */ +#if defined(__INTEL_COMPILER) + return _may_i_use_cpu_feature(_FEATURE_AVX512BW); +#else + unsigned int eax, ebx, ecx, edx; + + cpuid(1, 0, &eax, &ebx, &ecx, &edx); + + /* check XSAVE is enabled by OS */ + if (!(ecx & XSAVE)) { + DEBUG_PRINTF("AVX and XSAVE not supported\n"); + return 0; + } + + /* check that AVX 512 registers are enabled by OS */ + u64a xcr0 = xgetbv(0); + if ((xcr0 & XCR0_AVX512) != XCR0_AVX512) { + DEBUG_PRINTF("AVX512 registers not enabled\n"); + return 0; + } + + /* ECX and EDX contain capability flags */ + ecx = 0; + cpuid(7, 0, &eax, &ebx, &ecx, &edx); + + if (!(ebx & AVX512F)) { + DEBUG_PRINTF("AVX512F (AVX512 Foundation) instructions not enabled\n"); + return 0; + } + + if (ebx & AVX512BW) { + DEBUG_PRINTF("AVX512BW instructions enabled\n"); + return 1; + } + + return 0; +#endif +} + u64a cpuid_flags(void) { u64a cap = 0; @@ -132,10 +184,19 @@ u64a cpuid_flags(void) { cap |= HS_CPU_FEATURES_AVX2; } + if (check_avx512()) { + DEBUG_PRINTF("AVX512 enabled\n"); + cap |= HS_CPU_FEATURES_AVX512; + } + #if !defined(FAT_RUNTIME) && !defined(HAVE_AVX2) cap &= ~HS_CPU_FEATURES_AVX2; #endif +#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX512) + cap &= ~HS_CPU_FEATURES_AVX512; +#endif + return cap; } @@ -168,33 +229,37 @@ struct family_id { * Family Numbers" */ static const struct family_id known_microarch[] = { { 0x6, 0x37, HS_TUNE_FAMILY_SLM }, /* baytrail */ + { 0x6, 0x4A, HS_TUNE_FAMILY_SLM }, /* silvermont */ + { 0x6, 0x4C, HS_TUNE_FAMILY_SLM }, /* silvermont */ { 0x6, 0x4D, HS_TUNE_FAMILY_SLM }, /* avoton, rangley */ + { 0x6, 0x5A, HS_TUNE_FAMILY_SLM }, /* silvermont */ + { 0x6, 0x5D, HS_TUNE_FAMILY_SLM }, /* silvermont */ + + { 0x6, 0x5C, HS_TUNE_FAMILY_GLM }, /* goldmont */ + { 0x6, 0x5F, HS_TUNE_FAMILY_GLM }, /* denverton */ { 0x6, 0x3C, HS_TUNE_FAMILY_HSW }, /* haswell */ { 0x6, 0x45, HS_TUNE_FAMILY_HSW }, /* haswell */ { 0x6, 0x46, HS_TUNE_FAMILY_HSW }, /* haswell */ - { 0x6, 0x3F, HS_TUNE_FAMILY_HSW }, /* haswell */ + { 0x6, 0x3F, HS_TUNE_FAMILY_HSW }, /* haswell Xeon */ - { 0x6, 0x3E, HS_TUNE_FAMILY_IVB }, /* ivybridge */ + { 0x6, 0x3E, HS_TUNE_FAMILY_IVB }, /* ivybridge Xeon */ { 0x6, 0x3A, HS_TUNE_FAMILY_IVB }, /* ivybridge */ { 0x6, 0x2A, HS_TUNE_FAMILY_SNB }, /* sandybridge */ - { 0x6, 0x2D, HS_TUNE_FAMILY_SNB }, /* 
sandybridge */ + { 0x6, 0x2D, HS_TUNE_FAMILY_SNB }, /* sandybridge Xeon */ { 0x6, 0x3D, HS_TUNE_FAMILY_BDW }, /* broadwell Core-M */ + { 0x6, 0x47, HS_TUNE_FAMILY_BDW }, /* broadwell */ { 0x6, 0x4F, HS_TUNE_FAMILY_BDW }, /* broadwell xeon */ { 0x6, 0x56, HS_TUNE_FAMILY_BDW }, /* broadwell xeon-d */ -// { 0x6, 0x25, HS_TUNE_FAMILY_GENERIC }, /* westmere */ -// { 0x6, 0x2C, HS_TUNE_FAMILY_GENERIC }, /* westmere */ -// { 0x6, 0x2F, HS_TUNE_FAMILY_GENERIC }, /* westmere */ + { 0x6, 0x4E, HS_TUNE_FAMILY_SKL }, /* Skylake Mobile */ + { 0x6, 0x5E, HS_TUNE_FAMILY_SKL }, /* Skylake Core/E3 Xeon */ + { 0x6, 0x55, HS_TUNE_FAMILY_SKX }, /* Skylake Xeon */ -// { 0x6, 0x1E, HS_TUNE_FAMILY_GENERIC }, /* nehalem */ -// { 0x6, 0x1A, HS_TUNE_FAMILY_GENERIC }, /* nehalem */ -// { 0x6, 0x2E, HS_TUNE_FAMILY_GENERIC }, /* nehalem */ - -// { 0x6, 0x17, HS_TUNE_FAMILY_GENERIC }, /* penryn */ -// { 0x6, 0x1D, HS_TUNE_FAMILY_GENERIC }, /* penryn */ + { 0x6, 0x8E, HS_TUNE_FAMILY_SKL }, /* Kabylake Mobile */ + { 0x6, 0x9E, HS_TUNE_FAMILY_SKL }, /* Kabylake desktop */ }; @@ -204,10 +269,13 @@ const char *dumpTune(u32 tune) { #define T_CASE(x) case x: return #x; switch (tune) { T_CASE(HS_TUNE_FAMILY_SLM); + T_CASE(HS_TUNE_FAMILY_GLM); T_CASE(HS_TUNE_FAMILY_HSW); T_CASE(HS_TUNE_FAMILY_SNB); T_CASE(HS_TUNE_FAMILY_IVB); T_CASE(HS_TUNE_FAMILY_BDW); + T_CASE(HS_TUNE_FAMILY_SKL); + T_CASE(HS_TUNE_FAMILY_SKX); } #undef T_CASE return "unknown"; diff --git a/src/util/cpuid_flags.h b/src/util/cpuid_flags.h index 8b23d495..c39038a1 100644 --- a/src/util/cpuid_flags.h +++ b/src/util/cpuid_flags.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,8 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef CPUID_H_53FFCB14B257C2 -#define CPUID_H_53FFCB14B257C2 +#ifndef UTIL_CPUID_H_ +#define UTIL_CPUID_H_ #include "ue2common.h" @@ -50,5 +50,5 @@ int check_popcnt(void); } /* extern "C" */ #endif -#endif /* CPUID_H_53FFCB14B257C2 */ +#endif /* UTIL_CPUID_H_ */ diff --git a/src/util/target_info.cpp b/src/util/target_info.cpp index 4eadec2d..3a41e020 100644 --- a/src/util/target_info.cpp +++ b/src/util/target_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,6 +46,10 @@ bool target_t::can_run_on_code_built_for(const target_t &code_target) const { return false; } + if (!has_avx512() && code_target.has_avx512()) { + return false; + } + return true; } @@ -53,11 +57,15 @@ target_t::target_t(const hs_platform_info &p) : tune(p.tune), cpu_features(p.cpu_features) {} bool target_t::has_avx2(void) const { - return (cpu_features & HS_CPU_FEATURES_AVX2); + return cpu_features & HS_CPU_FEATURES_AVX2; +} + +bool target_t::has_avx512(void) const { + return cpu_features & HS_CPU_FEATURES_AVX512; } bool target_t::is_atom_class(void) const { - return tune == HS_TUNE_FAMILY_SLM; + return tune == HS_TUNE_FAMILY_SLM || tune == HS_TUNE_FAMILY_GLM; } } // namespace ue2 diff --git a/src/util/target_info.h b/src/util/target_info.h index 67b5b7d9..794b2985 100644 --- a/src/util/target_info.h +++ b/src/util/target_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,8 @@ struct target_t { bool has_avx2(void) const; + bool has_avx512(void) const; + bool is_atom_class(void) const; // This asks: can this target (the object) run on code that was built for diff --git a/unit/hyperscan/single.cpp b/unit/hyperscan/single.cpp index 029d223a..01fbfeab 100644 --- a/unit/hyperscan/single.cpp +++ b/unit/hyperscan/single.cpp @@ -363,7 +363,8 @@ static const unsigned validModes[] = { // Mode bits for switching off various architecture features static const unsigned long long featureMask[] = { ~0ULL, /* native */ - ~HS_CPU_FEATURES_AVX2, /* no avx2 */ + ~(HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512), /* no avx2 */ + ~HS_CPU_FEATURES_AVX512, /* no avx512 */ }; INSTANTIATE_TEST_CASE_P(Single, diff --git a/unit/internal/database.cpp b/unit/internal/database.cpp index fa34ead2..8f0c1a69 100644 --- a/unit/internal/database.cpp +++ b/unit/internal/database.cpp @@ -52,6 +52,10 @@ TEST(DB, flagsToPlatform) { p.cpu_features |= HS_CPU_FEATURES_AVX2; #endif +#if defined(HAVE_AVX512) + p.cpu_features |= HS_CPU_FEATURES_AVX512; +#endif + platform_t pp = target_to_platform(target_t(p)); ASSERT_EQ(pp, hs_current_platform); } From ec7869711f235bf9587383fada9a377f94be35f9 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 14 Dec 2016 11:47:28 +1100 Subject: [PATCH 301/326] avx512 fat runtime support: experimental --- CMakeLists.txt | 61 ++++++++++++++++++++++++++++++++---------- cmake/config.h.in | 3 --- src/dispatcher.c | 11 +++++++- src/util/cpuid_flags.c | 3 +-- src/util/cpuid_flags.h | 1 + 5 files changed, 59 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93f3c152..60959cb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -125,6 +125,9 @@ 
CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON) +option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime" + OFF) + option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF) # TODO: per platform config files? @@ -456,6 +459,11 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") endif() +if(CMAKE_C_COMPILER_ID MATCHES "Intel") + set(SKYLAKE_FLAG "-xCORE-AVX512") +else() + set(SKYLAKE_FLAG "-march=skylake-avx512") +endif() if(NOT WIN32) set(RAGEL_C_FLAGS "-Wno-unused") @@ -1079,6 +1087,7 @@ if (NOT FAT_RUNTIME) if (HAVE_AVX2) set(hs_exec_SRCS ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) endif() + if (BUILD_STATIC_LIBS) add_library(hs_exec OBJECT ${hs_exec_SRCS}) @@ -1096,27 +1105,41 @@ if (NOT FAT_RUNTIME) else (FAT_RUNTIME) set(BUILD_WRAPPER "${PROJECT_SOURCE_DIR}/cmake/build_wrapper.sh") + if (NOT BUILD_AVX512) + set (DISPATCHER_DEFINE "-DDISABLE_AVX512_DISPATCH") + endif (NOT BUILD_AVX512) set_source_files_properties(src/dispatcher.c PROPERTIES - COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function") + COMPILE_FLAGS "-Wno-unused-parameter -Wno-unused-function ${DISPATCHER_DEFINE}") if (BUILD_STATIC_LIBS) add_library(hs_exec_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) set_target_properties(hs_exec_core2 PROPERTIES COMPILE_FLAGS "-march=core2" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_LIBS $) set_target_properties(hs_exec_corei7 PROPERTIES COMPILE_FLAGS "-march=corei7" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) set_target_properties(hs_exec_avx2 PROPERTIES COMPILE_FLAGS "-march=core-avx2" RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) + if (BUILD_AVX512) + add_library(hs_exec_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_LIBS $) + set_target_properties(hs_exec_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) add_library(hs_exec_common OBJECT ${hs_exec_common_SRCS} @@ -1127,37 +1150,51 @@ else (FAT_RUNTIME) # create a lib without any src (I'm looking at you Xcode) add_library(hs_runtime STATIC src/hs_version.c - $ $ - $ $) + $ + ${RUNTIME_LIBS}) set_target_properties(hs_runtime PROPERTIES LINKER_LANGUAGE C) # we want the static lib for testing add_library(hs STATIC src/hs_version.c src/hs_valid_platform.c - ${hs_SRCS} $ $ - $ $) + ${hs_SRCS} + $ + ${RUNTIME_LIBS}) endif (BUILD_STATIC_LIBS) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) # build shared libs add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_core2 PROPERTIES COMPILE_FLAGS "-march=core2" POSITION_INDEPENDENT_CODE TRUE RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS}) + list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_corei7 PROPERTIES COMPILE_FLAGS "-march=corei7" POSITION_INDEPENDENT_CODE TRUE 
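        # Each fat-runtime variant is built as an OBJECT library through
        # BUILD_WRAPPER, which wraps the compile so the resulting symbols
        # carry an arch prefix (core2_, corei7_, avx2_ and, with
        # BUILD_AVX512, avx512_) that the dispatcher resolves at runtime.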
RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in" ) add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) set_target_properties(hs_exec_shared_avx2 PROPERTIES COMPILE_FLAGS "-march=core-avx2" POSITION_INDEPENDENT_CODE TRUE RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx2 ${CMAKE_MODULE_PATH}/keep.syms.in" ) + + if (BUILD_AVX512) + add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS}) + list(APPEND RUNTIME_SHLIBS $) + set_target_properties(hs_exec_shared_avx512 PROPERTIES + COMPILE_FLAGS "${SKYLAKE_FLAG}" + POSITION_INDEPENDENT_CODE TRUE + RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} avx512 ${CMAKE_MODULE_PATH}/keep.syms.in" + ) + endif (BUILD_AVX512) add_library(hs_exec_common_shared OBJECT ${hs_exec_common_SRCS} src/dispatcher.c @@ -1176,15 +1213,13 @@ endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) if (NOT FAT_RUNTIME) - add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c -$) - else() + add_library(hs_runtime_shared SHARED src/hs_version.c + src/hs_valid_platform.c $) + else() add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c $ - $ - $ - $) + ${RUNTIME_SHLIBS}) endif() set_target_properties(hs_runtime_shared PROPERTIES VERSION ${LIB_VERSION} @@ -1213,9 +1248,7 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) else() add_library(hs_shared SHARED src/hs_version.c src/hs_valid_platform.c ${hs_SRCS} $ - $ - $ - $) + ${RUNTIME_SHLIBS}) endif() add_dependencies(hs_shared ragel_Parser) diff --git a/cmake/config.h.in b/cmake/config.h.in index 6e23f493..5434668e 100644 --- a/cmake/config.h.in +++ b/cmake/config.h.in @@ -15,9 +15,6 @@ /* "Define if building for EM64T" */ #cmakedefine ARCH_X86_64 -/* Define if AVX-512BW available */ -#cmakedefine HAVE_AVX512 - /* internal build, switch on dump support. */ #cmakedefine DUMP_SUPPORT diff --git a/src/dispatcher.c b/src/dispatcher.c index fb2f4f02..5ae46b56 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,8 +33,14 @@ #include "util/cpuid_flags.h" #include "util/join.h" +#if defined(DISABLE_AVX512_DISPATCH) +#define avx512_ disabled_ +#define check_avx512() (0) +#endif + #define CREATE_DISPATCH(RTYPE, NAME, ...) \ /* create defns */ \ + RTYPE JOIN(avx512_, NAME)(__VA_ARGS__); \ RTYPE JOIN(avx2_, NAME)(__VA_ARGS__); \ RTYPE JOIN(corei7_, NAME)(__VA_ARGS__); \ RTYPE JOIN(core2_, NAME)(__VA_ARGS__); \ @@ -46,6 +52,9 @@ \ /* resolver */ \ static void(*JOIN(resolve_, NAME)(void)) { \ + if (check_avx512()) { \ + return JOIN(avx512_, NAME); \ + } \ if (check_avx2()) { \ return JOIN(avx2_, NAME); \ } \ diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c index d4eaa319..c0ab09af 100644 --- a/src/util/cpuid_flags.c +++ b/src/util/cpuid_flags.c @@ -133,13 +133,12 @@ int check_avx2(void) { #endif } -static int check_avx512(void) { /* * For our purposes, having avx512 really means "can we use AVX512BW?" 
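     * In practice that is a three-step gate, as implemented below: the OS
     * must have XSAVE enabled, xgetbv(0) must report the full AVX512
     * register state saved ((xcr0 & XCR0_AVX512) == XCR0_AVX512, i.e. the
     * opmask, ZMM_Hi256 and Hi16_ZMM bits 5-7 all set, so xcr0 = 0xe7
     * passes while an AVX-only 0x07 does not), and CPUID leaf 7 must
     * report AVX512F as well as AVX512BW.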
*/ #if defined(__INTEL_COMPILER) - return _may_i_use_cpu_feature(_FEATURE_AVX512BW); + return _may_i_use_cpu_feature(_FEATURE_AVX512BW | _FEATURE_AVX512VL); #else unsigned int eax, ebx, ecx, edx; diff --git a/src/util/cpuid_flags.h b/src/util/cpuid_flags.h index c39038a1..d79c3832 100644 --- a/src/util/cpuid_flags.h +++ b/src/util/cpuid_flags.h @@ -41,6 +41,7 @@ u64a cpuid_flags(void); u32 cpuid_tune(void); +int check_avx512(void); int check_avx2(void); int check_ssse3(void); int check_sse42(void); From 1089fa501865844c7af945b537a7445a52b7f488 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Mon, 8 Aug 2016 10:55:52 +1000 Subject: [PATCH 302/326] avx512: noodle --- src/hwlm/noodle_engine.c | 49 +++++++- src/hwlm/noodle_engine_avx512.c | 193 ++++++++++++++++++++++++++++++++ 2 files changed, 236 insertions(+), 6 deletions(-) create mode 100644 src/hwlm/noodle_engine_avx512.c diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c index a30a59a5..24f78c8e 100644 --- a/src/hwlm/noodle_engine.c +++ b/src/hwlm/noodle_engine.c @@ -36,12 +36,14 @@ #include "util/arch.h" #include "util/bitutils.h" #include "util/compare.h" +#include "util/join.h" #include "util/masked_move.h" #include "util/simd_utils.h" #include #include #include +#include /** \brief Noodle runtime context. */ struct cb_info { @@ -51,6 +53,24 @@ struct cb_info { size_t offsetAdj; //!< used in streaming mode }; +#if defined(HAVE_AVX512) +#define CHUNKSIZE 64 +#define MASK_TYPE m512 +#define Z_BITS 64 +#define Z_TYPE u64a +#elif defined(HAVE_AVX2) +#define CHUNKSIZE 32 +#define MASK_TYPE m256 +#define Z_BITS 32 +#define Z_TYPE u32 +#else +#define CHUNKSIZE 16 +#define MASK_TYPE m128 +#define Z_BITS 32 +#define Z_TYPE u32 +#endif + + #define RETURN_IF_TERMINATED(x) \ { \ if ((x) == HWLM_TERMINATED) { \ @@ -61,8 +81,9 @@ struct cb_info { #define SINGLE_ZSCAN() \ do { \ while (unlikely(z)) { \ - u32 pos = findAndClearLSB_32(&z); \ + Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos; \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \ matchPos); \ RETURN_IF_TERMINATED(rv); \ @@ -72,8 +93,9 @@ struct cb_info { #define DOUBLE_ZSCAN() \ do { \ while (unlikely(z)) { \ - u32 pos = findAndClearLSB_32(&z); \ + Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos - 1; \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \ noCase, cbi, matchPos); \ RETURN_IF_TERMINATED(rv); \ @@ -110,7 +132,11 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen, return HWLM_SUCCESS; } -#if defined(HAVE_AVX2) +#if defined(HAVE_AVX512) +#define CHUNKSIZE 64 +#define MASK_TYPE m512 +#include "noodle_engine_avx512.c" +#elif defined(HAVE_AVX2) #define CHUNKSIZE 32 #define MASK_TYPE m256 #include "noodle_engine_avx2.c" @@ -123,12 +149,14 @@ hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen, static really_inline hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key, bool noCase, const struct cb_info *cbi) { - hwlm_error_t rv; - size_t end = len; const MASK_TYPE mask1 = getMask(key[0], noCase); const MASK_TYPE caseMask = getCaseMask(); +#if !defined(HAVE_AVX512) + hwlm_error_t rv; + size_t end = len; + if (len < CHUNKSIZE) { rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len); return rv; @@ -173,13 +201,15 @@ hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key, cbi, s2End, end); 
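    /* Note for the AVX512 build: the chunked scan above is compiled out
     * entirely and scanSingle512() below takes over. It peels to a
     * 64-byte cacheline boundary and uses maskz loads for the head and
     * tail, e.g. a 3-byte tail is loaded with k = (~0ULL) >> (64 - 3),
     * i.e. 0x7, so only valid lanes can produce match bits. */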
return rv; +#else // HAVE_AVX512 + return scanSingle512(buf, len, key, noCase, caseMask, mask1, cbi); +#endif } static really_inline hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, size_t keyLen, size_t keyOffset, bool noCase, const struct cb_info *cbi) { - hwlm_error_t rv; // we stop scanning for the key-fragment when the rest of the key can't // possibly fit in the remaining buffer size_t end = len - keyLen + keyOffset + 2; @@ -188,6 +218,9 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase); const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase); +#if !defined(HAVE_AVX512) + hwlm_error_t rv; + if (end - keyOffset < CHUNKSIZE) { rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, mask1, mask2, cbi, keyOffset, end); @@ -244,6 +277,10 @@ hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key, caseMask, mask1, mask2, cbi, off, end); return rv; +#else // AVX512 + return scanDouble512(buf, len, key, keyLen, keyOffset, noCase, caseMask, + mask1, mask2, cbi, keyOffset, end); +#endif // AVX512 } diff --git a/src/hwlm/noodle_engine_avx512.c b/src/hwlm/noodle_engine_avx512.c new file mode 100644 index 00000000..d4e6527f --- /dev/null +++ b/src/hwlm/noodle_engine_avx512.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* noodle scan parts for AVX512 */ + +static really_inline +m512 getMask(u8 c, bool noCase) { + u8 k = caseClear8(c, noCase); + return set64x8(k); +} + +static really_inline +m512 getCaseMask(void) { + return set64x8(CASE_CLEAR); +} + +// The short scan routine. 
It is used both to scan data up to an +// alignment boundary if needed and to finish off data that the aligned scan +// function can't handle (due to small/unaligned chunk at end) +static really_inline +hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key, + bool noCase, m512 caseMask, m512 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { + const u8 *d = buf + start; + ptrdiff_t scan_len = end - start; + DEBUG_PRINTF("scan_len %zu\n", scan_len); + assert(scan_len <= 64); + if (!scan_len) { + return HWLM_SUCCESS; + } + + __mmask64 k = (~0ULL) >> (64 - scan_len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 v = loadu_maskz_m512(k, d); + + if (noCase) { + v = and512(v, caseMask); + } + + // reuse the load mask to indicate valid bytes + u64a z = masked_eq512mask(k, mask1, v); + + SINGLE_ZSCAN(); + + return HWLM_SUCCESS; +} + +static really_inline +hwlm_error_t scanSingle512(const u8 *buf, size_t len, const u8 *key, + bool noCase, m512 caseMask, m512 mask1, + const struct cb_info *cbi) { + const u8 *d = buf; + const u8 *e = buf + len; + DEBUG_PRINTF("start %p end %p \n", d, e); + assert(d < e); + if (d + 64 >= e) { + goto tail; + } + + // peel off first part to cacheline boundary + const u8 *d1 = ROUNDUP_PTR(d, 64); + if (scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, + d1 - d) == HWLM_TERMINATED) { + return HWLM_TERMINATED; + } + d = d1; + + for (; d + 64 < e; d += 64) { + DEBUG_PRINTF("d %p e %p \n", d, e); + m512 v = noCase ? and512(load512(d), caseMask) : load512(d); + + u64a z = eq512mask(mask1, v); + __builtin_prefetch(d + 128); + + SINGLE_ZSCAN(); + } + +tail: + DEBUG_PRINTF("d %p e %p \n", d, e); + // finish off tail + + return scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, d - buf, + e - buf); +} + +static really_inline +hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key, + size_t keyLen, size_t keyOffset, bool noCase, + m512 caseMask, m512 mask1, m512 mask2, + const struct cb_info *cbi, u64a *lastz0, + size_t start, size_t end) { + DEBUG_PRINTF("start %zu end %zu last 0x%016llx\n", start, end, *lastz0); + const u8 *d = buf + start; + ptrdiff_t scan_len = end - start; + if (!scan_len) { + return HWLM_SUCCESS; + } + assert(scan_len <= 64); + __mmask64 k = (~0ULL) >> (64 - scan_len); + DEBUG_PRINTF("load mask 0x%016llx scan_len %zu\n", k, scan_len); + + m512 v = loadu_maskz_m512(k, d); + if (noCase) { + v = and512(v, caseMask); + } + + u64a z0 = masked_eq512mask(k, mask1, v); + u64a z1 = masked_eq512mask(k, mask2, v); + u64a z = (*lastz0 | (z0 << 1)) & z1; + DEBUG_PRINTF("z 0x%016llx\n", z); + + DOUBLE_ZSCAN(); + *lastz0 = z0 >> (scan_len - 1); + return HWLM_SUCCESS; +} + +static really_inline +hwlm_error_t scanDouble512(const u8 *buf, size_t len, const u8 *key, + size_t keyLen, size_t keyOffset, bool noCase, + m512 caseMask, m512 mask1, m512 mask2, + const struct cb_info *cbi, size_t start, + size_t end) { + const u8 *d = buf + start; + const u8 *e = buf + end; + u64a lastz0 = 0; + DEBUG_PRINTF("start %zu end %zu \n", start, end); + assert(d < e); + if (d + 64 >= e) { + goto tail; + } + + // peel off first part to cacheline boundary + const u8 *d1 = ROUNDUP_PTR(d, 64); + if (scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, + mask1, mask2, cbi, &lastz0, start, + d1 - buf) == HWLM_TERMINATED) { + return HWLM_TERMINATED; + } + d = d1; + + for (; d + 64 < e; d += 64) { + DEBUG_PRINTF("d %p e %p 0x%016llx\n", d, e, lastz0); + m512 v = noCase ? 
and512(load512(d), caseMask) : load512(d); + + /* we have to pull the masks out of the AVX registers because we can't + byte shift between the lanes */ + u64a z0 = eq512mask(mask1, v); + u64a z1 = eq512mask(mask2, v); + u64a z = (lastz0 | (z0 << 1)) & z1; + lastz0 = z0 >> 63; + + // On large packet buffers, this prefetch appears to get us about 2%. + __builtin_prefetch(d + 256); + + DEBUG_PRINTF("z 0x%016llx\n", z); + + DOUBLE_ZSCAN(); + } + +tail: + DEBUG_PRINTF("d %p e %p off %zu \n", d, e, d - buf); + // finish off tail + + return scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask, + mask1, mask2, cbi, &lastz0, d - buf, end); +} From 194c201fc70c70dcc3d359489baf602a7bda8de1 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 14 Sep 2016 16:09:47 +1000 Subject: [PATCH 303/326] avx512: truffle --- src/nfa/truffle.c | 182 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 181 insertions(+), 1 deletion(-) diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c index 331ae6d6..d31b1a56 100644 --- a/src/nfa/truffle.c +++ b/src/nfa/truffle.c @@ -231,7 +231,7 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, return buf - 1; } -#else +#elif !defined(HAVE_AVX512) // AVX2 @@ -425,4 +425,184 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, return buf - 1; } +#else // AVX512 + +static really_inline +const u8 *lastMatch(const u8 *buf, u64a z) { + if (unlikely(z != ~0ULL)) { + u64a pos = clz64(~z); + assert(pos < 64); + return buf + (63 - pos); + } + + return NULL; // no match +} + +static really_inline +const u8 *firstMatch(const u8 *buf, u64a z) { + if (unlikely(z != ~0ULL)) { + u64a pos = ctz64(~z); + assert(pos < 64); + DEBUG_PRINTF("pos %llu\n", pos); + return buf + pos; + } + + return NULL; // no match +} + +static really_inline +u64a block(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, m512 v) { + m512 highconst = set64x8(0x80); + m512 shuf_mask_hi = set8x64(0x8040201008040201); + + // and now do the real work + m512 shuf1 = pshufb_m512(shuf_mask_lo_highclear, v); + m512 t1 = xor512(v, highconst); + m512 shuf2 = pshufb_m512(shuf_mask_lo_highset, t1); + m512 t2 = andnot512(highconst, rshift64_m512(v, 4)); + m512 shuf3 = pshufb_m512(shuf_mask_hi, t2); + m512 tmp = and512(or512(shuf1, shuf2), shuf3); + u64a z = eq512mask(tmp, zeroes512()); + + return z; +} + +static really_inline +const u8 *truffleMini(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len <= 64); + + __mmask64 mask = (~0ULL) >> (64 - len); + + m512 chars = loadu_maskz_m512(mask, buf); + + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + + const u8 *rv = firstMatch(buf, z | ~mask); + + return rv; +} + +static really_inline +const u8 *fwdBlock(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + m512 v, const u8 *buf) { + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return firstMatch(buf, z); +} + +static really_inline +const u8 *revBlock(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + m512 v, const u8 *buf) { + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return lastMatch(buf, z); +} + +const u8 *truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("len %zu\n", buf_end - buf); + const m512 wide_clear = set4x128(shuf_mask_lo_highclear); + const m512 wide_set = set4x128(shuf_mask_lo_highset); + + assert(buf && buf_end); + assert(buf < buf_end); + const u8 
*rv; + + if (buf_end - buf <= 64) { + rv = truffleMini(wide_clear, wide_set, buf, buf_end); + return rv ? rv : buf_end; + } + + assert(buf_end - buf >= 64); + if ((uintptr_t)buf % 64) { + // Preconditioning: most of the time our buffer won't be aligned. + rv = truffleMini(wide_clear, wide_set, buf, ROUNDUP_PTR(buf, 64)); + if (rv) { + return rv; + } + buf = ROUNDUP_PTR(buf, 64); + } + const u8 *last_block = buf_end - 64; + while (buf < last_block) { + m512 lchars = load512(buf); + rv = fwdBlock(wide_clear, wide_set, lchars, buf); + if (rv) { + return rv; + } + buf += 64; + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. + assert(buf <= buf_end && buf >= buf_end - 64); + m512 chars = loadu512(buf_end - 64); + rv = fwdBlock(wide_clear, wide_set, chars, buf_end - 64); + if (rv) { + return rv; + } + return buf_end; +} + +static really_inline +const u8 *truffleRevMini(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 64); + + __mmask64 mask = (~0ULL) >> (64 - len); + m512 chars = loadu_maskz_m512(mask, buf); + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + DEBUG_PRINTF("mask 0x%016llx z 0x%016llx\n", mask, z); + const u8 *rv = lastMatch(buf, z | ~mask); + + if (rv) { + return rv; + } + return buf - 1; +} + +const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + const m512 wide_clear = set4x128(shuf_mask_lo_highclear); + const m512 wide_set = set4x128(shuf_mask_lo_highset); + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + DEBUG_PRINTF("len %zu\n", buf_end - buf); + + if (buf_end - buf < 64) { + return truffleRevMini(wide_clear, wide_set, buf, buf_end); + } + + assert(buf_end - buf >= 64); + + // Preconditioning: most of the time our buffer won't be aligned. + m512 chars = loadu512(buf_end - 64); + rv = revBlock(wide_clear, wide_set, chars, buf_end - 64); + if (rv) { + return rv; + } + buf_end = (const u8 *)ROUNDDOWN_N((uintptr_t)buf_end, 64); + + const u8 *last_block = buf + 64; + while (buf_end > last_block) { + buf_end -= 64; + m512 lchars = load512(buf_end); + rv = revBlock(wide_clear, wide_set, lchars, buf_end); + if (rv) { + return rv; + } + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. 
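    // (This reverse mop-up reads the first 64 bytes of the buffer, from
    // buf itself; bytes at or above the rounded-down buf_end have already
    // been scanned without a match, so a hit from this load can only come
    // from the still-unscanned prefix.)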
+ chars = loadu512(buf); + rv = revBlock(wide_clear, wide_set, chars, buf); + if (rv) { + return rv; + } + return buf - 1; +} + #endif From eabe408e2b03bae096f7cd6b48f2e2d2bff9c85c Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Tue, 27 Sep 2016 16:01:08 +1000 Subject: [PATCH 304/326] avx512: shufti --- src/nfa/shufti.c | 351 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 349 insertions(+), 2 deletions(-) diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index dda5060f..390b6510 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -360,7 +360,8 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, return buf_end; } -#else // AVX2 - 256 wide shuftis +#elif !defined(HAVE_AVX512) +// AVX2 - 256 wide shuftis #ifdef DEBUG DUMP_MSK(256) @@ -389,9 +390,11 @@ u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { + DEBUG_PRINTF("z 0x%08x\n", z); if (unlikely(z != 0xffffffff)) { u32 pos = ctz32(~z); assert(pos < 32); + DEBUG_PRINTF("match @ pos %u\n", pos); return buf + pos; } else { return NULL; // no match @@ -697,6 +700,7 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, const u8 *buf, const u8 *buf_end) { /* we should always have at least 16 bytes */ assert(buf_end - buf >= 16); + DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); if (buf_end - buf < 32) { return shuftiDoubleShort(mask1_lo, mask1_hi, mask2_lo, mask2_hi, buf, @@ -747,4 +751,347 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, return buf_end; } -#endif //AVX2 +#else // defined(HAVE_AVX512) + +#ifdef DEBUG +DUMP_MSK(512) +#endif + +static really_inline +u64a block(m512 mask_lo, m512 mask_hi, m512 chars, const m512 low4bits, + const m512 compare) { + m512 c_lo = pshufb_m512(mask_lo, and512(chars, low4bits)); + m512 c_hi = pshufb_m512(mask_hi, + rshift64_m512(andnot512(low4bits, chars), 4)); + m512 t = and512(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); +#endif + + return eq512mask(t, compare); +} +static really_inline +const u8 *firstMatch64(const u8 *buf, u64a z) { + DEBUG_PRINTF("z 0x%016llx\n", z); + if (unlikely(z != ~0ULL)) { + u32 pos = ctz64(~z); + DEBUG_PRINTF("match @ pos %u\n", pos); + assert(pos < 64); + return buf + pos; + } else { + return NULL; // no match + } +} + +static really_inline +const u8 *fwdBlock512(m512 mask_lo, m512 mask_hi, m512 chars, const u8 *buf, + const m512 low4bits, const m512 zeroes) { + u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + + return firstMatch64(buf, z); +} + +static really_inline +const u8 *shortShufti512(m512 mask_lo, m512 mask_hi, const u8 *buf, + const u8 *buf_end, const m512 low4bits, + const m512 zeroes) { + DEBUG_PRINTF("short shufti %p len %zu\n", buf, buf_end - buf); + uintptr_t len = buf_end - buf; + assert(len <= 64); + + // load mask + u64a k = (~0ULL) >> (64 - len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 chars = loadu_maskz_m512(k, buf); + + u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + + // reuse the load mask to indicate valid bytes + return firstMatch64(buf, z | ~k); +} + +/* takes 128 bit masks, but operates on 512 bits of data */ +const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + 
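    /* The block() helper above is the standard shufti nibble lookup
     * widened to 512 bits: mask_lo is indexed by each byte's low nibble,
     * mask_hi by its high nibble, and a byte belongs to the class iff the
     * two lookups share a set bit. eq512mask() sets a k-register bit
     * wherever the AND came out zero, so a clear bit marks a match and
     * firstMatch64() scans for z != ~0ULL. */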
assert(buf && buf_end); + assert(buf < buf_end); + DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf); + DEBUG_PRINTF("b %s\n", buf); + + const m512 low4bits = set64x8(0xf); + const m512 zeroes = zeroes512(); + const m512 wide_mask_lo = set4x128(mask_lo); + const m512 wide_mask_hi = set4x128(mask_hi); + const u8 *rv; + + // small cases. + if (buf_end - buf <= 64) { + rv = shortShufti512(wide_mask_lo, wide_mask_hi, buf, buf_end, low4bits, + zeroes); + return rv ? rv : buf_end; + } + + assert(buf_end - buf >= 64); + + // Preconditioning: most of the time our buffer won't be aligned. + if ((uintptr_t)buf % 64) { + rv = shortShufti512(wide_mask_lo, wide_mask_hi, buf, + ROUNDUP_PTR(buf, 64), low4bits, zeroes); + if (rv) { + return rv; + } + buf = ROUNDUP_PTR(buf, 64); + } + + const u8 *last_block = ROUNDDOWN_PTR(buf_end, 64); + while (buf < last_block) { + m512 lchars = load512(buf); + rv = fwdBlock512(wide_mask_lo, wide_mask_hi, lchars, buf, low4bits, + zeroes); + if (rv) { + return rv; + } + buf += 64; + } + + if (buf == buf_end) { + goto done; + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. + assert(buf <= buf_end && buf >= buf_end - 64); + m512 chars = loadu512(buf_end - 64); + rv = fwdBlock512(wide_mask_lo, wide_mask_hi, chars, buf_end - 64, low4bits, + zeroes); + if (rv) { + return rv; + } +done: + return buf_end; +} + +static really_inline +const u8 *lastMatch64(const u8 *buf, u64a z) { + DEBUG_PRINTF("z 0x%016llx\n", z); + if (unlikely(z != ~0ULL)) { + u32 pos = clz64(~z); + DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); + return buf + (63 - pos); + } else { + return NULL; // no match + } +} + +static really_inline +const u8 *rshortShufti512(m512 mask_lo, m512 mask_hi, const u8 *buf, + const u8 *buf_end, const m512 low4bits, + const m512 zeroes) { + DEBUG_PRINTF("short %p len %zu\n", buf, buf_end - buf); + uintptr_t len = buf_end - buf; + assert(len <= 64); + + // load mask + u64a k = (~0ULL) >> (64 - len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 chars = loadu_maskz_m512(k, buf); + + u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + + // reuse the load mask to indicate valid bytes + return lastMatch64(buf, z | ~k); +} + +static really_inline +const u8 *revBlock512(m512 mask_lo, m512 mask_hi, m512 chars, const u8 *buf, + const m512 low4bits, const m512 zeroes) { + m512 c_lo = pshufb_m512(mask_lo, and512(chars, low4bits)); + m512 c_hi = pshufb_m512(mask_hi, + rshift64_m512(andnot512(low4bits, chars), 4)); + m512 t = and512(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); +#endif + + u64a z = eq512mask(t, zeroes); + return lastMatch64(buf, z); +} + +/* takes 128 bit masks, but operates on 512 bits of data */ +const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("buf %p buf_end %p\n", buf, buf_end); + assert(buf && buf_end); + assert(buf < buf_end); + + const m512 low4bits = set64x8(0xf); + const m512 zeroes = zeroes512(); + const m512 wide_mask_lo = set4x128(mask_lo); + const m512 wide_mask_hi = set4x128(mask_hi); + const u8 *rv; + + if (buf_end - buf < 64) { + rv = rshortShufti512(wide_mask_lo, wide_mask_hi, buf, buf_end, low4bits, + zeroes); + return rv ? 
rv : buf - 1; + } + + if (ROUNDDOWN_PTR(buf_end, 64) != buf_end) { + // peel off unaligned portion + assert(buf_end - buf >= 64); + DEBUG_PRINTF("start\n"); + rv = rshortShufti512(wide_mask_lo, wide_mask_hi, + ROUNDDOWN_PTR(buf_end, 64), buf_end, low4bits, + zeroes); + if (rv) { + return rv; + } + buf_end = ROUNDDOWN_PTR(buf_end, 64); + } + + const u8 *last_block = ROUNDUP_PTR(buf, 64); + while (buf_end > last_block) { + buf_end -= 64; + m512 lchars = load512(buf_end); + rv = revBlock512(wide_mask_lo, wide_mask_hi, lchars, buf_end, low4bits, + zeroes); + if (rv) { + return rv; + } + } + if (buf_end == buf) { + goto done; + } + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf. + m512 chars = loadu512(buf); + rv = revBlock512(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes); + if (rv) { + return rv; + } +done: + return buf - 1; +} + +static really_inline +const u8 *fwdBlock2(m512 mask1_lo, m512 mask1_hi, m512 mask2_lo, m512 mask2_hi, + m512 chars, const u8 *buf, const m512 low4bits, + const m512 ones, __mmask64 k) { + DEBUG_PRINTF("buf %p %.64s\n", buf, buf); + m512 chars_lo = and512(chars, low4bits); + m512 chars_hi = rshift64_m512(andnot512(low4bits, chars), 4); + m512 c_lo = maskz_pshufb_m512(k, mask1_lo, chars_lo); + m512 c_hi = maskz_pshufb_m512(k, mask1_hi, chars_hi); + m512 t = or512(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); +#endif + + m512 c2_lo = maskz_pshufb_m512(k, mask2_lo, chars_lo); + m512 c2_hi = maskz_pshufb_m512(k, mask2_hi, chars_hi); + m512 t2 = or512(t, rshift128_m512(or512(c2_lo, c2_hi), 1)); + +#ifdef DEBUG + DEBUG_PRINTF(" c2_lo: "); dumpMsk512(c2_lo); printf("\n"); + DEBUG_PRINTF(" c2_hi: "); dumpMsk512(c2_hi); printf("\n"); + DEBUG_PRINTF(" t2: "); dumpMsk512(t2); printf("\n"); +#endif + u64a z = eq512mask(t2, ones); + + return firstMatch64(buf, z | ~k); +} + +static really_inline +const u8 *shortDoubleShufti512(m512 mask1_lo, m512 mask1_hi, m512 mask2_lo, + m512 mask2_hi, const u8 *buf, const u8 *buf_end, + const m512 low4bits, const m512 ones) { + DEBUG_PRINTF("short %p len %zu\n", buf, buf_end - buf); + uintptr_t len = buf_end - buf; + assert(len <= 64); + + u64a k = (~0ULL) >> (64 - len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 chars = loadu_mask_m512(ones, k, buf); + + const u8 *rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, chars, buf, + low4bits, ones, k); + + return rv; +} + +/* takes 128 bit masks, but operates on 512 bits of data */ +const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, + m128 mask2_lo, m128 mask2_hi, + const u8 *buf, const u8 *buf_end) { + /* we should always have at least 16 bytes */ + assert(buf_end - buf >= 16); + DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); + + const m512 ones = ones512(); + const m512 low4bits = set64x8(0xf); + const m512 wide_mask1_lo = set4x128(mask1_lo); + const m512 wide_mask1_hi = set4x128(mask1_hi); + const m512 wide_mask2_lo = set4x128(mask2_lo); + const m512 wide_mask2_hi = set4x128(mask2_hi); + const u8 *rv; + + if (buf_end - buf <= 64) { + rv = shortDoubleShufti512(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, + wide_mask2_hi, buf, buf_end, low4bits, ones); + DEBUG_PRINTF("rv %p\n", rv); + return rv ? 
rv : buf_end; + } + + // Preconditioning: most of the time our buffer won't be aligned. + if ((uintptr_t)buf % 64) { + rv = shortDoubleShufti512(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, + wide_mask2_hi, buf, ROUNDUP_PTR(buf, 64), + low4bits, ones); + if (rv) { + return rv; + } + + buf = ROUNDUP_PTR(buf, 64); + } + + const u8 *last_block = buf_end - 64; + while (buf < last_block) { + m512 lchars = load512(buf); + rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, + wide_mask2_hi, lchars, buf, low4bits, ones, ~0); + if (rv) { + return rv; + } + buf += 64; + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. + m512 chars = loadu512(buf_end - 64); + rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, + chars, buf_end - 64, low4bits, ones, ~0); + if (rv) { + return rv; + } + + return buf_end; +} +#endif From a295c961983810bc650b25fb09d2155eea237ed4 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 5 May 2017 09:59:29 +1000 Subject: [PATCH 305/326] rename vpshufb to pshufb_m256 --- src/fdr/teddy_avx2.c | 16 ++++++------- src/nfa/limex_shuffle.h | 2 +- src/nfa/shufti.c | 24 ++++++++++---------- src/nfa/truffle.c | 6 ++--- src/rose/program_runtime.h | 24 ++++++++++---------- src/rose/validate_shufti.h | 46 ++++++++++++++++++++------------------ src/util/masked_move.h | 3 ++- src/util/simd_utils.h | 2 +- 8 files changed, 63 insertions(+), 60 deletions(-) diff --git a/src/fdr/teddy_avx2.c b/src/fdr/teddy_avx2.c index ebc1362d..299825cc 100644 --- a/src/fdr/teddy_avx2.c +++ b/src/fdr/teddy_avx2.c @@ -147,8 +147,8 @@ m256 prep_conf_fat_teddy_m1(const m256 *maskBase, m256 val) { m256 mask = set32x8(0xf); m256 lo = and256(val, mask); m256 hi = and256(rshift64_m256(val, 4), mask); - return and256(vpshufb(maskBase[0*2], lo), - vpshufb(maskBase[0*2+1], hi)); + return and256(pshufb_m256(maskBase[0*2], lo), + pshufb_m256(maskBase[0*2+1], hi)); } static really_inline @@ -158,8 +158,8 @@ m256 prep_conf_fat_teddy_m2(const m256 *maskBase, m256 *old_1, m256 val) { m256 hi = and256(rshift64_m256(val, 4), mask); m256 r = prep_conf_fat_teddy_m1(maskBase, val); - m256 res_1 = and256(vpshufb(maskBase[1*2], lo), - vpshufb(maskBase[1*2+1], hi)); + m256 res_1 = and256(pshufb_m256(maskBase[1*2], lo), + pshufb_m256(maskBase[1*2+1], hi)); m256 res_shifted_1 = vpalignr(res_1, *old_1, 16-1); *old_1 = res_1; return and256(r, res_shifted_1); @@ -173,8 +173,8 @@ m256 prep_conf_fat_teddy_m3(const m256 *maskBase, m256 *old_1, m256 *old_2, m256 hi = and256(rshift64_m256(val, 4), mask); m256 r = prep_conf_fat_teddy_m2(maskBase, old_1, val); - m256 res_2 = and256(vpshufb(maskBase[2*2], lo), - vpshufb(maskBase[2*2+1], hi)); + m256 res_2 = and256(pshufb_m256(maskBase[2*2], lo), + pshufb_m256(maskBase[2*2+1], hi)); m256 res_shifted_2 = vpalignr(res_2, *old_2, 16-2); *old_2 = res_2; return and256(r, res_shifted_2); @@ -188,8 +188,8 @@ m256 prep_conf_fat_teddy_m4(const m256 *maskBase, m256 *old_1, m256 *old_2, m256 hi = and256(rshift64_m256(val, 4), mask); m256 r = prep_conf_fat_teddy_m3(maskBase, old_1, old_2, val); - m256 res_3 = and256(vpshufb(maskBase[3*2], lo), - vpshufb(maskBase[3*2+1], hi)); + m256 res_3 = and256(pshufb_m256(maskBase[3*2], lo), + pshufb_m256(maskBase[3*2+1], hi)); m256 res_shifted_3 = vpalignr(res_3, *old_3, 16-3); *old_3 = res_3; return and256(r, res_shifted_3); diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index 4c142a34..cedca333 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ 
-54,7 +54,7 @@ u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { static really_inline u32 packedExtract256(m256 s, const m256 permute, const m256 compare) { // vpshufb doesn't cross lanes, so this is a bit of a cheat - m256 shuffled = vpshufb(s, permute); + m256 shuffled = pshufb_m256(s, permute); m256 compared = and256(shuffled, compare); u32 rv = ~movemask256(eq256(compared, shuffled)); // stitch the lane-wise results back together diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index 390b6510..ebe5015d 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -373,8 +373,8 @@ DUMP_MSK(256) static really_inline u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, const m256 compare) { - m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); - m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); + m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); + m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); m256 t = and256(c_lo, c_hi); #ifdef DEBUG @@ -407,7 +407,7 @@ const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf, // do the hi and lo shuffles in the one avx register m256 c = combine2x128(rshift64_m128(chars, 4), chars); c = and256(c, low4bits); - m256 c_shuf = vpshufb(mask, c); + m256 c_shuf = pshufb_m256(mask, c); m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); // the upper 32-bits can't match u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); @@ -516,8 +516,8 @@ const u8 *lastMatch(const u8 *buf, u32 z) { static really_inline const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, const m256 low4bits, const m256 zeroes) { - m256 c_lo = vpshufb(mask_lo, GET_LO_4(chars)); - m256 c_hi = vpshufb(mask_hi, GET_HI_4(chars)); + m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); + m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); m256 t = and256(c_lo, c_hi); #ifdef DEBUG @@ -538,7 +538,7 @@ const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf, // do the hi and lo shuffles in the one avx register m256 c = combine2x128(rshift64_m128(chars, 4), chars); c = and256(c, low4bits); - m256 c_shuf = vpshufb(mask, c); + m256 c_shuf = pshufb_m256(mask, c); m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); // the upper 32-bits can't match u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); @@ -630,8 +630,8 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, DEBUG_PRINTF("buf %p\n", buf); m256 chars_lo = GET_LO_4(chars); m256 chars_hi = GET_HI_4(chars); - m256 c_lo = vpshufb(mask1_lo, chars_lo); - m256 c_hi = vpshufb(mask1_hi, chars_hi); + m256 c_lo = pshufb_m256(mask1_lo, chars_lo); + m256 c_hi = pshufb_m256(mask1_hi, chars_hi); m256 t = or256(c_lo, c_hi); #ifdef DEBUG @@ -642,8 +642,8 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); #endif - m256 c2_lo = vpshufb(mask2_lo, chars_lo); - m256 c2_hi = vpshufb(mask2_hi, chars_hi); + m256 c2_lo = pshufb_m256(mask2_lo, chars_lo); + m256 c2_hi = pshufb_m256(mask2_hi, chars_hi); m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1)); #ifdef DEBUG @@ -662,8 +662,8 @@ const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf, // do the hi and lo shuffles in the one avx register m256 c = combine2x128(rshift64_m128(chars, 4), chars); c = and256(c, low4bits); - m256 c_shuf1 = vpshufb(mask1, c); - m256 c_shuf2 = rshift128_m256(vpshufb(mask2, c), 1); + m256 c_shuf1 = pshufb_m256(mask1, c); + m256 c_shuf2 = rshift128_m256(pshufb_m256(mask2, c), 1); m256 
t0 = or256(c_shuf1, c_shuf2); m128 t = or128(movdq_hi(t0), cast256to128(t0)); // the upper 32-bits can't match diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c index d31b1a56..335edd5b 100644 --- a/src/nfa/truffle.c +++ b/src/nfa/truffle.c @@ -264,11 +264,11 @@ u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201); // and now do the real work - m256 shuf1 = vpshufb(shuf_mask_lo_highclear, v); + m256 shuf1 = pshufb_m256(shuf_mask_lo_highclear, v); m256 t1 = xor256(v, highconst); - m256 shuf2 = vpshufb(shuf_mask_lo_highset, t1); + m256 shuf2 = pshufb_m256(shuf_mask_lo_highset, t1); m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); - m256 shuf3 = vpshufb(shuf_mask_hi, t2); + m256 shuf3 = pshufb_m256(shuf_mask_hi, t2); m256 tmp = and256(or256(shuf1, shuf2), shuf3); m256 tmp2 = eq256(tmp, zeroes256()); u32 z = movemask256(tmp2); diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index c67a4acb..38700fbd 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1334,11 +1334,11 @@ int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch, DEBUG_PRINTF("expand_lo %llx\n", valid_lo); expand_valid = set64x4(valid_hi, valid_lo, valid_hi, valid_lo); - valid_path_mask = ~movemask256(vpshufb(expand_valid, - data_select_mask)); + valid_path_mask = ~movemask256(pshufb_m256(expand_valid, + data_select_mask)); } - m256 data = vpshufb(data_double, data_select_mask); + m256 data = pshufb_m256(data_double, data_select_mask); m256 hi_mask = loadu2x128(ri->hi_mask); m256 lo_mask = loadu2x128(ri->lo_mask); m256 bucket_select_mask = loadu256(ri->bucket_select_mask); @@ -1395,11 +1395,11 @@ int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch, DEBUG_PRINTF("expand_lo %llx\n", valid_lo); expand_valid = set64x4(valid_hi, valid_lo, valid_hi, valid_lo); - valid_path_mask = ~movemask256(vpshufb(expand_valid, - data_select_mask)); + valid_path_mask = ~movemask256(pshufb_m256(expand_valid, + data_select_mask)); } - m256 data = vpshufb(data_double, data_select_mask); + m256 data = pshufb_m256(data_double, data_select_mask); m256 hi_mask_1 = loadu2x128(ri->hi_mask); m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16); @@ -1463,15 +1463,15 @@ int roseCheckMultipathShufti64(const struct hs_scratch *scratch, DEBUG_PRINTF("expand_lo %llx\n", valid_lo); expand_valid = set64x4(valid_hi, valid_lo, valid_hi, valid_lo); - u32 valid_path_1 = movemask256(vpshufb(expand_valid, - data_select_mask_1)); - u32 valid_path_2 = movemask256(vpshufb(expand_valid, - data_select_mask_2)); + u32 valid_path_1 = movemask256(pshufb_m256(expand_valid, + data_select_mask_1)); + u32 valid_path_2 = movemask256(pshufb_m256(expand_valid, + data_select_mask_2)); valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32); } - m256 data_1 = vpshufb(data_m256, data_select_mask_1); - m256 data_2 = vpshufb(data_m256, data_select_mask_2); + m256 data_1 = pshufb_m256(data_m256, data_select_mask_1); + m256 data_2 = pshufb_m256(data_m256, data_select_mask_2); m256 hi_mask = loadu2x128(ri->hi_mask); m256 lo_mask = loadu2x128(ri->lo_mask); diff --git a/src/rose/validate_shufti.h b/src/rose/validate_shufti.h index e26d6c2b..1dc855d9 100644 --- a/src/rose/validate_shufti.h +++ b/src/rose/validate_shufti.h @@ -48,8 +48,9 @@ int validateShuftiMask16x16(const m256 data, const m256 hi_mask, const m256 lo_mask, const m256 and_mask, const u32 neg_mask, const u32 valid_data_mask) { m256 low4bits = set32x8(0xf); - m256 c_lo = 
vpshufb(lo_mask, and256(data, low4bits)); - m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4)); + m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits)); + m256 c_hi = pshufb_m256(hi_mask, + rshift64_m256(andnot256(low4bits, data), 4)); m256 t = and256(c_lo, c_hi); u32 nresult = movemask256(eq256(and256(t, and_mask), zeroes256())); #ifdef DEBUG @@ -78,7 +79,7 @@ int validateShuftiMask16x8(const m128 data, const m256 nib_mask, const u32 valid_data_mask) { m256 data_m256 = combine2x128(rshift64_m128(data, 4), data); m256 low4bits = set32x8(0xf); - m256 c_nib = vpshufb(nib_mask, and256(data_m256, low4bits)); + m256 c_nib = pshufb_m256(nib_mask, and256(data_m256, low4bits)); m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib)); m128 nresult = eq128(and128(t, and_mask), zeroes128()); #ifdef DEBUG @@ -101,8 +102,9 @@ int validateShuftiMask32x8(const m256 data, const m256 hi_mask, const m256 lo_mask, const m256 and_mask, const u32 neg_mask, const u32 valid_data_mask) { m256 low4bits = set32x8(0xf); - m256 c_lo = vpshufb(lo_mask, and256(data, low4bits)); - m256 c_hi = vpshufb(hi_mask, rshift64_m256(andnot256(low4bits, data), 4)); + m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits)); + m256 c_hi = pshufb_m256(hi_mask, + rshift64_m256(andnot256(low4bits, data), 4)); m256 t = and256(c_lo, c_hi); m256 nresult = eq256(and256(t, and_mask), zeroes256()); #ifdef DEBUG @@ -134,10 +136,10 @@ int validateShuftiMask32x16(const m256 data, m256 low4bits = set32x8(0xf); m256 data_lo = and256(data, low4bits); m256 data_hi = and256(rshift64_m256(data, 4), low4bits); - m256 c_lo_1 = vpshufb(lo_mask_1, data_lo); - m256 c_lo_2 = vpshufb(lo_mask_2, data_lo); - m256 c_hi_1 = vpshufb(hi_mask_1, data_hi); - m256 c_hi_2 = vpshufb(hi_mask_2, data_hi); + m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo); + m256 c_lo_2 = pshufb_m256(lo_mask_2, data_lo); + m256 c_hi_1 = pshufb_m256(hi_mask_1, data_hi); + m256 c_hi_2 = pshufb_m256(hi_mask_2, data_hi); m256 t1 = and256(c_lo_1, c_hi_1); m256 t2 = and256(c_lo_2, c_hi_2); m256 result = or256(and256(t1, bucket_mask_lo), and256(t2, bucket_mask_hi)); @@ -200,7 +202,7 @@ int validateMultipathShuftiMask16x8(const m128 data, const u32 valid_path_mask) { m256 data_256 = combine2x128(rshift64_m128(data, 4), data); m256 low4bits = set32x8(0xf); - m256 c_nib = vpshufb(nib_mask, and256(data_256, low4bits)); + m256 c_nib = pshufb_m256(nib_mask, and256(data_256, low4bits)); m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib)); m128 result = and128(t, bucket_select_mask); u32 nresult = movemask128(eq128(result, zeroes128())); @@ -221,8 +223,8 @@ int validateMultipathShuftiMask32x8(const m256 data, m256 low4bits = set32x8(0xf); m256 data_lo = and256(data, low4bits); m256 data_hi = and256(rshift64_m256(data, 4), low4bits); - m256 c_lo = vpshufb(lo_mask, data_lo); - m256 c_hi = vpshufb(hi_mask, data_hi); + m256 c_lo = pshufb_m256(lo_mask, data_lo); + m256 c_hi = pshufb_m256(hi_mask, data_hi); m256 c = and256(c_lo, c_hi); m256 result = and256(c, bucket_select_mask); u32 nresult = movemask256(eq256(result, zeroes256())); @@ -245,10 +247,10 @@ int validateMultipathShuftiMask32x16(const m256 data, m256 low4bits = set32x8(0xf); m256 data_lo = and256(data, low4bits); m256 data_hi = and256(rshift64_m256(data, 4), low4bits); - m256 c_lo_1 = vpshufb(lo_mask_1, data_lo); - m256 c_lo_2 = vpshufb(lo_mask_2, data_lo); - m256 c_hi_1 = vpshufb(hi_mask_1, data_hi); - m256 c_hi_2 = vpshufb(hi_mask_2, data_hi); + m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo); + m256 c_lo_2 = 
pshufb_m256(lo_mask_2, data_lo); + m256 c_hi_1 = pshufb_m256(hi_mask_1, data_hi); + m256 c_hi_2 = pshufb_m256(hi_mask_2, data_hi); m256 t1 = and256(c_lo_1, c_hi_1); m256 t2 = and256(c_lo_2, c_hi_2); m256 result = or256(and256(t1, bucket_select_mask_lo), @@ -270,12 +272,12 @@ int validateMultipathShuftiMask64(const m256 data_1, const m256 data_2, const u64a neg_mask, const u64a valid_path_mask) { m256 low4bits = set32x8(0xf); - m256 c_lo_1 = vpshufb(lo_mask, and256(data_1, low4bits)); - m256 c_lo_2 = vpshufb(lo_mask, and256(data_2, low4bits)); - m256 c_hi_1 = vpshufb(hi_mask, - rshift64_m256(andnot256(low4bits, data_1), 4)); - m256 c_hi_2 = vpshufb(hi_mask, - rshift64_m256(andnot256(low4bits, data_2), 4)); + m256 c_lo_1 = pshufb_m256(lo_mask, and256(data_1, low4bits)); + m256 c_lo_2 = pshufb_m256(lo_mask, and256(data_2, low4bits)); + m256 c_hi_1 = pshufb_m256(hi_mask, + rshift64_m256(andnot256(low4bits, data_1), 4)); + m256 c_hi_2 = pshufb_m256(hi_mask, + rshift64_m256(andnot256(low4bits, data_2), 4)); m256 t1 = and256(c_lo_1, c_hi_1); m256 t2 = and256(c_lo_2, c_hi_2); m256 nresult_1 = eq256(and256(t1, bucket_select_mask_1), zeroes256()); diff --git a/src/util/masked_move.h b/src/util/masked_move.h index b51ff632..4c877ca9 100644 --- a/src/util/masked_move.h +++ b/src/util/masked_move.h @@ -70,7 +70,8 @@ masked_move256_len(const u8 *buf, const u32 len) { u32 end = unaligned_load_u32(buf + len - 4); m256 preshufend = _mm256_broadcastq_epi64(_mm_cvtsi32_si128(end)); m256 v = _mm256_maskload_epi32((const int *)buf, lmask); - m256 shufend = vpshufb(preshufend, loadu256(&mm_shuffle_end[len - 4])); + m256 shufend = pshufb_m256(preshufend, + loadu256(&mm_shuffle_end[len - 4])); m256 target = or256(v, shufend); return target; diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 5f4fe921..6eafe488 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -279,7 +279,7 @@ m128 pshufb(m128 a, m128 b) { } static really_inline -m256 vpshufb(m256 a, m256 b) { +m256 pshufb_m256(m256 a, m256 b) { #if defined(HAVE_AVX2) return _mm256_shuffle_epi8(a, b); #else From f6b688fc064d30d154aabe4a73c3274e639458bd Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 5 May 2017 10:43:37 +1000 Subject: [PATCH 306/326] rename pshufb to pshufb_m128 --- src/fdr/teddy.c | 17 +++++++++-------- src/nfa/limex_shuffle.h | 2 +- src/nfa/mcsheng.c | 6 +++--- src/nfa/sheng_impl.h | 4 ++-- src/nfa/sheng_impl4.h | 10 +++++----- src/nfa/shufti.c | 16 ++++++++-------- src/nfa/truffle.c | 6 +++--- src/rose/counting_miracle.h | 10 +++++----- src/rose/program_runtime.h | 4 ++-- src/util/simd_utils.h | 8 ++++---- 10 files changed, 42 insertions(+), 41 deletions(-) diff --git a/src/fdr/teddy.c b/src/fdr/teddy.c index e7a0fccd..a3f7cfaf 100644 --- a/src/fdr/teddy.c +++ b/src/fdr/teddy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -129,7 +129,8 @@ m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) { m128 mask = set16x8(0xf); m128 lo = and128(val, mask); m128 hi = and128(rshift64_m128(val, 4), mask); - return and128(pshufb(maskBase[0*2], lo), pshufb(maskBase[0*2+1], hi)); + return and128(pshufb_m128(maskBase[0 * 2], lo), + pshufb_m128(maskBase[0 * 2 + 1], hi)); } static really_inline @@ -139,8 +140,8 @@ m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) { m128 hi = 
and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m1(maskBase, val); - m128 res_1 = and128(pshufb(maskBase[1*2], lo), - pshufb(maskBase[1*2+1], hi)); + m128 res_1 = and128(pshufb_m128(maskBase[1*2], lo), + pshufb_m128(maskBase[1*2+1], hi)); m128 res_shifted_1 = palignr(res_1, *old_1, 16-1); *old_1 = res_1; return and128(r, res_shifted_1); @@ -154,8 +155,8 @@ m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m2(maskBase, old_1, val); - m128 res_2 = and128(pshufb(maskBase[2*2], lo), - pshufb(maskBase[2*2+1], hi)); + m128 res_2 = and128(pshufb_m128(maskBase[2*2], lo), + pshufb_m128(maskBase[2*2+1], hi)); m128 res_shifted_2 = palignr(res_2, *old_2, 16-2); *old_2 = res_2; return and128(r, res_shifted_2); @@ -169,8 +170,8 @@ m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, m128 hi = and128(rshift64_m128(val, 4), mask); m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val); - m128 res_3 = and128(pshufb(maskBase[3*2], lo), - pshufb(maskBase[3*2+1], hi)); + m128 res_3 = and128(pshufb_m128(maskBase[3*2], lo), + pshufb_m128(maskBase[3*2+1], hi)); m128 res_shifted_3 = palignr(res_3, *old_3, 16-3); *old_3 = res_3; return and128(r, res_shifted_3); diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index cedca333..365d4729 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -44,7 +44,7 @@ static really_inline u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { - m128 shuffled = pshufb(s, permute); + m128 shuffled = pshufb_m128(s, permute); m128 compared = and128(shuffled, compare); u16 rv = ~movemask128(eq128(compared, shuffled)); return (u32)rv; diff --git a/src/nfa/mcsheng.c b/src/nfa/mcsheng.c index a5ba2151..8130173d 100644 --- a/src/nfa/mcsheng.c +++ b/src/nfa/mcsheng.c @@ -179,7 +179,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, #define SHENG_SINGLE_ITER do { \ m128 shuffle_mask = masks[*(c++)]; \ - s = pshufb(shuffle_mask, s); \ + s = pshufb_m128(shuffle_mask, s); \ u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ DEBUG_PRINTF("c %hhu (%c) --> s %hhu\n", c[-1], c[-1], s_gpr_x4); \ if (s_gpr_x4 >= sheng_stop_limit_x4) { \ @@ -198,7 +198,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */ data_bytes &= ~0xffULL; /* clear low bits for scale space */ m128 shuffle_mask0 = load128((const char *)masks + cc0); - s = pshufb(shuffle_mask0, s); + s = pshufb_m128(shuffle_mask0, s); m128 s_max = s; m128 s_max0 = s_max; DEBUG_PRINTF("c %02llx --> s %hhu\n", cc0 >> 4, movd(s)); @@ -208,7 +208,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \ assert(cc##iter == (u64a)c[iter] << 4); \ m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \ - s = pshufb(shuffle_mask##iter, s); \ + s = pshufb_m128(shuffle_mask##iter, s); \ if (do_accel && iter == 7) { \ /* in the final iteration we also have to check against accel */ \ m128 s_temp = sadd_u8_m128(s, accel_delta); \ diff --git a/src/nfa/sheng_impl.h b/src/nfa/sheng_impl.h index fc3e54aa..9552fe15 100644 --- a/src/nfa/sheng_impl.h +++ b/src/nfa/sheng_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, 
are permitted provided that the following conditions are met: @@ -58,7 +58,7 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, while (likely(cur_buf != end)) { const u8 c = *cur_buf; const m128 shuffle_mask = masks[c]; - cur_state = pshufb(shuffle_mask, cur_state); + cur_state = pshufb_m128(shuffle_mask, cur_state); const u8 tmp = movd(cur_state); DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); diff --git a/src/nfa/sheng_impl4.h b/src/nfa/sheng_impl4.h index 2561e52d..74032201 100644 --- a/src/nfa/sheng_impl4.h +++ b/src/nfa/sheng_impl4.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -100,19 +100,19 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, const u8 c4 = *b4; const m128 shuffle_mask1 = masks[c1]; - cur_state = pshufb(shuffle_mask1, cur_state); + cur_state = pshufb_m128(shuffle_mask1, cur_state); const u8 a1 = movd(cur_state); const m128 shuffle_mask2 = masks[c2]; - cur_state = pshufb(shuffle_mask2, cur_state); + cur_state = pshufb_m128(shuffle_mask2, cur_state); const u8 a2 = movd(cur_state); const m128 shuffle_mask3 = masks[c3]; - cur_state = pshufb(shuffle_mask3, cur_state); + cur_state = pshufb_m128(shuffle_mask3, cur_state); const u8 a3 = movd(cur_state); const m128 shuffle_mask4 = masks[c4]; - cur_state = pshufb(shuffle_mask4, cur_state); + cur_state = pshufb_m128(shuffle_mask4, cur_state); const u8 a4 = movd(cur_state); DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c index ebe5015d..09ffc0cf 100644 --- a/src/nfa/shufti.c +++ b/src/nfa/shufti.c @@ -114,8 +114,8 @@ DUMP_MSK(128) static really_inline u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, const m128 compare) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); m128 t = and128(c_lo, c_hi); #ifdef DEBUG @@ -219,8 +219,8 @@ const u8 *lastMatch(const u8 *buf, m128 t, m128 compare) { static really_inline const u8 *revBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, const m128 low4bits, const m128 zeroes) { - m128 c_lo = pshufb(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(chars)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); m128 t = and128(c_lo, c_hi); #ifdef DEBUG @@ -289,8 +289,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, const m128 ones) { m128 chars_lo = GET_LO_4(chars); m128 chars_hi = GET_HI_4(chars); - m128 c_lo = pshufb(mask1_lo, chars_lo); - m128 c_hi = pshufb(mask1_hi, chars_hi); + m128 c_lo = pshufb_m128(mask1_lo, chars_lo); + m128 c_hi = pshufb_m128(mask1_hi, chars_hi); m128 t = or128(c_lo, c_hi); #ifdef DEBUG @@ -301,8 +301,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); #endif - m128 c2_lo = pshufb(mask2_lo, chars_lo); - m128 c2_hi = pshufb(mask2_hi, chars_hi); + m128 c2_lo = pshufb_m128(mask2_lo, chars_lo); + m128 c2_hi = pshufb_m128(mask2_hi, chars_hi); m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1)); #ifdef DEBUG diff --git a/src/nfa/truffle.c 
b/src/nfa/truffle.c index 335edd5b..be6b312c 100644 --- a/src/nfa/truffle.c +++ b/src/nfa/truffle.c @@ -68,11 +68,11 @@ u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); // and now do the real work - m128 shuf1 = pshufb(shuf_mask_lo_highclear, v); + m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v); m128 t1 = xor128(v, highconst); - m128 shuf2 = pshufb(shuf_mask_lo_highset, t1); + m128 shuf2 = pshufb_m128(shuf_mask_lo_highset, t1); m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); - m128 shuf3 = pshufb(shuf_mask_hi, t2); + m128 shuf3 = pshufb_m128(shuf_mask_hi, t2); m128 tmp = and128(or128(shuf1, shuf2), shuf3); m128 tmp2 = eq128(tmp, zeroes128()); u32 z = movemask128(tmp2); diff --git a/src/rose/counting_miracle.h b/src/rose/counting_miracle.h index 76db5a77..976208b7 100644 --- a/src/rose/counting_miracle.h +++ b/src/rose/counting_miracle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -98,8 +98,8 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, for (; d + 16 <= d_end; d_end -= 16) { m128 data = loadu128(d_end - 16); - m128 c_lo = pshufb(mask_lo, GET_LO_4(data)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(data)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); m128 t = and128(c_lo, c_hi); u32 z1 = movemask128(eq128(t, zeroes)); count += popcount32(z1 ^ 0xffff); @@ -117,8 +117,8 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, memset(temp, poison, sizeof(temp)); memcpy(temp, d, d_end - d); m128 data = loadu128(temp); - m128 c_lo = pshufb(mask_lo, GET_LO_4(data)); - m128 c_hi = pshufb(mask_hi, GET_HI_4(data)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); m128 t = and128(c_lo, c_hi); u32 z1 = movemask128(eq128(t, zeroes)); count += popcount32(z1 ^ 0xffff); diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 38700fbd..b140a2bc 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1274,11 +1274,11 @@ int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch, DEBUG_PRINTF("expand_hi %llx\n", valid_hi); DEBUG_PRINTF("expand_lo %llx\n", valid_lo); expand_valid = set64x2(valid_hi, valid_lo); - valid_path_mask = ~movemask128(pshufb(expand_valid, + valid_path_mask = ~movemask128(pshufb_m128(expand_valid, data_select_mask)); } - m128 data = pshufb(data_init, data_select_mask); + m128 data = pshufb_m128(data_init, data_select_mask); m256 nib_mask = loadu256(ri->nib_mask); m128 bucket_select_mask = loadu128(ri->bucket_select_mask); diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index 6eafe488..b4c0f7c8 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -272,7 +272,7 @@ char testbit128(m128 val, unsigned int n) { #define palignr(r, l, offset) _mm_alignr_epi8(r, l, offset) static really_inline -m128 pshufb(m128 a, m128 b) { +m128 pshufb_m128(m128 a, m128 b) { m128 result; result = _mm_shuffle_epi8(a, b); return result; @@ -284,8 +284,8 @@ m256 pshufb_m256(m256 a, m256 b) { return _mm256_shuffle_epi8(a, b); #else m256 rv; - rv.lo = pshufb(a.lo, b.lo); - rv.hi = pshufb(a.hi, b.hi); + rv.lo = pshufb_m128(a.lo, b.lo); + rv.hi = pshufb_m128(a.hi, b.hi); return rv; 
 #endif
 }
 
@@ -306,7 +306,7 @@ static really_inline
 m128 variable_byte_shift_m128(m128 in, s32 amount) {
     assert(amount >= -16 && amount <= 16);
     m128 shift_mask = loadu128(vbs_mask_data + 16 - amount);
-    return pshufb(in, shift_mask);
+    return pshufb_m128(in, shift_mask);
 }
 
 static really_inline

From 055ff7391c247b2a72cd26eef5159da120a2acee Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Wed, 10 May 2017 14:24:43 +1000
Subject: [PATCH 307/326] cmake: build shared libs with the PIC objs

---
 CMakeLists.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 60959cb5..5e0f06b2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1165,21 +1165,21 @@ else (FAT_RUNTIME)
     if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
         # build shared libs
         add_library(hs_exec_shared_core2 OBJECT ${hs_exec_SRCS})
-        list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_core2>)
+        list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_core2>)
         set_target_properties(hs_exec_shared_core2 PROPERTIES
             COMPILE_FLAGS "-march=core2"
             POSITION_INDEPENDENT_CODE TRUE
             RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} core2 ${CMAKE_MODULE_PATH}/keep.syms.in"
             )
         add_library(hs_exec_shared_corei7 OBJECT ${hs_exec_SRCS})
-        list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_corei7>)
+        list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_corei7>)
         set_target_properties(hs_exec_shared_corei7 PROPERTIES
             COMPILE_FLAGS "-march=corei7"
             POSITION_INDEPENDENT_CODE TRUE
             RULE_LAUNCH_COMPILE "${BUILD_WRAPPER} corei7 ${CMAKE_MODULE_PATH}/keep.syms.in"
             )
         add_library(hs_exec_shared_avx2 OBJECT ${hs_exec_SRCS} ${hs_exec_avx2_SRCS})
-        list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_avx2>)
+        list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_avx2>)
         set_target_properties(hs_exec_shared_avx2 PROPERTIES
             COMPILE_FLAGS "-march=core-avx2"
             POSITION_INDEPENDENT_CODE TRUE
@@ -1188,7 +1188,7 @@ else (FAT_RUNTIME)
     if (BUILD_AVX512)
         add_library(hs_exec_shared_avx512 OBJECT ${hs_exec_SRCS}
                     ${hs_exec_avx2_SRCS})
-        list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_avx512>)
+        list(APPEND RUNTIME_SHLIBS $<TARGET_OBJECTS:hs_exec_shared_avx512>)
         set_target_properties(hs_exec_shared_avx512 PROPERTIES
             COMPILE_FLAGS "${SKYLAKE_FLAG}"
             POSITION_INDEPENDENT_CODE TRUE

From 221229f71caff37d7f66964ce164be1562165f5d Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Mon, 10 Apr 2017 13:25:07 +1000
Subject: [PATCH 308/326] gcc7: comments for falling through a switch case

GCC 7 adds a warning -Wimplicit-fallthrough to catch when falling through a
switch statement without a break. Since we actually want that behaviour
sometimes, we can add a comment so the compiler knows we intended the
fallthrough.
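
To make the convention concrete, here is a minimal sketch rather than code
from this commit (copy_tail is a hypothetical function; GCC >= 7 with -Wextra
is assumed). A trailing "// fallthrough" comment before the next case label
satisfies the default -Wimplicit-fallthrough heuristics, so the deliberate
fallthroughs below compile cleanly where uncommented ones would warn:

    #include <stddef.h>

    // Copy the trailing 1-3 bytes of src, mirroring the switches patched
    // below. Each comment tells the compiler the missing break is intentional.
    void copy_tail(unsigned char *dst, const unsigned char *src, size_t l) {
        switch (l) {
        case 3: dst[2] = src[2]; // fallthrough
        case 2: dst[1] = src[1]; // fallthrough
        case 1: dst[0] = src[0]; // fallthrough
        case 0: break;
        }
    }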
---
 src/hwlm/noodle_engine_avx2.c | 14 +++++++-------
 src/nfa/limex_runtime_impl.h | 9 ++++++++-
 src/nfa/mcsheng.c | 14 +++++++-------
 3 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/hwlm/noodle_engine_avx2.c b/src/hwlm/noodle_engine_avx2.c
index 14d0eab5..a3f46047 100644
--- a/src/hwlm/noodle_engine_avx2.c
+++ b/src/hwlm/noodle_engine_avx2.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -117,9 +117,9 @@ hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
     if (l < 4) {
         u8 *vp = (u8*)&v;
         switch (l) {
-            case 3: vp[2] = d[2];
-            case 2: vp[1] = d[1];
-            case 1: vp[0] = d[0];
+            case 3: vp[2] = d[2]; // fallthrough
+            case 2: vp[1] = d[1]; // fallthrough
+            case 1: vp[0] = d[0]; // fallthrough
         }
     } else {
         v = masked_move256_len(d, l);
@@ -157,9 +157,9 @@ hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
     if (l < 4) {
         u8 *vp = (u8*)&v;
         switch (l) {
-            case 3: vp[2] = d[2];
-            case 2: vp[1] = d[1];
-            case 1: vp[0] = d[0];
+            case 3: vp[2] = d[2]; // fallthrough
+            case 2: vp[1] = d[1]; // fallthrough
+            case 1: vp[0] = d[0]; // fallthrough
         }
     } else {
         v = masked_move256_len(d, l);
diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h
index 2c9647d0..7b89182b 100644
--- a/src/nfa/limex_runtime_impl.h
+++ b/src/nfa/limex_runtime_impl.h
@@ -173,25 +173,32 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask,
     switch (limex_m->shiftCount) { \
     case 8: \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \
+        /* fallthrough */ \
     case 7: \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \
+        /* fallthrough */ \
     case 6: \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \
+        /* fallthrough */ \
     case 5: \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \
+        /* fallthrough */ \
     case 4: \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \
+        /* fallthrough */ \
     case 3: \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \
+        /* fallthrough */ \
     case 2: \
         succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \
+        /* fallthrough */ \
     case 1: \
+        /* fallthrough */ \
     case 0: \
         ; \
     } \
 } while (0)
 
-
 /**
  * \brief LimEx NFAS inner loop without accel.
  *
diff --git a/src/nfa/mcsheng.c b/src/nfa/mcsheng.c
index 8130173d..9722fd67 100644
--- a/src/nfa/mcsheng.c
+++ b/src/nfa/mcsheng.c
@@ -288,19 +288,19 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
     assert(soft_c_end - c < SHENG_CHUNK);
     switch (soft_c_end - c) {
     case 7:
-        SHENG_SINGLE_ITER;
+        SHENG_SINGLE_ITER; // fallthrough
     case 6:
-        SHENG_SINGLE_ITER;
+        SHENG_SINGLE_ITER; // fallthrough
     case 5:
-        SHENG_SINGLE_ITER;
+        SHENG_SINGLE_ITER; // fallthrough
     case 4:
-        SHENG_SINGLE_ITER;
+        SHENG_SINGLE_ITER; // fallthrough
     case 3:
-        SHENG_SINGLE_ITER;
+        SHENG_SINGLE_ITER; // fallthrough
     case 2:
-        SHENG_SINGLE_ITER;
+        SHENG_SINGLE_ITER; // fallthrough
     case 1:
-        SHENG_SINGLE_ITER;
+        SHENG_SINGLE_ITER; // fallthrough
     }
 }

From 4976f019f4bac1960b3fb14c2a4f0005d78ac0ba Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Mon, 15 May 2017 14:40:11 +1000
Subject: [PATCH 309/326] rose_build_engine_blob: remove unused data() func

We should no longer be reading out of the engine blob.
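
As a sketch of the invariant behind this change (a simplified model, not the
real RoseEngineBlob, which also handles alignment and padding): the blob is
write-only during compilation — producers append bytes and keep offsets, and
nothing should ever read back out of it, so the data() accessor removed in the
diff below has no legitimate caller.

    #include <cstddef>
    #include <vector>

    // Write-only bytecode blob model: bytes go in, offsets come out.
    class EngineBlobModel {
        std::vector<char> blob;
    public:
        // Append a region and return its offset for later fixup into the
        // engine structures; this is the only supported way to touch the blob.
        std::size_t add(const void *a, std::size_t len) {
            std::size_t offset = blob.size();
            const char *p = static_cast<const char *>(a);
            blob.insert(blob.end(), p, p + len);
            return offset;
        }
        std::size_t size() const { return blob.size(); }
        // Deliberately no data() accessor: consumers hold offsets, not pointers.
    };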
---
 src/rose/rose_build_engine_blob.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h
index a22f2dff..3aa501b4 100644
--- a/src/rose/rose_build_engine_blob.h
+++ b/src/rose/rose_build_engine_blob.h
@@ -74,10 +74,6 @@ public:
         return blob.size();
     }
 
-    const char *data() const {
-        return blob.data();
-    }
-
     u32 add(const void *a, const size_t len, const size_t align) {
         pad(align);

From b0a5bd8940c1ee3d2779d1bdb50f106bbe9a3219 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Fri, 12 May 2017 11:29:58 +1000
Subject: [PATCH 310/326] test for pthread_setaffinity_np

Only enable setting threads per core when available

---
 cmake/config.h.in | 2 ++
 tools/hsbench/CMakeLists.txt | 12 ++++++++++++
 tools/hsbench/main.cpp | 15 ++++++++++++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/cmake/config.h.in b/cmake/config.h.in
index 5434668e..62029cb9 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -46,6 +46,8 @@
    0 if you don't. */
 #cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
 
+#cmakedefine HAVE_PTHREAD_NP_H
+
 /* Define to 1 if you have the `malloc_info' function. */
 #cmakedefine HAVE_MALLOC_INFO

diff --git a/tools/hsbench/CMakeLists.txt b/tools/hsbench/CMakeLists.txt
index 3b9a73f7..8f718ee3 100644
--- a/tools/hsbench/CMakeLists.txt
+++ b/tools/hsbench/CMakeLists.txt
@@ -11,6 +11,18 @@ else()
     set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${SQLITE3_INCLUDE_DIRS}")
 endif()
 
+# BSD has the _np funcs in a _np header
+CHECK_INCLUDE_FILE(pthread_np.h HAVE_PTHREAD_NP_H)
+if (HAVE_PTHREAD_NP_H)
+    set (PTHREAD_NP_INC pthread_np.h)
+else ()
+    set (PTHREAD_NP_INC pthread.h)
+endif ()
+
+set (CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -D_GNU_SOURCE")
+set (CMAKE_REQUIRED_LIBRARIES pthread)
+CHECK_CXX_SYMBOL_EXISTS(pthread_setaffinity_np ${PTHREAD_NP_INC} HAVE_DECL_PTHREAD_SETAFFINITY_NP)
+
 CHECK_FUNCTION_EXISTS(malloc_info HAVE_MALLOC_INFO)
 CHECK_FUNCTION_EXISTS(shmget HAVE_SHMGET)
 set(HAVE_SHMGET ${HAVE_SHMGET} CACHE BOOL "shmget()")
diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp
index b5506af3..3153737e 100644
--- a/tools/hsbench/main.cpp
+++ b/tools/hsbench/main.cpp
@@ -56,6 +56,9 @@
 #include
 #ifndef _WIN32
 #include
+#if defined(HAVE_PTHREAD_NP_H)
+#include <pthread_np.h>
+#endif
 #include
 #endif
 
@@ -122,7 +125,11 @@ public:
     // Apply processor affinity (if available) to this thread.
     bool affine(UNUSED int cpu) {
 #ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
+#if defined(__linux__)
         cpu_set_t cpuset;
+#else // BSD
+        cpuset_t cpuset;
+#endif
         CPU_ZERO(&cpuset);
 
         assert(cpu >= 0 && cpu < CPU_SETSIZE);
@@ -166,7 +173,9 @@ void usage(const char *error) {
            " (default: streaming).\n");
     printf(" -V Benchmark in vectored mode"
            " (default: streaming).\n");
+#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
     printf(" -T CPU,CPU,... Benchmark with threads on these CPUs.\n");
+#endif
     printf(" -i DIR Don't compile, load from files in DIR"
            " instead.\n");
     printf(" -w DIR After compiling, save to files in DIR.\n");
@@ -195,7 +204,11 @@ struct BenchmarkSigs {
 static
 void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
                  UNUSED unique_ptr<Grey> &grey) {
-    const char options[] = "-b:c:Cd:e:E:G:hi:n:No:p:sT:Vw:z:";
+    const char options[] = "-b:c:Cd:e:E:G:hi:n:No:p:sVw:z:"
+#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
+                           "T:" // add the thread flag
+#endif
+        ;
     int in_sigfile = 0;
     int do_per_scan = 0;
     int do_echo_matches = 0;

From 60e3769664cf868e8f1a2cadf348e8101fdcbc51 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Tue, 16 May 2017 08:52:11 +1000
Subject: [PATCH 311/326] cmake: use the correct command

---
 tools/hsbench/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/hsbench/CMakeLists.txt b/tools/hsbench/CMakeLists.txt
index 8f718ee3..9b2cde4d 100644
--- a/tools/hsbench/CMakeLists.txt
+++ b/tools/hsbench/CMakeLists.txt
@@ -12,7 +12,7 @@ else()
 endif()
 
 # BSD has the _np funcs in a _np header
-CHECK_INCLUDE_FILE(pthread_np.h HAVE_PTHREAD_NP_H)
+CHECK_INCLUDE_FILE_CXX(pthread_np.h HAVE_PTHREAD_NP_H)
 if (HAVE_PTHREAD_NP_H)
     set (PTHREAD_NP_INC pthread_np.h)
 else ()

From 0275869b3e73dda1f4a18e89233f99e7f4122e09 Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Tue, 16 May 2017 10:37:19 +1000
Subject: [PATCH 312/326] Use our intrin header

---
 src/hwlm/noodle_engine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hwlm/noodle_engine.c b/src/hwlm/noodle_engine.c
index 24f78c8e..9758f42b 100644
--- a/src/hwlm/noodle_engine.c
+++ b/src/hwlm/noodle_engine.c
@@ -36,6 +36,7 @@
 #include "util/arch.h"
 #include "util/bitutils.h"
 #include "util/compare.h"
+#include "util/intrinsics.h"
 #include "util/join.h"
 #include "util/masked_move.h"
 #include "util/simd_utils.h"
@@ -43,7 +44,6 @@
 #include
 #include
 #include
-#include <immintrin.h>
 
 /** \brief Noodle runtime context. */
 struct cb_info {

From 3e345c256770c229cf3cba66d07c350497566d3c Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Tue, 16 May 2017 11:05:53 +1000
Subject: [PATCH 313/326] If we can shift by an immediate, do it. Otherwise, don't.

---
 CMakeLists.txt | 1 +
 cmake/config.h.in | 3 ++
 src/util/simd_utils.h | 40 ++++++++++++++++++++++----
 unit/internal/simd_utils.cpp | 54 ++++++++++++++++++++++++++++++++++++
 4 files changed, 92 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5e0f06b2..cfb1325c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -313,6 +313,7 @@ endif ()
 # testing a builtin takes a little more work
 CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
 CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED)
+CHECK_C_SOURCE_COMPILES("int main(void) { __builtin_constant_p(0); }" HAVE__BUILTIN_CONSTANT_P)
 
 if (NOT WIN32)
 set(C_FLAGS_TO_CHECK
diff --git a/cmake/config.h.in b/cmake/config.h.in
index 62029cb9..9c250b4c 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -81,6 +81,9 @@
 /* Define to 1 if you have the `_aligned_malloc' function.
*/ #cmakedefine HAVE__ALIGNED_MALLOC +/* Define if compiler has __builtin_constant_p */ +#cmakedefine HAVE__BUILTIN_CONSTANT_P + /* Optimize, inline critical functions */ #cmakedefine HS_OPTIMIZE diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index b4c0f7c8..047cdbab 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -123,7 +123,17 @@ static really_inline u32 diffrich64_128(m128 a, m128 b) { #endif } -#define lshift64_m128(a, b) _mm_slli_epi64((a), (b)) +static really_really_inline +m128 lshift64_m128(m128 a, unsigned b) { +#if defined(HAVE__BUILTIN_CONSTANT_P) + if (__builtin_constant_p(b)) { + return _mm_slli_epi64(a, b); + } +#endif + m128 x = _mm_cvtsi32_si128(b); + return _mm_sll_epi64(a, x); +} + #define rshift64_m128(a, b) _mm_srli_epi64((a), (b)) #define eq128(a, b) _mm_cmpeq_epi8((a), (b)) #define movemask128(a) ((u32)_mm_movemask_epi8((a))) @@ -339,7 +349,18 @@ m128 set64x2(u64a hi, u64a lo) { ****/ #if defined(HAVE_AVX2) -#define lshift64_m256(a, b) _mm256_slli_epi64((a), (b)) + +static really_really_inline +m256 lshift64_m256(m256 a, unsigned b) { +#if defined(HAVE__BUILTIN_CONSTANT_P) + if (__builtin_constant_p(b)) { + return _mm256_slli_epi64(a, b); + } +#endif + m128 x = _mm_cvtsi32_si128(b); + return _mm256_sll_epi64(a, x); +} + #define rshift64_m256(a, b) _mm256_srli_epi64((a), (b)) static really_inline @@ -357,7 +378,7 @@ m256 set2x128(m128 a) { #else -static really_inline +static really_really_inline m256 lshift64_m256(m256 a, int b) { m256 rv = a; rv.lo = lshift64_m128(rv.lo, b); @@ -776,7 +797,6 @@ static really_inline m384 andnot384(m384 a, m384 b) { return rv; } -// The shift amount is an immediate static really_really_inline m384 lshift64_m384(m384 a, unsigned b) { m384 rv; @@ -1016,9 +1036,17 @@ m512 andnot512(m512 a, m512 b) { } #if defined(HAVE_AVX512) -#define lshift64_m512(a, b) _mm512_slli_epi64((a), b) +static really_really_inline +m512 lshift64_m512(m512 a, unsigned b) { +#if defined(HAVE__BUILTIN_CONSTANT_P) + if (__builtin_constant_p(b)) { + return _mm512_slli_epi64(a, b); + } +#endif + m128 x = _mm_cvtsi32_si128(b); + return _mm512_sll_epi64(a, x); +} #else -// The shift amount is an immediate static really_really_inline m512 lshift64_m512(m512 a, unsigned b) { m512 rv; diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 0d3926d6..d3e34f52 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -143,6 +143,10 @@ void simd_loadbytes(m128 *a, const void *ptr, unsigned i) { *a = loadbytes128(pt void simd_loadbytes(m256 *a, const void *ptr, unsigned i) { *a = loadbytes256(ptr, i); } void simd_loadbytes(m384 *a, const void *ptr, unsigned i) { *a = loadbytes384(ptr, i); } void simd_loadbytes(m512 *a, const void *ptr, unsigned i) { *a = loadbytes512(ptr, i); } +m128 simd_lshift64(const m128 &a, unsigned i) { return lshift64_m128(a, i); } +m256 simd_lshift64(const m256 &a, unsigned i) { return lshift64_m256(a, i); } +m384 simd_lshift64(const m384 &a, unsigned i) { return lshift64_m384(a, i); } +m512 simd_lshift64(const m512 &a, unsigned i) { return lshift64_m512(a, i); } template class SimdUtilsTest : public testing::Test { @@ -586,6 +590,56 @@ TYPED_TEST(SimdUtilsTest, loadbytes_storebytes) { } } +TYPED_TEST(SimdUtilsTest, lshift64) { + TypeParam a; + memset(&a, 0x5a, sizeof(a)); + + static constexpr u64a exp_val = 0x5a5a5a5a5a5a5a5aULL; + + union { + TypeParam simd; + u64a qword[sizeof(TypeParam) / 8]; + } c; + cout << "non-const for size " << sizeof(a) << '\n'; + for (unsigned s = 0; 
s < 64; s++) { + c.simd = simd_lshift64(a, s); + + const u64a expected = exp_val << s; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } + } + + // test immediates + u64a expected; + + cout << "imm for size " << sizeof(a) << '\n'; + c.simd = simd_lshift64(a, 1); + expected = exp_val << 1; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } + + c.simd = simd_lshift64(a, 2); + expected = exp_val << 2; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } + + c.simd = simd_lshift64(a, 7); + expected = exp_val << 7; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } + + c.simd = simd_lshift64(a, 31); + expected = exp_val << 31; + for (size_t i = 0; i < sizeof(c) / 8; i++) { + EXPECT_EQ(expected, c.qword[i]); + } +} + + TEST(SimdUtilsTest, alignment) { ASSERT_EQ(16, alignof(m128)); ASSERT_EQ(32, alignof(m256)); From fb3a03dc6924525bc2cd20c13db31c47127c09b6 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 17 May 2017 13:48:13 +1000 Subject: [PATCH 314/326] Disable part of unit test on FreeBSD 10 --- unit/internal/simd_utils.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index d3e34f52..623c2c99 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -600,7 +600,7 @@ TYPED_TEST(SimdUtilsTest, lshift64) { TypeParam simd; u64a qword[sizeof(TypeParam) / 8]; } c; - cout << "non-const for size " << sizeof(a) << '\n'; + for (unsigned s = 0; s < 64; s++) { c.simd = simd_lshift64(a, s); @@ -610,10 +610,12 @@ TYPED_TEST(SimdUtilsTest, lshift64) { } } + /* Clang 3.4 on FreeBSD 10 crashes on the following - disable for now */ +#if !(defined(__FreeBSD__) && defined(__clang__) && __clang_major__ == 3) + // test immediates u64a expected; - cout << "imm for size " << sizeof(a) << '\n'; c.simd = simd_lshift64(a, 1); expected = exp_val << 1; for (size_t i = 0; i < sizeof(c) / 8; i++) { @@ -637,9 +639,9 @@ TYPED_TEST(SimdUtilsTest, lshift64) { for (size_t i = 0; i < sizeof(c) / 8; i++) { EXPECT_EQ(expected, c.qword[i]); } +#endif } - TEST(SimdUtilsTest, alignment) { ASSERT_EQ(16, alignof(m128)); ASSERT_EQ(32, alignof(m256)); From cfdac664042c17e79c7a1de0a04906b9f9b59cc0 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Wed, 17 May 2017 14:43:15 +1000 Subject: [PATCH 315/326] accel: limit how far we will search in findBest --- src/nfagraph/ng_limex_accel.cpp | 41 ++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index beeb4a69..80e08a7f 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -215,16 +215,30 @@ struct SAccelScheme { }; } +/** + * \brief Limit on the number of (recursive) calls to findBestInternal(). 
+ */
+static constexpr size_t MAX_FINDBEST_CALLS = 1000000;
+
 static
-void findBest(vector<vector<CharReach> >::const_iterator pb,
-              vector<vector<CharReach> >::const_iterator pe,
-              const SAccelScheme &curr, SAccelScheme *best) {
+void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
+                      vector<vector<CharReach>>::const_iterator pe,
+                      size_t *num_calls, const SAccelScheme &curr,
+                      SAccelScheme *best) {
     assert(curr.offset <= MAX_ACCEL_DEPTH);
+
+    if (++(*num_calls) > MAX_FINDBEST_CALLS) {
+        DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
+        return;
+    }
+
     DEBUG_PRINTF("paths left %zu\n", pe - pb);
     if (pb == pe) {
         if (curr < *best) {
-            DEBUG_PRINTF("new best\n");
             *best = curr;
+            DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n",
+                         best->cr.count(), describeClass(best->cr).c_str(),
+                         best->offset);
         }
         return;
     }
@@ -262,7 +276,7 @@ void findBest(vector<vector<CharReach> >::const_iterator pb,
             DEBUG_PRINTF("worse\n");
             continue;
         }
-        findBest(pb + 1, pe, in, best);
+        findBestInternal(pb + 1, pe, num_calls, in, best);
 
         if (curr.cr == best->cr) {
             return; /* could only get better by offset */
         }
     }
 }
 
+static
+SAccelScheme findBest(const vector<vector<CharReach>> &paths,
+                      const CharReach &terminating) {
+    SAccelScheme curr(terminating, 0U);
+    SAccelScheme best;
+    size_t num_calls = 0;
+    findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best);
+    DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls);
+    DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n",
+                 best.cr.count(), describeClass(best.cr).c_str(), best.offset);
+    return best;
+}
+
 namespace {
 struct DAccelScheme {
     DAccelScheme(CharReach cr_in, u32 offset_in)
@@ -557,9 +584,7 @@ AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
     /* if we were smart we would do something netflowy on the paths to find the
      * best cut. But we aren't, so we will just brute force it.
      */
-    SAccelScheme curr(terminating, 0U);
-    SAccelScheme best;
-    findBest(paths.begin(), paths.end(), curr, &best);
+    SAccelScheme best = findBest(paths, terminating);
 
     /* find best is a bit lazy in terms of minimising the offset, see if we can
      * make it better.
need to find the min max offset that we need.*/ From a6f439495914a803eb2f4a51e4e01e40f47757fe Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 23 Nov 2016 16:43:34 +1100 Subject: [PATCH 316/326] cmake: scope fun --- cmake/backtrace.cmake | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmake/backtrace.cmake b/cmake/backtrace.cmake index b8ad79f6..5a446e89 100644 --- a/cmake/backtrace.cmake +++ b/cmake/backtrace.cmake @@ -45,10 +45,12 @@ if(HAVE_BACKTRACE) if(HAS_RDYNAMIC) list(INSERT BACKTRACE_LDFLAGS 0 -rdynamic) endif() - # cmake scope fun - set(HAVE_BACKTRACE ${HAVE_BACKTRACE} PARENT_SCOPE) else() set(BACKTRACE_CFLAGS "") set(BACKTRACE_LDFLAGS "") endif() +# cmake scope fun +set(HAVE_BACKTRACE ${HAVE_BACKTRACE} CACHE BOOL INTERNAL) +set(BACKTRACE_CFLAGS ${BACKTRACE_CFLAGS} CACHE STRING INTERNAL) +set(BACKTRACE_LDFLAGS ${BACKTRACE_LDFLAGS} CACHE STRING INTERNAL) From 73765f1f84bf4bf22bb18d7df24aaca55bc66cf2 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 7 Jun 2017 11:16:30 +1000 Subject: [PATCH 317/326] cmake: Improve the microarch checks --- CMakeLists.txt | 15 +++++---------- cmake/arch.cmake | 45 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cfb1325c..670de51b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -253,6 +253,11 @@ else() set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-abi") endif () + if (CMAKE_C_COMPILER_ID MATCHES "Intel") + set(SKYLAKE_FLAG "-xCORE-AVX512") + else () + set(SKYLAKE_FLAG "-march=skylake-avx512") + endif () endif() CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) @@ -306,10 +311,6 @@ endif () include (${CMAKE_MODULE_PATH}/arch.cmake) -if (NOT FAT_RUNTIME AND NOT HAVE_SSSE3) - message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") -endif () - # testing a builtin takes a little more work CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED) CHECK_CXX_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CXX_BUILTIN_ASSUME_ALIGNED) @@ -460,12 +461,6 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") endif() -if(CMAKE_C_COMPILER_ID MATCHES "Intel") - set(SKYLAKE_FLAG "-xCORE-AVX512") -else() - set(SKYLAKE_FLAG "-march=skylake-avx512") -endif() - if(NOT WIN32) set(RAGEL_C_FLAGS "-Wno-unused") endif() diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 69902f57..0519b2e5 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -10,8 +10,24 @@ else () message (FATAL_ERROR "No intrinsics header found") endif () +if (BUILD_AVX512) + CHECK_C_COMPILER_FLAG(${SKYLAKE_FLAG} HAS_ARCH_SKYLAKE) + if (NOT HAS_ARCH_SKYLAKE) + message (FATAL_ERROR "AVX512 not supported by compiler") + endif () +endif () -set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}") +if (FAT_RUNTIME) + # test the highest level microarch to make sure everything works + if (BUILD_AVX512) + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}") + else () + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2") + endif () +else (NOT FAT_RUNTIME) + # if not fat runtime, then test given cflags + set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${ARCH_C_FLAGS}") +endif () # ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic 
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> @@ -31,10 +47,6 @@ int main(){ (void)_mm256_xor_si256(z, z); }" HAVE_AVX2) -if (NOT HAVE_AVX2) - message(STATUS "Building without AVX2 support") -endif () - # and now for AVX512 CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}> #if !defined(__AVX512BW__) @@ -46,8 +58,27 @@ int main(){ (void)_mm512_abs_epi8(z); }" HAVE_AVX512) -if (NOT HAVE_AVX512) - message(STATUS "Building without AVX512 support") +if (FAT_RUNTIME) + if (NOT HAVE_SSSE3) + message(FATAL_ERROR "SSSE3 support required to build fat runtime") + endif () + if (NOT HAVE_AVX2) + message(FATAL_ERROR "AVX2 support required to build fat runtime") + endif () + if (BUILD_AVX512 AND NOT HAVE_AVX512) + message(FATAL_ERROR "AVX512 support requested but not supported") + endif () +else (NOT FAT_RUNTIME) + if (NOT HAVE_AVX2) + message(STATUS "Building without AVX2 support") + endif () + if (NOT HAVE_AVX512) + message(STATUS "Building without AVX512 support") + endif () +else (NOT FAT_RUNTIME) + if (NOT HAVE_SSSE3) + message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") + endif () endif () unset (CMAKE_REQUIRED_FLAGS) From dba2470ec944c9add386f0765c38bc64084c1904 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Fri, 10 Mar 2017 15:48:38 +1100 Subject: [PATCH 318/326] msvc: use the vectorcall calling convention This requires declaring external interfaces with the cdecl calling convention. --- CMakeLists.txt | 21 ++++++++--- src/alloc.c | 16 +++++--- src/database.c | 29 ++++++++------- src/hs.cpp | 52 ++++++++++++++------------ src/hs_common.h | 64 +++++++++++++++++++------------- src/hs_compile.h | 41 +++++++++++--------- src/hs_runtime.h | 66 ++++++++++++++++++--------------- src/hs_valid_platform.c | 4 +- src/hs_version.c | 4 +- src/runtime.c | 58 +++++++++++++++++------------ src/scratch.c | 12 +++--- unit/CMakeLists.txt | 66 +++++++++++++++++---------------- unit/internal/main.cpp | 5 ++- unit/internal/utf8_validate.cpp | 6 +-- util/CMakeLists.txt | 2 +- util/ExpressionParser.h | 10 +++-- util/ExpressionParser.rl | 6 +-- util/expressions.cpp | 8 ++-- util/expressions.h | 8 ++-- 19 files changed, 271 insertions(+), 207 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 670de51b..330b8650 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,16 +135,21 @@ option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC # TODO: windows generator on cmake always uses msvc, even if we plan to build with icc if(MSVC OR MSVC_IDE) message(STATUS "Building for Windows") + if (MSVC_VERSION LESS 1700) message(FATAL_ERROR "The project requires C++11 features.") else() if (WINDOWS_ICC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /QxHost /wd4267 /Qdiag-disable:remark") + set(ARCH_C_FLAGS "/QxHost") + set(ARCH_CXX_FLAGS "/QxHost") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /wd4267 /Qdiag-disable:remark") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") else() - #TODO: don't hardcode arch - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /arch:AVX /wd4267") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /arch:AVX /wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") + # todo: change these as required + set(ARCH_C_FLAGS "/arch:AVX2") + set(ARCH_CXX_FLAGS "/arch:AVX2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /wd4244 /wd4267") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 
/wd4244 /wd4267 /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") endif() string(REPLACE "/RTC1" "" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") string(REPLACE "/RTC1" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") @@ -153,6 +158,10 @@ if(MSVC OR MSVC_IDE) set(CMAKE_C_FLAGS_DEBUG "/DNDEBUG ${CMAKE_C_FLAGS_DEBUG}") set(CMAKE_CXX_FLAGS_DEBUG "/DNDEBUG ${CMAKE_CXX_FLAGS_DEBUG}") endif () + + # flags only used to build hs libs + set(HS_C_FLAGS "/Gv") + set(HS_CXX_FLAGS "/Gv") endif() else() @@ -454,8 +463,8 @@ endif() # only set these after all tests are done if (NOT FAT_RUNTIME) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${HS_C_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS} ${HS_CXX_FLAGS}") else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") diff --git a/src/alloc.c b/src/alloc.c index aa7638e7..e27649bc 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -67,7 +67,7 @@ hs_free_t normalise_free(hs_free_t f) { } HS_PUBLIC_API -hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { hs_set_database_allocator(allocfunc, freefunc); hs_set_misc_allocator(allocfunc, freefunc); hs_set_stream_allocator(allocfunc, freefunc); @@ -77,7 +77,8 @@ hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { } HS_PUBLIC_API -hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t allocfunc, + hs_free_t freefunc) { hs_database_alloc = normalise_alloc(allocfunc); hs_database_free = normalise_free(freefunc); @@ -85,7 +86,8 @@ hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { } HS_PUBLIC_API -hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t allocfunc, + hs_free_t freefunc) { hs_misc_alloc = normalise_alloc(allocfunc); hs_misc_free = normalise_free(freefunc); @@ -93,7 +95,8 @@ hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { } HS_PUBLIC_API -hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t allocfunc, + hs_free_t freefunc) { hs_scratch_alloc = normalise_alloc(allocfunc); hs_scratch_free = normalise_free(freefunc); @@ -101,7 +104,8 @@ hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { } HS_PUBLIC_API -hs_error_t hs_set_stream_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) { +hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t allocfunc, + hs_free_t freefunc) { hs_stream_alloc = normalise_alloc(allocfunc); hs_stream_free = normalise_free(freefunc); diff --git a/src/database.c b/src/database.c index c6878d89..dc03bf1f 100644 --- a/src/database.c +++ b/src/database.c @@ -49,7 +49,7 @@ int db_correctly_aligned(const void *db) { } HS_PUBLIC_API -hs_error_t hs_free_database(hs_database_t *db) { +hs_error_t HS_CDECL hs_free_database(hs_database_t *db) { if (db && db->magic != 
HS_DB_MAGIC) { return HS_INVALID; } @@ -59,8 +59,8 @@ hs_error_t hs_free_database(hs_database_t *db) { } HS_PUBLIC_API -hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes, - size_t *serialized_length) { +hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes, + size_t *serialized_length) { if (!db || !bytes || !serialized_length) { return HS_INVALID; } @@ -196,8 +196,9 @@ void db_copy_bytecode(const char *serialized, hs_database_t *db) { } HS_PUBLIC_API -hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length, - hs_database_t *db) { +hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes, + const size_t length, + hs_database_t *db) { if (!bytes || !db) { return HS_INVALID; } @@ -238,8 +239,9 @@ hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length, } HS_PUBLIC_API -hs_error_t hs_deserialize_database(const char *bytes, const size_t length, - hs_database_t **db) { +hs_error_t HS_CDECL hs_deserialize_database(const char *bytes, + const size_t length, + hs_database_t **db) { if (!bytes || !db) { return HS_INVALID; } @@ -287,7 +289,7 @@ hs_error_t hs_deserialize_database(const char *bytes, const size_t length, } HS_PUBLIC_API -hs_error_t hs_database_size(const hs_database_t *db, size_t *size) { +hs_error_t HS_CDECL hs_database_size(const hs_database_t *db, size_t *size) { if (!size) { return HS_INVALID; } @@ -302,8 +304,9 @@ hs_error_t hs_database_size(const hs_database_t *db, size_t *size) { } HS_PUBLIC_API -hs_error_t hs_serialized_database_size(const char *bytes, const size_t length, - size_t *size) { +hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes, + const size_t length, + size_t *size) { // Decode and check the header hs_database_t header; hs_error_t ret = db_decode_header(&bytes, length, &header); @@ -417,8 +420,8 @@ hs_error_t print_database_string(char **s, u32 version, const platform_t plat, } HS_PUBLIC_API -hs_error_t hs_serialized_database_info(const char *bytes, size_t length, - char **info) { +hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes, + size_t length, char **info) { if (!info) { return HS_INVALID; } @@ -437,7 +440,7 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length, } HS_PUBLIC_API -hs_error_t hs_database_info(const hs_database_t *db, char **info) { +hs_error_t HS_CDECL hs_database_info(const hs_database_t *db, char **info) { if (!info) { return HS_INVALID; } diff --git a/src/hs.cpp b/src/hs.cpp index af1c3c6a..b93a56ee 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -279,9 +279,10 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, } // namespace ue2 extern "C" HS_PUBLIC_API -hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode, - const hs_platform_info_t *platform, hs_database_t **db, - hs_compile_error_t **error) { +hs_error_t HS_CDECL hs_compile(const char *expression, unsigned flags, + unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, hs_compile_error_t **error) { if (expression == nullptr) { *db = nullptr; *error = generateCompileError("Invalid parameter: expression is NULL", @@ -297,24 +298,25 @@ hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode, } extern "C" HS_PUBLIC_API -hs_error_t hs_compile_multi(const char * const *expressions, - const unsigned *flags, const unsigned *ids, - unsigned elements, unsigned mode, - const hs_platform_info_t *platform, - hs_database_t **db, hs_compile_error_t **error) { +hs_error_t 
HS_CDECL hs_compile_multi(const char *const *expressions, + const unsigned *flags, const unsigned *ids, + unsigned elements, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error) { const hs_expr_ext * const *ext = nullptr; // unused for this call. return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode, platform, db, error, Grey()); } extern "C" HS_PUBLIC_API -hs_error_t hs_compile_ext_multi(const char * const *expressions, - const unsigned *flags, const unsigned *ids, - const hs_expr_ext * const *ext, - unsigned elements, unsigned mode, - const hs_platform_info_t *platform, - hs_database_t **db, - hs_compile_error_t **error) { +hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions, + const unsigned *flags, const unsigned *ids, + const hs_expr_ext * const *ext, + unsigned elements, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error) { return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode, platform, db, error, Grey()); } @@ -419,24 +421,26 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, } extern "C" HS_PUBLIC_API -hs_error_t hs_expression_info(const char *expression, unsigned int flags, - hs_expr_info_t **info, - hs_compile_error_t **error) { +hs_error_t HS_CDECL hs_expression_info(const char *expression, + unsigned int flags, + hs_expr_info_t **info, + hs_compile_error_t **error) { return hs_expression_info_int(expression, flags, nullptr, HS_MODE_BLOCK, info, error); } extern "C" HS_PUBLIC_API -hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, - const hs_expr_ext_t *ext, - hs_expr_info_t **info, - hs_compile_error_t **error) { +hs_error_t HS_CDECL hs_expression_ext_info(const char *expression, + unsigned int flags, + const hs_expr_ext_t *ext, + hs_expr_info_t **info, + hs_compile_error_t **error) { return hs_expression_info_int(expression, flags, ext, HS_MODE_BLOCK, info, error); } extern "C" HS_PUBLIC_API -hs_error_t hs_populate_platform(hs_platform_info_t *platform) { +hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform) { if (!platform) { return HS_INVALID; } @@ -450,7 +454,7 @@ hs_error_t hs_populate_platform(hs_platform_info_t *platform) { } extern "C" HS_PUBLIC_API -hs_error_t hs_free_compile_error(hs_compile_error_t *error) { +hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error) { #if defined(FAT_RUNTIME) if (!check_ssse3()) { return HS_ARCH_ERROR; diff --git a/src/hs_common.h b/src/hs_common.h index fac08253..ffea397e 100644 --- a/src/hs_common.h +++ b/src/hs_common.h @@ -29,6 +29,11 @@ #ifndef HS_COMMON_H_ #define HS_COMMON_H_ +#if defined(_WIN32) +#define HS_CDECL __cdecl +#else +#define HS_CDECL +#endif #include /** @@ -76,7 +81,7 @@ typedef int hs_error_t; * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_free_database(hs_database_t *db); +hs_error_t HS_CDECL hs_free_database(hs_database_t *db); /** * Serialize a pattern database to a stream of bytes. @@ -100,8 +105,8 @@ hs_error_t hs_free_database(hs_database_t *db); * @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be * allocated, other values may be returned if errors are detected. 
*/ -hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes, - size_t *length); +hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes, + size_t *length); /** * Reconstruct a pattern database from a stream of bytes previously generated @@ -129,8 +134,9 @@ hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_deserialize_database(const char *bytes, const size_t length, - hs_database_t **db); +hs_error_t HS_CDECL hs_deserialize_database(const char *bytes, + const size_t length, + hs_database_t **db); /** * Reconstruct a pattern database from a stream of bytes previously generated @@ -160,8 +166,9 @@ hs_error_t hs_deserialize_database(const char *bytes, const size_t length, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length, - hs_database_t *db); +hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes, + const size_t length, + hs_database_t *db); /** * Provides the size of the stream state allocated by a single stream opened @@ -177,7 +184,8 @@ hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size); +hs_error_t HS_CDECL hs_stream_size(const hs_database_t *database, + size_t *stream_size); /** * Provides the size of the given database in bytes. @@ -192,8 +200,8 @@ hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_database_size(const hs_database_t *database, - size_t *database_size); +hs_error_t HS_CDECL hs_database_size(const hs_database_t *database, + size_t *database_size); /** * Utility function for reporting the size that would be required by a @@ -219,8 +227,9 @@ hs_error_t hs_database_size(const hs_database_t *database, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_serialized_database_size(const char *bytes, const size_t length, - size_t *deserialized_size); +hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes, + const size_t length, + size_t *deserialized_size); /** * Utility function providing information about a database. @@ -237,7 +246,8 @@ hs_error_t hs_serialized_database_size(const char *bytes, const size_t length, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_database_info(const hs_database_t *database, char **info); +hs_error_t HS_CDECL hs_database_info(const hs_database_t *database, + char **info); /** * Utility function providing information about a serialized database. @@ -258,8 +268,8 @@ hs_error_t hs_database_info(const hs_database_t *database, char **info); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_serialized_database_info(const char *bytes, size_t length, - char **info); +hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes, + size_t length, char **info); /** * The type of the callback function that will be used by Hyperscan to allocate @@ -275,7 +285,7 @@ hs_error_t hs_serialized_database_info(const char *bytes, size_t length, * @return * A pointer to the region of memory allocated, or NULL on error. 
*/ -typedef void *(*hs_alloc_t)(size_t size); +typedef void *(HS_CDECL *hs_alloc_t)(size_t size); /** * The type of the callback function that will be used by Hyperscan to free @@ -284,7 +294,7 @@ typedef void *(*hs_alloc_t)(size_t size); * @param ptr * The region of memory to be freed. */ -typedef void (*hs_free_t)(void *ptr); +typedef void (HS_CDECL *hs_free_t)(void *ptr); /** * Set the allocate and free functions used by Hyperscan for allocating @@ -312,7 +322,8 @@ typedef void (*hs_free_t)(void *ptr); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func); +hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Set the allocate and free functions used by Hyperscan for allocating memory @@ -344,8 +355,8 @@ hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func, - hs_free_t free_func); +hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Set the allocate and free functions used by Hyperscan for allocating memory @@ -371,7 +382,8 @@ hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func); +hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Set the allocate and free functions used by Hyperscan for allocating memory @@ -397,7 +409,8 @@ hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func); +hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Set the allocate and free functions used by Hyperscan for allocating memory @@ -423,7 +436,8 @@ hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func); +hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t alloc_func, + hs_free_t free_func); /** * Utility function for identifying this release version. @@ -433,7 +447,7 @@ hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func); * date of the build. It is allocated statically, so it does not need to * be freed by the caller. */ -const char *hs_version(void); +const char * HS_CDECL hs_version(void); /** * Utility function to test the current system architecture. @@ -450,7 +464,7 @@ const char *hs_version(void); * @ref HS_SUCCESS on success, @ref HS_ARCH_ERROR if system does not * support Hyperscan. */ -hs_error_t hs_valid_platform(void); +hs_error_t HS_CDECL hs_valid_platform(void); /** * @defgroup HS_ERROR hs_error_t values diff --git a/src/hs_compile.h b/src/hs_compile.h index 0b64e4b3..c1074ffc 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -333,9 +333,10 @@ typedef struct hs_expr_ext { * HS_COMPILER_ERROR on failure, with details provided in the error * parameter. 
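Because the hs_alloc_t and hs_free_t typedefs above now carry HS_CDECL, user-supplied allocator callbacks must be declared with the same qualifier. A sketch of a conforming pair for hs_set_allocator(); log_alloc, log_free and the logging itself are illustrative, not part of the API:

    #include <stdio.h>
    #include <stdlib.h>
    #include <hs.h>

    /* Allocator callback matching the HS_CDECL-qualified hs_alloc_t. */
    static void *HS_CDECL log_alloc(size_t size) {
        void *ptr = malloc(size);
        fprintf(stderr, "hs alloc %zu -> %p\n", size, ptr);
        return ptr;
    }

    /* Free callback matching the HS_CDECL-qualified hs_free_t. */
    static void HS_CDECL log_free(void *ptr) {
        fprintf(stderr, "hs free %p\n", ptr);
        free(ptr);
    }

    /* Installed for all Hyperscan allocations with:
     *     hs_set_allocator(log_alloc, log_free);
     */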
*/ -hs_error_t hs_compile(const char *expression, unsigned int flags, - unsigned int mode, const hs_platform_info_t *platform, - hs_database_t **db, hs_compile_error_t **error); +hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags, + unsigned int mode, + const hs_platform_info_t *platform, + hs_database_t **db, hs_compile_error_t **error); /** * The multiple regular expression compiler. @@ -411,11 +412,13 @@ hs_error_t hs_compile(const char *expression, unsigned int flags, * parameter. * */ -hs_error_t hs_compile_multi(const char *const *expressions, - const unsigned int *flags, const unsigned int *ids, - unsigned int elements, unsigned int mode, - const hs_platform_info_t *platform, - hs_database_t **db, hs_compile_error_t **error); +hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions, + const unsigned int *flags, + const unsigned int *ids, + unsigned int elements, unsigned int mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error); /** * The multiple regular expression compiler with extended parameter support. @@ -496,7 +499,7 @@ hs_error_t hs_compile_multi(const char *const *expressions, * parameter. * */ -hs_error_t hs_compile_ext_multi(const char *const *expressions, +hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions, const unsigned int *flags, const unsigned int *ids, const hs_expr_ext_t *const *ext, @@ -515,7 +518,7 @@ hs_error_t hs_compile_ext_multi(const char *const *expressions, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_free_compile_error(hs_compile_error_t *error); +hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error); /** * Utility function providing information about a regular expression. The @@ -563,9 +566,10 @@ hs_error_t hs_free_compile_error(hs_compile_error_t *error); * HS_COMPILER_ERROR on failure, with details provided in the error * parameter. */ -hs_error_t hs_expression_info(const char *expression, unsigned int flags, - hs_expr_info_t **info, - hs_compile_error_t **error); +hs_error_t HS_CDECL hs_expression_info(const char *expression, + unsigned int flags, + hs_expr_info_t **info, + hs_compile_error_t **error); /** * Utility function providing information about a regular expression, with @@ -618,10 +622,11 @@ hs_error_t hs_expression_info(const char *expression, unsigned int flags, * HS_COMPILER_ERROR on failure, with details provided in the error * parameter. */ -hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, - const hs_expr_ext_t *ext, - hs_expr_info_t **info, - hs_compile_error_t **error); +hs_error_t HS_CDECL hs_expression_ext_info(const char *expression, + unsigned int flags, + const hs_expr_ext_t *ext, + hs_expr_info_t **info, + hs_compile_error_t **error); /** * Populates the platform information based on the current host. @@ -633,7 +638,7 @@ hs_error_t hs_expression_ext_info(const char *expression, unsigned int flags, * @return * @ref HS_SUCCESS on success, other values on failure. 
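For reference, a sketch of driving the re-annotated multi-pattern compiler; build_db, the pattern strings and the ids are arbitrary examples, and error handling is abbreviated:

    #include <stdio.h>
    #include <hs.h>

    /* Compile two patterns with distinct report ids in block mode. */
    static hs_database_t *build_db(void) {
        const char *exprs[] = {"foo.*bar", "[0-9]{4}"};
        const unsigned flags[] = {HS_FLAG_DOTALL, 0};
        const unsigned ids[] = {1, 2};
        hs_database_t *db = NULL;
        hs_compile_error_t *err = NULL;
        if (hs_compile_multi(exprs, flags, ids, 2, HS_MODE_BLOCK, NULL, &db,
                             &err) != HS_SUCCESS) {
            fprintf(stderr, "compile failed: %s\n", err->message);
            hs_free_compile_error(err);
            return NULL;
        }
        return db;
    }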
*/ -hs_error_t hs_populate_platform(hs_platform_info_t *platform); +hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform); /** * @defgroup HS_PATTERN_FLAG Pattern flags diff --git a/src/hs_runtime.h b/src/hs_runtime.h index db52f4f5..ecd97ca5 100644 --- a/src/hs_runtime.h +++ b/src/hs_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -145,8 +145,8 @@ typedef int (*match_event_handler)(unsigned int id, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags, - hs_stream_t **stream); +hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, unsigned int flags, + hs_stream_t **stream); /** * Write data to be scanned to the opened stream. @@ -185,10 +185,10 @@ hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags, * match callback indicated that scanning should stop; other values on * error. */ -hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, - unsigned int length, unsigned int flags, - hs_scratch_t *scratch, match_event_handler onEvent, - void *ctxt); +hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data, + unsigned int length, unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); /** * Close a stream. @@ -223,8 +223,8 @@ hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, * @return * Returns @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, - match_event_handler onEvent, void *ctxt); +hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, + match_event_handler onEvent, void *ctxt); /** * Reset a stream to an initial state. @@ -264,9 +264,9 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags, - hs_scratch_t *scratch, match_event_handler onEvent, - void *context); +hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *context); /** * Duplicate the given stream. The new stream will have the same state as the @@ -282,7 +282,8 @@ hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags, * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id); +hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id, + const hs_stream_t *from_id); /** * Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream @@ -314,11 +315,11 @@ hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id, - const hs_stream_t *from_id, - hs_scratch_t *scratch, - match_event_handler onEvent, - void *context); +hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, + const hs_stream_t *from_id, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context); /** * The block (non-streaming) regular expression scanner. 
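A sketch of the stream lifecycle these prototypes describe: open once, scan any number of chunks, then close, which is also where end-of-data matches are reported. scan_chunks is a hypothetical helper, and the database is assumed to be compiled in streaming mode:

    #include <hs.h>

    static hs_error_t scan_chunks(const hs_database_t *db,
                                  hs_scratch_t *scratch,
                                  match_event_handler on_match, void *ctx,
                                  const char **chunks, const unsigned *lens,
                                  unsigned n) {
        hs_stream_t *stream = NULL;
        hs_error_t err = hs_open_stream(db, 0, &stream);
        if (err != HS_SUCCESS) {
            return err;
        }
        for (unsigned i = 0; i < n && err == HS_SUCCESS; i++) {
            err = hs_scan_stream(stream, chunks[i], lens[i], 0, scratch,
                                 on_match, ctx);
        }
        /* Close unconditionally so the stream state is freed; matches at
         * end of data are raised here. */
        hs_error_t close_err = hs_close_stream(stream, scratch, on_match,
                                               ctx);
        return err != HS_SUCCESS ? err : close_err;
    }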
@@ -355,10 +356,10 @@ hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id, * match callback indicated that scanning should stop; other values on * error. */ -hs_error_t hs_scan(const hs_database_t *db, const char *data, - unsigned int length, unsigned int flags, - hs_scratch_t *scratch, match_event_handler onEvent, - void *context); +hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, + unsigned int length, unsigned int flags, + hs_scratch_t *scratch, match_event_handler onEvent, + void *context); /** * The vectored regular expression scanner. @@ -398,10 +399,12 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match * callback indicated that scanning should stop; other values on error. */ -hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data, - const unsigned int *length, unsigned int count, - unsigned int flags, hs_scratch_t *scratch, - match_event_handler onEvent, void *context); +hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, + const char *const *data, + const unsigned int *length, + unsigned int count, unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *context); /** * Allocate a "scratch" space for use by Hyperscan. @@ -429,7 +432,8 @@ hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data, * allocation fails. Other errors may be returned if invalid parameters * are specified. */ -hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch); +hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, + hs_scratch_t **scratch); /** * Allocate a scratch space that is a clone of an existing scratch space. @@ -449,7 +453,8 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch); * @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails. * Other errors may be returned if invalid parameters are specified. */ -hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest); +hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src, + hs_scratch_t **dest); /** * Provides the size of the given scratch space. @@ -465,7 +470,8 @@ hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest); * @return * @ref HS_SUCCESS on success, other values on failure. */ -hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size); +hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch, + size_t *scratch_size); /** * Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref @@ -480,7 +486,7 @@ hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size); * @return * @ref HS_SUCCESS on success, other values on failure. 
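And a block-mode counterpart exercising the scratch prototypes above. The on_match callback follows the match_event_handler contract (return non-zero to halt scanning); scan_once is a hypothetical helper, and the database is assumed to be compiled with HS_MODE_BLOCK:

    #include <stdio.h>
    #include <hs.h>

    static int on_match(unsigned int id, unsigned long long from,
                        unsigned long long to, unsigned int flags,
                        void *ctx) {
        (void)from; (void)flags; (void)ctx;
        printf("match for id %u ending at offset %llu\n", id, to);
        return 0; /* continue scanning */
    }

    static hs_error_t scan_once(const hs_database_t *db, const char *data,
                                unsigned int len) {
        hs_scratch_t *scratch = NULL;
        hs_error_t err = hs_alloc_scratch(db, &scratch);
        if (err != HS_SUCCESS) {
            return err;
        }
        err = hs_scan(db, data, len, 0, scratch, on_match, NULL);
        hs_free_scratch(scratch);
        return err;
    }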
*/ -hs_error_t hs_free_scratch(hs_scratch_t *scratch); +hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch); /** * Callback 'from' return value, indicating that the start of this match was diff --git a/src/hs_valid_platform.c b/src/hs_valid_platform.c index 939cde1f..128ac04f 100644 --- a/src/hs_valid_platform.c +++ b/src/hs_valid_platform.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Intel Corporation + * Copyright (c) 2016-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #include "util/cpuid_flags.h" HS_PUBLIC_API -hs_error_t hs_valid_platform(void) { +hs_error_t HS_CDECL hs_valid_platform(void) { /* Hyperscan requires SSSE3, anything else is a bonus */ if (check_ssse3()) { return HS_SUCCESS; diff --git a/src/hs_version.c b/src/hs_version.c index 45e23c3b..04cf46f3 100644 --- a/src/hs_version.c +++ b/src/hs_version.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,6 @@ #include "hs_version.h" HS_PUBLIC_API -const char *hs_version(void) { +const char * HS_CDECL hs_version(void) { return HS_VERSION_STRING; } diff --git a/src/runtime.c b/src/runtime.c index 1ee3efa5..5725cf93 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -311,9 +311,10 @@ void runSmallWriteEngine(const struct SmallWriteEngine *smwr, } HS_PUBLIC_API -hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, - unsigned flags, hs_scratch_t *scratch, - match_event_handler onEvent, void *userCtx) { +hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, + unsigned length, unsigned flags, + hs_scratch_t *scratch, match_event_handler onEvent, + void *userCtx) { if (unlikely(!scratch || !data)) { return HS_INVALID; } @@ -503,8 +504,9 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose, } HS_PUBLIC_API -hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags, - hs_stream_t **stream) { +hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, + UNUSED unsigned flags, + hs_stream_t **stream) { if (unlikely(!stream)) { return HS_INVALID; } @@ -656,7 +658,8 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, } HS_PUBLIC_API -hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id) { +hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id, + const hs_stream_t *from_id) { if (!to_id) { return HS_INVALID; } @@ -683,11 +686,11 @@ hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id) { } HS_PUBLIC_API -hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id, - const hs_stream_t *from_id, - hs_scratch_t *scratch, - match_event_handler onEvent, - void *context) { +hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id, + const hs_stream_t *from_id, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context) { if (!from_id || !from_id->rose) { return HS_INVALID; } @@ -906,9 +909,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, } HS_PUBLIC_API -hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, unsigned length, - unsigned flags, hs_scratch_t *scratch, - match_event_handler onEvent, void *context) { +hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data, + unsigned length, 
unsigned flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *context) { if (unlikely(!id || !scratch || !data || !validScratch(id->rose, scratch))) { return HS_INVALID; @@ -924,8 +928,9 @@ hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, unsigned length, } HS_PUBLIC_API -hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, - match_event_handler onEvent, void *context) { +hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, + match_event_handler onEvent, + void *context) { if (!id) { return HS_INVALID; } @@ -947,9 +952,10 @@ hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, } HS_PUBLIC_API -hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, - hs_scratch_t *scratch, match_event_handler onEvent, - void *context) { +hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, + void *context) { if (!id) { return HS_INVALID; } @@ -972,7 +978,8 @@ hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, } HS_PUBLIC_API -hs_error_t hs_stream_size(const hs_database_t *db, size_t *stream_size) { +hs_error_t HS_CDECL hs_stream_size(const hs_database_t *db, + size_t *stream_size) { if (!stream_size) { return HS_INVALID; } @@ -1019,10 +1026,13 @@ void dumpData(const char *data, size_t len) { #endif HS_PUBLIC_API -hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data, - const unsigned int *length, unsigned int count, - UNUSED unsigned int flags, hs_scratch_t *scratch, - match_event_handler onEvent, void *context) { +hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db, + const char * const * data, + const unsigned int *length, + unsigned int count, + UNUSED unsigned int flags, + hs_scratch_t *scratch, + match_event_handler onEvent, void *context) { if (unlikely(!scratch || !data || !length)) { return HS_INVALID; } diff --git a/src/scratch.c b/src/scratch.c index 8cbe9760..84d23ced 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -240,7 +240,8 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { } HS_PUBLIC_API -hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { +hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, + hs_scratch_t **scratch) { if (!db || !scratch) { return HS_INVALID; } @@ -385,7 +386,8 @@ hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch) { } HS_PUBLIC_API -hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest) { +hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src, + hs_scratch_t **dest) { if (!dest || !src || !ISALIGNED_CL(src) || src->magic != SCRATCH_MAGIC) { return HS_INVALID; } @@ -402,7 +404,7 @@ hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest) { } HS_PUBLIC_API -hs_error_t hs_free_scratch(hs_scratch_t *scratch) { +hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch) { if (scratch) { /* has to be aligned before we can do anything with it */ if (!ISALIGNED_CL(scratch)) { @@ -426,7 +428,7 @@ hs_error_t hs_free_scratch(hs_scratch_t *scratch) { } HS_PUBLIC_API -hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *size) { +hs_error_t HS_CDECL 
hs_scratch_size(const hs_scratch_t *scratch, size_t *size) { if (!size || !scratch || !ISALIGNED_CL(scratch) || scratch->magic != SCRATCH_MAGIC) { return HS_INVALID; diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 75ee3e65..a7658b26 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -30,12 +30,41 @@ if(CMAKE_COMPILER_IS_GNUCC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-array-bounds") endif() -add_library(gtest STATIC ${gtest_SOURCES}) - add_definitions(-DGTEST_HAS_PTHREAD=0 -DSRCDIR=${PROJECT_SOURCE_DIR}) +set(unit_hyperscan_SOURCES + ${gtest_SOURCES} + hyperscan/allocators.cpp + hyperscan/arg_checks.cpp + hyperscan/bad_patterns.cpp + hyperscan/bad_patterns.txt + hyperscan/behaviour.cpp + hyperscan/expr_info.cpp + hyperscan/extparam.cpp + hyperscan/identical.cpp + hyperscan/main.cpp + hyperscan/multi.cpp + hyperscan/order.cpp + hyperscan/scratch_op.cpp + hyperscan/scratch_in_use.cpp + hyperscan/serialize.cpp + hyperscan/single.cpp + hyperscan/som.cpp + hyperscan/stream_op.cpp + hyperscan/test_util.cpp + hyperscan/test_util.h + ) +add_executable(unit-hyperscan ${unit_hyperscan_SOURCES}) +if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) +target_link_libraries(unit-hyperscan hs_shared expressionutil) +else() +target_link_libraries(unit-hyperscan hs expressionutil) +endif() + + if (NOT (RELEASE_BUILD OR FAT_RUNTIME)) set(unit_internal_SOURCES + ${gtest_SOURCES} internal/bitfield.cpp internal/bitutils.cpp internal/charreach.cpp @@ -83,40 +112,13 @@ set(unit_internal_SOURCES internal/util_string.cpp internal/vermicelli.cpp internal/main.cpp -) + ) add_executable(unit-internal ${unit_internal_SOURCES}) -target_link_libraries(unit-internal hs gtest corpusomatic) +set_target_properties(unit-internal PROPERTIES COMPILE_FLAGS "${HS_CXX_FLAGS}") +target_link_libraries(unit-internal hs corpusomatic) endif(NOT (RELEASE_BUILD OR FAT_RUNTIME)) -set(unit_hyperscan_SOURCES - hyperscan/allocators.cpp - hyperscan/arg_checks.cpp - hyperscan/bad_patterns.cpp - hyperscan/bad_patterns.txt - hyperscan/behaviour.cpp - hyperscan/expr_info.cpp - hyperscan/extparam.cpp - hyperscan/identical.cpp - hyperscan/main.cpp - hyperscan/multi.cpp - hyperscan/order.cpp - hyperscan/scratch_op.cpp - hyperscan/scratch_in_use.cpp - hyperscan/serialize.cpp - hyperscan/single.cpp - hyperscan/som.cpp - hyperscan/stream_op.cpp - hyperscan/test_util.cpp - hyperscan/test_util.h - ) -add_executable(unit-hyperscan ${unit_hyperscan_SOURCES}) -if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) -target_link_libraries(unit-hyperscan hs_shared gtest expressionutil) -else() -target_link_libraries(unit-hyperscan hs gtest expressionutil) -endif() - # # build target to run unit tests # diff --git a/unit/internal/main.cpp b/unit/internal/main.cpp index 566ae1a5..15e41d0b 100644 --- a/unit/internal/main.cpp +++ b/unit/internal/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,9 +27,10 @@ */ #include "gtest/gtest.h" +#include "hs_common.h" // Driver: run all the tests (defined in other source files in this directory) -int main(int argc, char **argv) { +int HS_CDECL main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/unit/internal/utf8_validate.cpp b/unit/internal/utf8_validate.cpp index 6649e6fe..f570e6b0 100644 --- a/unit/internal/utf8_validate.cpp +++ 
b/unit/internal/utf8_validate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -117,6 +117,6 @@ INSTANTIATE_TEST_CASE_P(ValidUtf8, ValidUtf8Test, ValuesIn(valid_utf8_tests)); TEST_P(ValidUtf8Test, check) { const auto &info = GetParam(); - ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str())) - << "String is: " << printable(info.str) << std::endl; + SCOPED_TRACE(testing::Message() << "String is: " << printable(info.str)); + ASSERT_EQ(info.is_valid, isValidUtf8(info.str.c_str())); } diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt index c0a6bc21..ea942ef1 100644 --- a/util/CMakeLists.txt +++ b/util/CMakeLists.txt @@ -2,7 +2,7 @@ CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS} ${HS_CXX_FLAGS}") include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${PROJECT_SOURCE_DIR}) diff --git a/util/ExpressionParser.h b/util/ExpressionParser.h index 99230448..c97c114e 100644 --- a/util/ExpressionParser.h +++ b/util/ExpressionParser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,12 +29,14 @@ #ifndef EXPRESSIONPARSER_H #define EXPRESSIONPARSER_H +#include "hs_common.h" + #include struct hs_expr_ext; -bool readExpression(const std::string &line, std::string &expr, - unsigned int *flags, hs_expr_ext *ext, - bool *must_be_ordered = nullptr); +bool HS_CDECL readExpression(const std::string &line, std::string &expr, + unsigned int *flags, hs_expr_ext *ext, + bool *must_be_ordered = nullptr); #endif diff --git a/util/ExpressionParser.rl b/util/ExpressionParser.rl index 073f5300..233b70c1 100644 --- a/util/ExpressionParser.rl +++ b/util/ExpressionParser.rl @@ -115,9 +115,9 @@ void initExt(hs_expr_ext *ext) { ext->max_offset = MAX_OFFSET; } -bool readExpression(const std::string &input, std::string &expr, - unsigned int *flags, hs_expr_ext *ext, - bool *must_be_ordered) { +bool HS_CDECL readExpression(const std::string &input, std::string &expr, + unsigned int *flags, hs_expr_ext *ext, + bool *must_be_ordered) { assert(flags); assert(ext); diff --git a/util/expressions.cpp b/util/expressions.cpp index 93062b48..a81e0cd5 100644 --- a/util/expressions.cpp +++ b/util/expressions.cpp @@ -102,7 +102,7 @@ void processLine(string &line, unsigned lineNum, #define S_ISDIR(st_m) (_S_IFDIR & (st_m)) #define S_ISREG(st_m) (_S_IFREG & (st_m)) #endif -void loadExpressionsFromFile(const string &fname, ExpressionMap &exprMap) { +void HS_CDECL loadExpressionsFromFile(const string &fname, ExpressionMap &exprMap) { struct stat st; if (stat(fname.c_str(), &st) != 0) { return; @@ -195,7 +195,7 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) { } } #else // windows TODO: improve -void loadExpressions(const string &inPath, ExpressionMap &exprMap) { +void HS_CDECL loadExpressions(const string &inPath, ExpressionMap &exprMap) { // Is our input path a file or a directory? 
struct stat st; if (stat(inPath.c_str(), &st) != 0) { @@ -251,8 +251,8 @@ void loadExpressions(const string &inPath, ExpressionMap &exprMap) { } #endif -void loadSignatureList(const string &inFile, - SignatureSet &signatures) { +void HS_CDECL loadSignatureList(const string &inFile, + SignatureSet &signatures) { ifstream f(inFile.c_str()); if (!f.good()) { cerr << "Can't open file: '" << inFile << "'" << endl; diff --git a/util/expressions.h b/util/expressions.h index 03d59e15..078b9972 100644 --- a/util/expressions.h +++ b/util/expressions.h @@ -29,6 +29,8 @@ #ifndef EXPRESSIONS_H #define EXPRESSIONS_H +#include "hs_common.h" + #include #include #include @@ -38,12 +40,12 @@ using SignatureSet = std::vector; // load all of the expressions from the given directory into the given // expression map. Exits on failure. -void loadExpressions(const std::string &inDir, ExpressionMap &exprMap); +void HS_CDECL loadExpressions(const std::string &inDir, ExpressionMap &exprMap); -void loadExpressionsFromFile(const std::string &fname, ExpressionMap &exprMap); +void HS_CDECL loadExpressionsFromFile(const std::string &fname, ExpressionMap &exprMap); // load a list of signature IDs -void loadSignatureList(const std::string &inFile, SignatureSet &signatures); +void HS_CDECL loadSignatureList(const std::string &inFile, SignatureSet &signatures); // trim expression map to only the given signatures, returning result ExpressionMap limitToSignatures(const ExpressionMap &exprMap, From 9ce0abe0ff6d43390fecbbe642e8eeda5f5f1619 Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 7 Jun 2017 14:10:19 +1000 Subject: [PATCH 319/326] Docs: avx512 support --- doc/dev-reference/getting_started.rst | 36 +++++++++++++++++++-------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/doc/dev-reference/getting_started.rst b/doc/dev-reference/getting_started.rst index 1794f3e9..1d44705b 100644 --- a/doc/dev-reference/getting_started.rst +++ b/doc/dev-reference/getting_started.rst @@ -254,18 +254,32 @@ the current platform is supported by Hyperscan. 
 As of this release, the variants of the runtime that are built, and the CPU
 capability that is required, are the following:
 
-+----------+-------------------------------+---------------------+
-| Variant  | CPU Feature Flag(s) Required  | gcc arch flag       |
-+==========+===============================+=====================+
-| Core 2   | ``SSSE3``                     | ``-march=core2``    |
-+----------+-------------------------------+---------------------+
-| Core i7  | ``SSE4_2`` and ``POPCNT``     | ``-march=corei7``   |
-+----------+-------------------------------+---------------------+
-| AVX 2    | ``AVX2``                      | ``-march=avx2``     |
-+----------+-------------------------------+---------------------+
++----------+-------------------------------+---------------------------+
+| Variant  | CPU Feature Flag(s) Required  | gcc arch flag             |
++==========+===============================+===========================+
+| Core 2   | ``SSSE3``                     | ``-march=core2``          |
++----------+-------------------------------+---------------------------+
+| Core i7  | ``SSE4_2`` and ``POPCNT``     | ``-march=corei7``         |
++----------+-------------------------------+---------------------------+
+| AVX 2    | ``AVX2``                      | ``-march=core-avx2``      |
++----------+-------------------------------+---------------------------+
+| AVX 512  | ``AVX512BW`` (see note below) | ``-march=skylake-avx512`` |
++----------+-------------------------------+---------------------------+
 
-As this requires compiler, libc, and binutils support, at this time the fat
-runtime will only be enabled for Linux builds where the compiler supports the
+.. note::
+
+   Hyperscan v4.5 adds support for AVX-512 instructions - in particular the
+   ``AVX-512BW`` instruction set that was introduced on Intel "Skylake" Xeon
+   processors - however the AVX-512 runtime variant is **not** enabled by
+   default in fat runtime builds as not all toolchains support AVX-512
+   instruction sets. To build an AVX-512 runtime, the CMake variable
+   ``BUILD_AVX512`` must be enabled manually during configuration. For
+   example: ::
+
+       cmake -DBUILD_AVX512=on <...>
+
+As the fat runtime requires compiler, libc, and binutils support, at this time
+it will only be enabled for Linux builds where the compiler supports the
 `indirect function "ifunc" function attribute
 <https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-ifunc-function-attribute>`_.
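Tying the variant table back to the API: a fat-runtime binary can probe the host before doing any work, since hs_valid_platform() reports HS_ARCH_ERROR on processors below the SSSE3 baseline. A minimal sketch:

    #include <stdio.h>
    #include <hs.h>

    int main(void) {
        if (hs_valid_platform() != HS_SUCCESS) {
            fprintf(stderr, "this host CPU cannot run Hyperscan\n");
            return 1;
        }
        printf("Hyperscan %s is usable on this host\n", hs_version());
        return 0;
    }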
From aad55e1b72b775511fd5eb6f1718ba60cdbad9aa Mon Sep 17 00:00:00 2001 From: Matthew Barr Date: Wed, 7 Jun 2017 14:45:57 +1000 Subject: [PATCH 320/326] Use env to get python bin, don't hard code path --- tools/hsbench/scripts/CorpusBuilder.py | 2 +- tools/hsbench/scripts/gutenbergCorpus.py | 2 +- tools/hsbench/scripts/linebasedCorpus.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/hsbench/scripts/CorpusBuilder.py b/tools/hsbench/scripts/CorpusBuilder.py index 5baed2bd..da2d593f 100755 --- a/tools/hsbench/scripts/CorpusBuilder.py +++ b/tools/hsbench/scripts/CorpusBuilder.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python ''' A module to construct corpora databases for the Hyperscan benchmarker diff --git a/tools/hsbench/scripts/gutenbergCorpus.py b/tools/hsbench/scripts/gutenbergCorpus.py index fa1b1570..62752a4d 100755 --- a/tools/hsbench/scripts/gutenbergCorpus.py +++ b/tools/hsbench/scripts/gutenbergCorpus.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python ''' This script creates a Hyperscan benchmarking corpus database from a supplied diff --git a/tools/hsbench/scripts/linebasedCorpus.py b/tools/hsbench/scripts/linebasedCorpus.py index bde20e39..b27f8674 100755 --- a/tools/hsbench/scripts/linebasedCorpus.py +++ b/tools/hsbench/scripts/linebasedCorpus.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python ''' Simple script to take a file full of lines of text and push them into a From 9589ee9f90a62136018ea027ea01aaa4a38a6d38 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 5 Jun 2017 14:38:57 +1000 Subject: [PATCH 321/326] hs_expression_info: check unsupported constructs --- src/hs.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/hs.cpp b/src/hs.cpp index b93a56ee..33459347 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -44,6 +44,7 @@ #include "parser/parse_error.h" #include "parser/Parser.h" #include "parser/prefilter.h" +#include "parser/unsupported.h" #include "util/compile_error.h" #include "util/cpuid_flags.h" #include "util/depth.h" @@ -376,6 +377,14 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, prefilterTree(pe.component, ParseMode(flags)); } + // Expressions containing zero-width assertions and other extended pcre + // types aren't supported yet. This call will throw a ParseError + // exception if the component tree contains such a construct. + checkUnsupported(*pe.component); + + pe.component->checkEmbeddedStartAnchor(true); + pe.component->checkEmbeddedEndAnchor(true); + auto built_expr = buildGraph(rm, cc, pe); unique_ptr &g = built_expr.g; ExpressionInfo &expr = built_expr.expr; From c4e2459318abc02287028d75c98dd3f0760a93cb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Mon, 5 Jun 2017 14:33:09 +1000 Subject: [PATCH 322/326] hs_compile: add note to hs_expression_info() docs Successful analysis of a pattern with hs_expression_info() does not imply that the pattern will successfully compile with hs_compile(), etc. It is merely a utility function for pattern analysis. Addresses Github issue #54. --- src/hs_compile.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/hs_compile.h b/src/hs_compile.h index c1074ffc..3d527044 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -169,13 +169,23 @@ typedef struct hs_platform_info { typedef struct hs_expr_info { /** * The minimum length in bytes of a match for the pattern. 
+ * + * Note: in some cases when using advanced features to suppress matches + * (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this + * may represent a conservative lower bound for the true minimum length of + * a match. */ unsigned int min_width; /** * The maximum length in bytes of a match for the pattern. If the pattern - * has an unbounded maximum width, this will be set to the maximum value of - * an unsigned int (UINT_MAX). + * has an unbounded maximum length, this will be set to the maximum value + * of an unsigned int (UINT_MAX). + * + * Note: in some cases when using advanced features to suppress matches + * (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this + * may represent a conservative upper bound for the true maximum length of + * a match. */ unsigned int max_width; @@ -525,6 +535,17 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error); * information provided in @ref hs_expr_info_t includes the minimum and maximum * width of a pattern match. * + * Note: successful analysis of an expression with this function does not imply + * that compilation of the same expression (via @ref hs_compile(), @ref + * hs_compile_multi() or @ref hs_compile_ext_multi()) would succeed. This + * function may return @ref HS_SUCCESS for regular expressions that Hyperscan + * cannot compile. + * + * Note: some per-pattern flags (such as @ref HS_FLAG_ALLOWEMPTY, @ref + * HS_FLAG_SOM_LEFTMOST) are accepted by this call, but as they do not affect + * the properties returned in the @ref hs_expr_info_t structure, they will not + * affect the outcome of this function. + * * @param expression * The NULL-terminated expression to parse. Note that this string must * represent ONLY the pattern to be matched, with no delimiters or flags; @@ -576,6 +597,17 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression, * extended parameter support. The information provided in @ref hs_expr_info_t * includes the minimum and maximum width of a pattern match. * + * Note: successful analysis of an expression with this function does not imply + * that compilation of the same expression (via @ref hs_compile(), @ref + * hs_compile_multi() or @ref hs_compile_ext_multi()) would succeed. This + * function may return @ref HS_SUCCESS for regular expressions that Hyperscan + * cannot compile. + * + * Note: some per-pattern flags (such as @ref HS_FLAG_ALLOWEMPTY, @ref + * HS_FLAG_SOM_LEFTMOST) are accepted by this call, but as they do not affect + * the properties returned in the @ref hs_expr_info_t structure, they will not + * affect the outcome of this function. + * * @param expression * The NULL-terminated expression to parse. 
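A usage sketch of the function these notes document; show_widths is a hypothetical helper. It queries the width bounds for a pattern and, per the new note, success here still does not guarantee that hs_compile() will accept the same pattern. The info structure is released with free() on the assumption that the default allocator is in use:

    #include <stdio.h>
    #include <stdlib.h>
    #include <hs.h>

    static void show_widths(const char *pattern, unsigned int flags) {
        hs_expr_info_t *info = NULL;
        hs_compile_error_t *err = NULL;
        if (hs_expression_info(pattern, flags, &info, &err) != HS_SUCCESS) {
            fprintf(stderr, "%s: %s\n", pattern, err->message);
            hs_free_compile_error(err);
            return;
        }
        printf("%s: min width %u, max width %u\n", pattern,
               info->min_width, info->max_width);
        free(info);
    }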
Note that this string must * represent ONLY the pattern to be matched, with no delimiters or flags; From 9aee3b22b50e0f369d7bf50cd25cbc4e3725771b Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 8 Jun 2017 10:33:23 +1000 Subject: [PATCH 323/326] ng_expr_info: more complete analysis passes --- src/hs.cpp | 13 ++--------- src/nfagraph/ng_expr_info.cpp | 41 +++++++++++++++++++++++++++++++++-- src/nfagraph/ng_expr_info.h | 5 +++-- 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/hs.cpp b/src/hs.cpp index 33459347..e3c1f811 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -39,10 +39,8 @@ #include "compiler/error.h" #include "nfagraph/ng.h" #include "nfagraph/ng_expr_info.h" -#include "nfagraph/ng_extparam.h" -#include "nfagraph/ng_fuzzy.h" -#include "parser/parse_error.h" #include "parser/Parser.h" +#include "parser/parse_error.h" #include "parser/prefilter.h" #include "parser/unsupported.h" #include "util/compile_error.h" @@ -394,14 +392,7 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, throw ParseError("Internal error."); } - // validate graph's suitability for fuzzing - validate_fuzzy_compile(*g, expr.edit_distance, expr.utf8, cc.grey); - - // fuzz graph - this must happen before any transformations are made - make_fuzzy(*g, expr.edit_distance, cc.grey); - - propagateExtendedParams(*g, expr, rm); - fillExpressionInfo(rm, *g, expr, &local_info); + fillExpressionInfo(rm, cc, *g, expr, &local_info); } catch (const CompileError &e) { // Compiler error occurred diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index 6a625ddf..5f5bbea7 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -37,10 +37,14 @@ #include "ng_asserts.h" #include "ng_depth.h" #include "ng_edge_redundancy.h" +#include "ng_extparam.h" +#include "ng_fuzzy.h" #include "ng_holder.h" +#include "ng_prune.h" #include "ng_reports.h" #include "ng_util.h" #include "ue2common.h" +#include "compiler/expression_info.h" #include "parser/position.h" // for POS flags #include "util/boundary_reports.h" #include "util/compile_context.h" @@ -135,15 +139,48 @@ bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) { return false; } -void fillExpressionInfo(ReportManager &rm, NGHolder &g, - const ExpressionInfo &expr, hs_expr_info *info) { +void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, + NGHolder &g, ExpressionInfo &expr, + hs_expr_info *info) { assert(info); + // remove reports that aren't on vertices connected to accept. + clearReports(g); + + assert(allMatchStatesHaveReports(g)); + + /* + * Note: the following set of analysis passes / transformations should + * match those in NG::addGraph(). 
+     */
+
     /* ensure utf8 starts at cp boundary */
     ensureCodePointStart(rm, g, expr);
+
+    if (can_never_match(g)) {
+        throw CompileError(expr.index, "Pattern can never match.");
+    }
+
+    // validate graph's suitability for fuzzing
+    validate_fuzzy_compile(g, expr.edit_distance, expr.utf8, cc.grey);
+
     resolveAsserts(rm, g, expr);
+    assert(allMatchStatesHaveReports(g));
+
+    // fuzz graph - this must happen before any transformations are made
+    make_fuzzy(g, expr.edit_distance, cc.grey);
+
+    pruneUseless(g);
+    pruneEmptyVertices(g);
+
+    if (can_never_match(g)) {
+        throw CompileError(expr.index, "Pattern can never match.");
+    }
+
     optimiseVirtualStarts(g);
+
+    propagateExtendedParams(g, expr, rm);
+
     removeLeadingVirtualVerticesFromRoot(g, g.start);
     removeLeadingVirtualVerticesFromRoot(g, g.startDs);
diff --git a/src/nfagraph/ng_expr_info.h b/src/nfagraph/ng_expr_info.h
index e518738c..f9bd6809 100644
--- a/src/nfagraph/ng_expr_info.h
+++ b/src/nfagraph/ng_expr_info.h
@@ -41,9 +41,10 @@ namespace ue2 {
 class ExpressionInfo;
 class NGHolder;
 class ReportManager;
+struct CompileContext;
 
-void fillExpressionInfo(ReportManager &rm, NGHolder &g,
-                        const ExpressionInfo &expr, hs_expr_info *info);
+void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
+                        NGHolder &g, ExpressionInfo &expr, hs_expr_info *info);
 
 } // namespace ue2

From 173178b00b1e4ed1d0829f17c7f82d93b594d052 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 30 May 2017 09:47:53 +1000
Subject: [PATCH 324/326] changelog: quote function name

---
 CHANGELOG.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c4fbe49..91e4330b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@ This is a list of notable changes to Hyperscan, in reverse chronological order.
 
 ## [4.4.1] 2017-02-28
 - Bugfixes to fix issues where stale data was being referenced in scratch
-  memory. In particular this may have resulted in hs_close_stream()
+  memory. In particular this may have resulted in `hs_close_stream()`
   referencing data from other previously scanned streams. This may result in
   incorrect matches being reported.
@@ -142,9 +142,7 @@ This is a list of notable changes to Hyperscan, in reverse chronological order.
   supplied with a NULL scratch pointer if no matches are required. This is in
   line with the behaviour of `hs_close_stream()`.
 - Disallow bounded repeats with a very large minimum repeat but no maximum,
-  i.e. {
-  N,
-} for very large N.
+  i.e. {N,} for very large N.
 - Reduce compile memory usage in literal set expansion for some large cases.
 
 ## [4.0.0] 2015-10-20

From 9b8b609207274d9563851c3ed7fa81accdee3a5d Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Fri, 9 Jun 2017 09:47:03 +1000
Subject: [PATCH 325/326] changelog: updates for 4.5 release

---
 CHANGELOG.md | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 91e4330b..9ebe1ec4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,49 @@
 
 This is a list of notable changes to Hyperscan, in reverse chronological order.
 
+## [4.5.0] 2017-06-09
+- New API feature: approximate matching using the "edit distance" extended
+  parameter. This allows the user to request all matches that are a given edit
+  distance from an exact match for a pattern.
+- Initial support for Intel(R) Advanced Vector Extensions 512 (Intel(R)
+  AVX-512), disabled by default. To enable it, pass `-DBUILD_AVX512=1` to
+  `cmake`.
+- Major compile time improvements in many subsystems, reducing compile time
+  significantly for many large pattern sets.
+- Internal reworking of literal matchers to operate on literals of at
+  most eight characters, with subsequent confirmation done in the Rose
+  interpreter. This reduces complexity and bytecode size and improves
+  performance for many pattern sets.
+- Improve performance of the FDR literal matcher front end.
+- Improve bucket assignment and other heuristics governing the FDR literal
+  matcher.
+- Improve optimisation passes that take advantage of extended parameter
+  constraints (`min_offset`, etc).
+- Introduce further lookaround specialisations to improve scanning performance.
+- Optimise Rose interpreter construction to reduce the length of programs
+  generated in some situations.
+- Remove the old "Rose" pattern decomposition analysis pass in favour of the
+  new "Violet" pass introduced in Hyperscan 4.3.0.
+- In streaming mode, allow exhaustion (where the stream can no longer produce
+  matches) to be detected in more situations, improving scanning performance.
+- Improve parsing of control verbs (such as `(*UTF8)`) that can only occur at
+  the beginning of the pattern. Combinations of supported verbs in any order
+  are now permitted.
+- Update version of PCRE used by testing tools as a syntax and semantic
+  reference to PCRE 8.40.
+- Tuning support for Intel(R) microarchitecture code names Skylake, Skylake
+  Server, Goldmont.
+- CMake: when building a native build with a version of GCC that doesn't
+  recognise the host compiler, tune for the microarch selected by
+  `-march=native`.
+- CMake: don't fail if SQLite (which is only required to build the `hsbench`
+  tool) is not present.
+- CMake: detect libc++ directly and use that to inform the Boost version
+  requirement.
+- Bugfix for issue #51: make the fat runtime build wrapper less fragile.
+- Bugfix for issues #46, #52: use `sqlite3_errmsg()` to allow SQLite 3.6.x to
+  be used. Thanks to @EaseTheWorld for the PR.
+
 ## [4.4.1] 2017-02-28
 - Bugfixes to fix issues where stale data was being referenced in scratch
   memory. In particular this may have resulted in `hs_close_stream()`

From 87469d477587616866afed551f3b67f465ee374a Mon Sep 17 00:00:00 2001
From: Matthew Barr
Date: Fri, 9 Jun 2017 09:50:23 +1000
Subject: [PATCH 326/326] Bump version number for release

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 330b8650..7f452696 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11)
 project (hyperscan C CXX)
 
 set (HS_MAJOR_VERSION 4)
-set (HS_MINOR_VERSION 4)
-set (HS_PATCH_VERSION 1)
+set (HS_MINOR_VERSION 5)
+set (HS_PATCH_VERSION 0)
 set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})
 
 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
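As a closing illustration of the headline 4.5.0 feature, a sketch of requesting approximate matches through the extended-parameter compiler. build_fuzzy_db and the pattern are arbitrary examples, and the edit_distance field and HS_EXT_FLAG_EDIT_DISTANCE flag are assumed to carry the names introduced by this release's hs_expr_ext additions:

    #include <stdio.h>
    #include <string.h>
    #include <hs.h>

    static hs_database_t *build_fuzzy_db(void) {
        hs_expr_ext_t ext;
        memset(&ext, 0, sizeof(ext));
        ext.flags = HS_EXT_FLAG_EDIT_DISTANCE;
        ext.edit_distance = 1; /* also report matches within edit distance 1 */

        const char *exprs[] = {"hyperscan"};
        const unsigned flags[] = {0};
        const unsigned ids[] = {1};
        const hs_expr_ext_t *exts[] = {&ext};
        hs_database_t *db = NULL;
        hs_compile_error_t *err = NULL;
        if (hs_compile_ext_multi(exprs, flags, ids, exts, 1, HS_MODE_BLOCK,
                                 NULL, &db, &err) != HS_SUCCESS) {
            fprintf(stderr, "compile failed: %s\n", err->message);
            hs_free_compile_error(err);
            return NULL;
        }
        return db;
    }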