diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ebe1ec4..73cc2f3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. +## [4.5.1] 2017-06-16 +- Bugfix for issue #56: workaround for gcc-4.8 C++11 defect. +- Bugfix for literal matching table generation, reversing a regression in + performance for some literal matching cases. +- Bugfixes for hsbench, related to multicore benchmarking, portability fixes + for FreeBSD, and clarifying output results. +- CMake: removed a duplicate else branch that causes very recent (v3.9) builds + of CMake to fail. + ## [4.5.0] 2017-06-09 - New API feature: approximate matching using the "edit distance" extended parameter. This allows the user to request all matches that are a given edit diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f452696..a02584de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project (hyperscan C CXX) set (HS_MAJOR_VERSION 4) set (HS_MINOR_VERSION 5) -set (HS_PATCH_VERSION 0) +set (HS_PATCH_VERSION 1) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 0519b2e5..5be258aa 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -75,7 +75,6 @@ else (NOT FAT_RUNTIME) if (NOT HAVE_AVX512) message(STATUS "Building without AVX512 support") endif () -else (NOT FAT_RUNTIME) if (NOT HAVE_SSSE3) message(FATAL_ERROR "A minimum of SSSE3 compiler support is required") endif () diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 319141c4..b14ffb42 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -163,7 +163,7 @@ bytecode_ptr getFDRConfirm(const vector &lits, if (make_small) { nBits = min(10U, lg2(lits.size()) + 1); } else { - nBits = lg2(lits.size() + 4); + nBits = lg2(lits.size()) + 4; } CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL; diff --git a/src/util/small_vector.h b/src/util/small_vector.h index 0b60d8c0..6293759c 100644 --- a/src/util/small_vector.h +++ b/src/util/small_vector.h @@ -55,6 +55,9 @@ using small_vector = boost::container::small_vector; template > using small_vector = std::vector; +// Support workarounds for flat_set/flat_map and GCC 4.8. +#define SMALL_VECTOR_IS_STL_VECTOR 1 + #endif // HAVE_BOOST_CONTAINER_SMALL_VECTOR } // namespace ue2 diff --git a/src/util/ue2_containers.h b/src/util/ue2_containers.h index 29919c7e..d345a4fa 100644 --- a/src/util/ue2_containers.h +++ b/src/util/ue2_containers.h @@ -162,9 +162,19 @@ class flat_set public totally_ordered> { using base_type = flat_detail::flat_base; using storage_type = typename base_type::storage_type; + using storage_iterator = typename storage_type::iterator; + using storage_const_iterator = typename storage_type::const_iterator; using base_type::data; using base_type::comp; +#if defined(SMALL_VECTOR_IS_STL_VECTOR) + // Construct a non-const iterator from a const iterator. Used in flat_map + // and flat_set erase() calls to work around g++-4.8 compatibility issues. + storage_iterator mutable_iterator(storage_const_iterator it) { + return data().begin() + std::distance(data().cbegin(), it); + } +#endif + public: // Member types. using key_type = T; @@ -282,11 +292,27 @@ public: } void erase(const_iterator pos) { - data().erase(pos.get()); +#if defined(SMALL_VECTOR_IS_STL_VECTOR) + // Cope with libstdc++ 4.8's incomplete STL (it's missing C++11 + // vector::erase(const_iterator)) by explicitly using a non-const + // iterator. + auto pos_it = mutable_iterator(pos.get()); +#else + auto pos_it = pos.get(); +#endif + data().erase(pos_it); } void erase(const_iterator first, const_iterator last) { - data().erase(first.get(), last.get()); +#if defined(SMALL_VECTOR_IS_STL_VECTOR) + // As above, work around libstdc++ 4.8's incomplete C++11 support. + auto first_it = mutable_iterator(first.get()); + auto last_it = mutable_iterator(last.get()); +#else + auto first_it = first.get(); + auto last_it = last.get(); +#endif + data().erase(first_it, last_it); } void erase(const key_type &key) { @@ -374,9 +400,19 @@ private: flat_detail::flat_base, Compare, Allocator>; using keyval_storage_type = std::pair; using storage_type = typename base_type::storage_type; + using storage_iterator = typename storage_type::iterator; + using storage_const_iterator = typename storage_type::const_iterator; using base_type::data; using base_type::comp; +#if defined(SMALL_VECTOR_IS_STL_VECTOR) + // Construct a non-const iterator from a const iterator. Used in flat_map + // and flat_set erase() calls to work around g++-4.8 compatibility issues. + storage_iterator mutable_iterator(storage_const_iterator it) { + return data().begin() + std::distance(data().cbegin(), it); + } +#endif + public: // More Member types. using size_type = typename storage_type::size_type; @@ -444,9 +480,6 @@ public: const_reverse_iterator rend() const { return crend(); } private: - using storage_iterator = typename storage_type::iterator; - using storage_const_iterator = typename storage_type::const_iterator; - storage_iterator data_lower_bound(const key_type &key) { return std::lower_bound( data().begin(), data().end(), key, @@ -526,11 +559,27 @@ public: } void erase(const_iterator pos) { - data().erase(pos.get()); +#if defined(SMALL_VECTOR_IS_STL_VECTOR) + // Cope with libstdc++ 4.8's incomplete STL (it's missing C++11 + // vector::erase(const_iterator)) by explicitly using a non-const + // iterator. + auto pos_it = mutable_iterator(pos.get()); +#else + auto pos_it = pos.get(); +#endif + data().erase(pos_it); } void erase(const_iterator first, const_iterator last) { - data().erase(first.get(), last.get()); +#if defined(SMALL_VECTOR_IS_STL_VECTOR) + // As above, work around libstdc++ 4.8's incomplete C++11 support. + auto first_it = mutable_iterator(first.get()); + auto last_it = mutable_iterator(last.get()); +#else + auto first_it = first.get(); + auto last_it = last.get(); +#endif + data().erase(first_it, last_it); } void erase(const key_type &key) { diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index 3153737e..9c5fd6cb 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -125,10 +125,10 @@ public: // Apply processor affinity (if available) to this thread. bool affine(UNUSED int cpu) { #ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP -#if defined(__linux__) - cpu_set_t cpuset; -#else // BSD +#if defined(__FreeBSD__) cpuset_t cpuset; +#else + cpu_set_t cpuset; #endif CPU_ZERO(&cpuset); assert(cpu >= 0 && cpu < CPU_SETSIZE); @@ -205,7 +205,7 @@ static void processArgs(int argc, char *argv[], vector &sigSets, UNUSED unique_ptr &grey) { const char options[] = "-b:c:Cd:e:E:G:hi:n:No:p:sVw:z:" -#if HAVE_DECL_PTHREAD_SETAFFINITY_N +#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP "T:" // add the thread flag #endif ; @@ -287,6 +287,7 @@ void processArgs(int argc, char *argv[], vector &sigSets, case 'V': scan_mode = ScanMode::VECTORED; break; +#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP case 'T': if (!strToList(optarg, threadCores)) { usage("Couldn't parse argument to -T flag, should be" @@ -294,6 +295,7 @@ void processArgs(int argc, char *argv[], vector &sigSets, exit(1); } break; +#endif case 'z': { unsigned int sinumber; if (!fromString(optarg, sinumber)) { @@ -649,8 +651,8 @@ void displayResults(const vector> &threads, } } - printf("Time spent scanning: %'0.3f seconds\n", totalSecs); - printf("Corpus size: %'llu bytes ", bytesPerRun); + printf("Time spent scanning: %'0.3f seconds\n", totalSecs); + printf("Corpus size: %'llu bytes ", bytesPerRun); switch (scan_mode) { case ScanMode::STREAMING: printf("(%'zu blocks in %'llu streams)\n", corpus_blocks.size(), @@ -669,16 +671,16 @@ void displayResults(const vector> &threads, u64a totalBlocks = corpus_blocks.size() * repeats * threads.size(); double matchRate = ((double)matchesPerRun * 1024) / bytesPerRun; - printf("Matches per iteration: %'llu (%'0.3f matches/kilobyte)\n", + printf("Matches per iteration: %'llu (%'0.3f matches/kilobyte)\n", matchesPerRun, matchRate); double blockRate = (double)totalBlocks / (double)totalSecs; - printf("Overall block rate: %'0.2f blocks/sec\n", blockRate); - printf("Mean throughput: %'0.2Lf Mbit/sec\n", + printf("Overall block rate: %'0.2f blocks/sec\n", blockRate); + printf("Mean throughput (overall): %'0.2Lf Mbit/sec\n", calc_mbps(totalSecs, totalBytes)); double lowestScanTime = fastestResult(threads); - printf("Maximum throughput: %'0.2Lf Mbit/sec\n", + printf("Max throughput (per core): %'0.2Lf Mbit/sec\n", calc_mbps(lowestScanTime, bytesPerRun)); printf("\n");