From b40899966fa6ec1d02616ba936040d491e3e6766 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Wed, 22 Sep 2021 11:21:37 +0300 Subject: [PATCH] Unify benchmarks, more accurate measurements (cherry picked from commit f50d7656bc78c54ec25916b6c8e655c188d79a13) --- benchmarks/CMakeLists.txt | 4 +- benchmarks/benchmarks.cpp | 209 +++++++++++++++++++++----------------- benchmarks/benchmarks.hpp | 31 +++++- 3 files changed, 143 insertions(+), 101 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index f56a5f5b..921b013e 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(benchmarks benchmarks.cpp noodle.cpp) +add_executable(benchmarks benchmarks.cpp) set_source_files_properties(benchmarks.cpp PROPERTIES COMPILE_FLAGS "-Wall -Wno-unused-variable") -target_link_libraries(benchmarks hs) \ No newline at end of file +target_link_libraries(benchmarks hs) diff --git a/benchmarks/benchmarks.cpp b/benchmarks/benchmarks.cpp index ce680334..a0df3706 100644 --- a/benchmarks/benchmarks.cpp +++ b/benchmarks/benchmarks.cpp @@ -4,39 +4,14 @@ #include #include #include +#include -#include "nfa/shufti.h" -#include "nfa/shufticompile.h" -#include "nfa/truffle.h" -#include "nfa/trufflecompile.h" #include "benchmarks.hpp" -#define MAX_LOOPS 500000000 -#define MAX_MATCHES 10 +#define MAX_LOOPS 1000000000 +#define MAX_MATCHES 5 +#define N 8 -/* -void shuffle_init(){ - m128 lo, hi; - ue2::CharReach chars; - chars.set('a'); - shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); - std::unique_ptr kt1 ( new u8[size] ); - memset(kt1.get(),'b',size); -} -*/ - -/* -void truffle_init(){ - m128 lo, hi; - ue2::CharReach chars; - chars.set('a'); - truffleBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); - std::unique_ptr kt1 ( new u8[size] ); - memset(kt1.get(),'b',size); -} -*/ - -/* struct hlmMatchEntry { size_t to; u32 id; @@ -56,71 +31,56 @@ hwlmcb_rv_t hlmSimpleCallback(size_t to, u32 id, return HWLM_CONTINUE_MATCHING; } -void noodle_init(){ - ctxt.clear(); - std::unique_ptr data ( new u8[size] ); - memset(data.get(), 'a', size); - double total_sec = 0.0; - u64a transferred_size = 0; - double avg_time = 0.0; - double max_bw = 0.0; - double bandwitdh = 0.0; - u32 id = 1000; - ue2::hwlmLiteral lit(std::string(lit_str, lit_len), nocase, id); - auto n = ue2::noodBuildTable(lit); - assert(n != nullptr); - struct hs_scratch scratch; -} -*/ - -void run_benchmarks(int size, int loops, int M, bool has_match, std::function function) { - m128 lo, hi; - ue2::CharReach chars; - chars.set('a'); - shuftiBuildMasks(chars, (u8 *)&lo, (u8 *)&hi); - std::unique_ptr kt1 ( new u8[size] ); - memset(kt1.get(),'b',size); +template +static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse, MicroBenchmark &bench, InitFunc &&init, BenchFunc &&func) { + init(bench); double total_sec = 0.0; u64a transferred_size = 0; - double bandwidth = 0.0; + double bw = 0.0; + double avg_bw = 0.0; double max_bw = 0.0; double avg_time = 0.0; - if (has_match) { + if (max_matches) { int pos = 0; - for(int j = 0; j < M; j++) { - kt1[pos] = 'b'; - pos = (j*size) / M ; - kt1[pos] = 'a'; + for(int j = 0; j < max_matches - 1; j++) { + bench.buf[pos] = 'b'; + pos = (j+1) *size / max_matches ; + bench.buf[pos] = 'a'; unsigned long act_size = 0; auto start = std::chrono::steady_clock::now(); for(int i = 0; i < loops; i++) { - const u8 *res = function(lo, hi, kt1.get(), kt1.get() + size); - act_size += res - kt1.get(); + const u8 *res = func(bench); + if (is_reverse) + act_size += bench.buf.data() + size - res; + else + act_size += res - bench.buf.data(); } auto end = std::chrono::steady_clock::now(); double dt = std::chrono::duration_cast(end - start).count(); total_sec += dt; /*convert microseconds to seconds*/ - total_sec /= 1000000.0; /*calculate bandwidth*/ - bandwidth += (act_size / dt) * 1000000.0; + bw = (act_size / dt) * 1000000.0 / 1048576.0; + /*std::cout << "act_size = " << act_size << std::endl; + std::cout << "dt = " << dt << std::endl; + std::cout << "bw = " << bw << std::endl;*/ + avg_bw += bw; /*convert to MB/s*/ - bandwidth = bandwidth / 1048576.0; - max_bw = std::max(bandwidth ,max_bw); + max_bw = std::max(bw, max_bw); /*calculate average time*/ avg_time += total_sec / loops; } - avg_time /= M; - bandwidth /= M; + avg_time /= max_matches; + avg_bw /= max_matches; + total_sec /= 1000000.0; /*convert average time to us*/ - avg_time *= 1000000.0; - printf(KMAG "case with %u matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, " + printf(KMAG "%s: %u matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, " KBLU "average time per call =" RST " %.3f μs," KBLU " max bandwidth = " RST " %.3f MB/s," KBLU " average bandwidth =" RST " %.3f MB/s \n", - M, size ,loops, total_sec, avg_time, max_bw, bandwidth); + bench.label, max_matches, size ,loops, total_sec, avg_time, max_bw, avg_bw); } else { auto start = std::chrono::steady_clock::now(); for (int i = 0; i < loops; i++) { - function(lo, hi, kt1.get(), kt1.get() + size); + const u8 *res = func(bench); } auto end = std::chrono::steady_clock::now(); total_sec += std::chrono::duration_cast(end - start).count(); @@ -134,40 +94,97 @@ void run_benchmarks(int size, int loops, int M, bool has_match, std::function functions[] = {shuftiExec, rshuftiExec, truffleExec, rtruffleExec}; - int sizes[] = { 16000, 32000, 64000, 120000, 1600000, 2000000, 2500000, 3500000, 150000000, 250000000, 350000000, 500000000 }; - std::string labels[] = {"\x1B[33m shuftiExec Benchmarks \x1B[0m\n", "\x1B[33m rshuftiExec Benchmarks \x1B[0m\n", - "\x1B[33m triffleExec Benchmarks \x1B[0m\n", "\x1B[33m triffleExec Benchmarks \x1B[0m\n"}; + std::vector sizes; + for (size_t i = 0; i < N; i++) sizes.push_back(16000 << i*2); const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa"; for (size_t i = 0; i < std::size(sizes); i++) { - for(size_t j = 0; j < std::size(functions); j++) { - std::cout << labels[j]; - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, false, functions[j]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, true, functions[j]); - } + MicroBenchmark bench("Shufti", sizes[i]); + run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, false, bench, + [&](MicroBenchmark &b) { + b.chars.set('a'); + ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); + memset(b.buf.data(), 'b', b.size); + }, + [&](MicroBenchmark &b) { + return shuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); + }); } - - for(size_t i=0; i < std::size(sizes); i++){ - //we imitate the noodle unit tests - for (int char_len = 1; char_len < 9; char_len++) { - std::unique_ptr str ( new char[char_len] ); - for (int j=0; j +#include "nfa/shufti.h" +#include "nfa/shufticompile.h" +#include "nfa/truffle.h" +#include "nfa/trufflecompile.h" +#include "hwlm/noodle_build.h" +#include "hwlm/noodle_engine.h" +#include "hwlm/noodle_internal.h" +#include "hwlm/hwlm_literal.h" +#include "util/bytecode_ptr.h" +#include "scratch.h" /*define colour control characters*/ #define RST "\x1B[0m" @@ -10,6 +19,22 @@ #define KCYN "\x1B[36m" #define KWHT "\x1B[37m" +class MicroBenchmark +{ +public: + char const *label; + size_t size; -void noodle_benchmarks(int size, int M, const char *lit_str, int lit_len, char nocase); -void run_benchmarks(int size, int loops, int M, bool has_match, std::function function); \ No newline at end of file + // Shufti/Truffle + m128 lo, hi; + ue2::CharReach chars; + std::vector buf; + + // Noodle + struct hs_scratch scratch; + ue2::bytecode_ptr nt; + + MicroBenchmark(char const *label_, size_t size_) + :label(label_), size(size_), buf(size_) { + }; +};