diff --git a/benchmarks/benchmarks.cpp b/benchmarks/benchmarks.cpp index a0df3706..b10351cb 100644 --- a/benchmarks/benchmarks.cpp +++ b/benchmarks/benchmarks.cpp @@ -35,7 +35,7 @@ template static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse, MicroBenchmark &bench, InitFunc &&init, BenchFunc &&func) { init(bench); double total_sec = 0.0; - u64a transferred_size = 0; + u64a total_size = 0; double bw = 0.0; double avg_bw = 0.0; double max_bw = 0.0; @@ -46,21 +46,21 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse bench.buf[pos] = 'b'; pos = (j+1) *size / max_matches ; bench.buf[pos] = 'a'; - unsigned long act_size = 0; + u64a actual_size = 0; auto start = std::chrono::steady_clock::now(); for(int i = 0; i < loops; i++) { const u8 *res = func(bench); if (is_reverse) - act_size += bench.buf.data() + size - res; + actual_size += bench.buf.data() + size - res; else - act_size += res - bench.buf.data(); + actual_size += res - bench.buf.data(); } auto end = std::chrono::steady_clock::now(); double dt = std::chrono::duration_cast(end - start).count(); total_sec += dt; /*convert microseconds to seconds*/ /*calculate bandwidth*/ - bw = (act_size / dt) * 1000000.0 / 1048576.0; + bw = (actual_size / dt) * 1000000.0 / 1048576.0; /*std::cout << "act_size = " << act_size << std::endl; std::cout << "dt = " << dt << std::endl; std::cout << "bw = " << bw << std::endl;*/ @@ -85,105 +85,112 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse auto end = std::chrono::steady_clock::now(); total_sec += std::chrono::duration_cast(end - start).count(); /*calculate transferred size*/ - transferred_size = size * loops; + total_size = size * loops; /*calculate average time*/ avg_time = total_sec / loops; /*convert microseconds to seconds*/ total_sec /= 1000000.0; /*calculate maximum bandwidth*/ - max_bw = transferred_size / total_sec; + max_bw = total_size / total_sec; /*convert to MB/s*/ max_bw /= 1048576.0; printf(KMAG "%s: no matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, " KBLU "average time per call =" RST " %.3f μs ," KBLU " bandwidth = " RST " %.3f MB/s \n", - bench.label, size ,loops, total_sec, avg_time, max_bw); + bench.label, size ,loops, total_sec, avg_time, max_bw ); } } int main(){ + int matches[] = {0, MAX_MATCHES}; std::vector sizes; for (size_t i = 0; i < N; i++) sizes.push_back(16000 << i*2); const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa"; - - for (size_t i = 0; i < std::size(sizes); i++) { - MicroBenchmark bench("Shufti", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, false, bench, - [&](MicroBenchmark &b) { - b.chars.set('a'); - ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); - memset(b.buf.data(), 'b', b.size); - }, - [&](MicroBenchmark &b) { - return shuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); - }); - } - - for (size_t i = 0; i < std::size(sizes); i++) { - MicroBenchmark bench("Reverse Shufti", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, true, bench, - [&](MicroBenchmark &b) { - b.chars.set('a'); - ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); - memset(b.buf.data(), 'b', b.size); - }, - [&](MicroBenchmark &b) { - return rshuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); - }); - } - - for (size_t i = 0; i < std::size(sizes); i++) { - MicroBenchmark bench("Truffle", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, false, bench, - [&](MicroBenchmark &b) { - b.chars.set('a'); - ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); - memset(b.buf.data(), 'b', b.size); - }, - [&](MicroBenchmark &b) { - return truffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); - }); - } - - for (size_t i = 0; i < std::size(sizes); i++) { - MicroBenchmark bench("Reverse Truffle", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, true, bench, - [&](MicroBenchmark &b) { - b.chars.set('a'); - ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); - memset(b.buf.data(), 'b', b.size); - }, - [&](MicroBenchmark &b) { - return rtruffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); - }); - } - - for (size_t i = 0; i < std::size(sizes); i++) { - //we imitate the noodle unit tests - std::string str; - const size_t char_len = 5; - str.resize(char_len + 1); - for (size_t j=0; j < char_len; j++) { - srand (time(NULL)); - int key = rand() % + 36 ; - str[char_len] = charset[key]; - str[char_len + 1] = '\0'; - } - - MicroBenchmark bench("Noodle", sizes[i]); - run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, false, bench, - [&](MicroBenchmark &b) { - ctxt.clear(); - memset(b.buf.data(), 'a', b.size); - u32 id = 1000; - ue2::hwlmLiteral lit(str, true, id); - b.nt = ue2::noodBuildTable(lit); - assert(b.nt != nullptr); - }, - [&](MicroBenchmark &b) { - noodExec(b.nt.get(), b.buf.data(), b.size, 0, hlmSimpleCallback, &b.scratch); - return b.buf.data() + b.size; + + for (int m = 0; m < 2; m++) { + for (size_t i = 0; i < std::size(sizes); i++) { + MicroBenchmark bench("Shufti", sizes[i]); + run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, + [&](MicroBenchmark &b) { + b.chars.set('a'); + ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); + memset(b.buf.data(), 'b', b.size); + }, + [&](MicroBenchmark &b) { + return shuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); + } + ); + } + + for (size_t i = 0; i < std::size(sizes); i++) { + MicroBenchmark bench("Reverse Shufti", sizes[i]); + run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, + [&](MicroBenchmark &b) { + b.chars.set('a'); + ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); + memset(b.buf.data(), 'b', b.size); + }, + [&](MicroBenchmark &b) { + return rshuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); + } + ); + } + + for (size_t i = 0; i < std::size(sizes); i++) { + MicroBenchmark bench("Truffle", sizes[i]); + run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, + [&](MicroBenchmark &b) { + b.chars.set('a'); + ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); + memset(b.buf.data(), 'b', b.size); + }, + [&](MicroBenchmark &b) { + return truffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); + } + ); + } + + for (size_t i = 0; i < std::size(sizes); i++) { + MicroBenchmark bench("Reverse Truffle", sizes[i]); + run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench, + [&](MicroBenchmark &b) { + b.chars.set('a'); + ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); + memset(b.buf.data(), 'b', b.size); + }, + [&](MicroBenchmark &b) { + return rtruffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); + } + ); + } + + for (size_t i = 0; i < std::size(sizes); i++) { + //we imitate the noodle unit tests + std::string str; + const size_t char_len = 5; + str.resize(char_len + 1); + for (size_t j=0; j < char_len; j++) { + srand (time(NULL)); + int key = rand() % + 36 ; + str[char_len] = charset[key]; + str[char_len + 1] = '\0'; + } + + MicroBenchmark bench("Noodle", sizes[i]); + run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench, + [&](MicroBenchmark &b) { + ctxt.clear(); + memset(b.buf.data(), 'a', b.size); + u32 id = 1000; + ue2::hwlmLiteral lit(str, true, id); + b.nt = ue2::noodBuildTable(lit); + assert(b.nt != nullptr); + }, + [&](MicroBenchmark &b) { + noodExec(b.nt.get(), b.buf.data(), b.size, 0, hlmSimpleCallback, &b.scratch); + return b.buf.data() + b.size; + } + ); } - ); } return 0;