Improve benchmarks

This commit is contained in:
Konstantinos Margaritis 2021-10-03 10:51:31 +00:00
parent fad39b6058
commit 96af3e8613

View File

@ -35,7 +35,7 @@ template<typename InitFunc, typename BenchFunc>
static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse, MicroBenchmark &bench, InitFunc &&init, BenchFunc &&func) { static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse, MicroBenchmark &bench, InitFunc &&init, BenchFunc &&func) {
init(bench); init(bench);
double total_sec = 0.0; double total_sec = 0.0;
u64a transferred_size = 0; u64a total_size = 0;
double bw = 0.0; double bw = 0.0;
double avg_bw = 0.0; double avg_bw = 0.0;
double max_bw = 0.0; double max_bw = 0.0;
@ -46,21 +46,21 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse
bench.buf[pos] = 'b'; bench.buf[pos] = 'b';
pos = (j+1) *size / max_matches ; pos = (j+1) *size / max_matches ;
bench.buf[pos] = 'a'; bench.buf[pos] = 'a';
unsigned long act_size = 0; u64a actual_size = 0;
auto start = std::chrono::steady_clock::now(); auto start = std::chrono::steady_clock::now();
for(int i = 0; i < loops; i++) { for(int i = 0; i < loops; i++) {
const u8 *res = func(bench); const u8 *res = func(bench);
if (is_reverse) if (is_reverse)
act_size += bench.buf.data() + size - res; actual_size += bench.buf.data() + size - res;
else else
act_size += res - bench.buf.data(); actual_size += res - bench.buf.data();
} }
auto end = std::chrono::steady_clock::now(); auto end = std::chrono::steady_clock::now();
double dt = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); double dt = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
total_sec += dt; total_sec += dt;
/*convert microseconds to seconds*/ /*convert microseconds to seconds*/
/*calculate bandwidth*/ /*calculate bandwidth*/
bw = (act_size / dt) * 1000000.0 / 1048576.0; bw = (actual_size / dt) * 1000000.0 / 1048576.0;
/*std::cout << "act_size = " << act_size << std::endl; /*std::cout << "act_size = " << act_size << std::endl;
std::cout << "dt = " << dt << std::endl; std::cout << "dt = " << dt << std::endl;
std::cout << "bw = " << bw << std::endl;*/ std::cout << "bw = " << bw << std::endl;*/
@ -85,105 +85,112 @@ static void run_benchmarks(int size, int loops, int max_matches, bool is_reverse
auto end = std::chrono::steady_clock::now(); auto end = std::chrono::steady_clock::now();
total_sec += std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); total_sec += std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
/*calculate transferred size*/ /*calculate transferred size*/
transferred_size = size * loops; total_size = size * loops;
/*calculate average time*/ /*calculate average time*/
avg_time = total_sec / loops; avg_time = total_sec / loops;
/*convert microseconds to seconds*/ /*convert microseconds to seconds*/
total_sec /= 1000000.0; total_sec /= 1000000.0;
/*calculate maximum bandwidth*/ /*calculate maximum bandwidth*/
max_bw = transferred_size / total_sec; max_bw = total_size / total_sec;
/*convert to MB/s*/ /*convert to MB/s*/
max_bw /= 1048576.0; max_bw /= 1048576.0;
printf(KMAG "%s: no matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, " printf(KMAG "%s: no matches, %u * %u iterations," KBLU " total elapsed time =" RST " %.3f s, "
KBLU "average time per call =" RST " %.3f μs ," KBLU " bandwidth = " RST " %.3f MB/s \n", KBLU "average time per call =" RST " %.3f μs ," KBLU " bandwidth = " RST " %.3f MB/s \n",
bench.label, size ,loops, total_sec, avg_time, max_bw); bench.label, size ,loops, total_sec, avg_time, max_bw );
} }
} }
int main(){ int main(){
int matches[] = {0, MAX_MATCHES};
std::vector<size_t> sizes; std::vector<size_t> sizes;
for (size_t i = 0; i < N; i++) sizes.push_back(16000 << i*2); for (size_t i = 0; i < N; i++) sizes.push_back(16000 << i*2);
const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa"; const char charset[] = "aAaAaAaAAAaaaaAAAAaaaaAAAAAAaaaAAaaa";
for (size_t i = 0; i < std::size(sizes); i++) { for (int m = 0; m < 2; m++) {
MicroBenchmark bench("Shufti", sizes[i]); for (size_t i = 0; i < std::size(sizes); i++) {
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, false, bench, MicroBenchmark bench("Shufti", sizes[i]);
[&](MicroBenchmark &b) { run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
b.chars.set('a'); [&](MicroBenchmark &b) {
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); b.chars.set('a');
memset(b.buf.data(), 'b', b.size); ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
}, memset(b.buf.data(), 'b', b.size);
[&](MicroBenchmark &b) { },
return shuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); [&](MicroBenchmark &b) {
}); return shuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
} }
);
for (size_t i = 0; i < std::size(sizes); i++) { }
MicroBenchmark bench("Reverse Shufti", sizes[i]);
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, true, bench, for (size_t i = 0; i < std::size(sizes); i++) {
[&](MicroBenchmark &b) { MicroBenchmark bench("Reverse Shufti", sizes[i]);
b.chars.set('a'); run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); [&](MicroBenchmark &b) {
memset(b.buf.data(), 'b', b.size); b.chars.set('a');
}, ue2::shuftiBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
[&](MicroBenchmark &b) { memset(b.buf.data(), 'b', b.size);
return rshuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); },
}); [&](MicroBenchmark &b) {
} return rshuftiExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
}
for (size_t i = 0; i < std::size(sizes); i++) { );
MicroBenchmark bench("Truffle", sizes[i]); }
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, false, bench,
[&](MicroBenchmark &b) { for (size_t i = 0; i < std::size(sizes); i++) {
b.chars.set('a'); MicroBenchmark bench("Truffle", sizes[i]);
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
memset(b.buf.data(), 'b', b.size); [&](MicroBenchmark &b) {
}, b.chars.set('a');
[&](MicroBenchmark &b) { ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
return truffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); memset(b.buf.data(), 'b', b.size);
}); },
} [&](MicroBenchmark &b) {
return truffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
for (size_t i = 0; i < std::size(sizes); i++) { }
MicroBenchmark bench("Reverse Truffle", sizes[i]); );
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, true, bench, }
[&](MicroBenchmark &b) {
b.chars.set('a'); for (size_t i = 0; i < std::size(sizes); i++) {
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi); MicroBenchmark bench("Reverse Truffle", sizes[i]);
memset(b.buf.data(), 'b', b.size); run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
}, [&](MicroBenchmark &b) {
[&](MicroBenchmark &b) { b.chars.set('a');
return rtruffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size); ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
}); memset(b.buf.data(), 'b', b.size);
} },
[&](MicroBenchmark &b) {
for (size_t i = 0; i < std::size(sizes); i++) { return rtruffleExec(b.lo, b.hi, b.buf.data(), b.buf.data() + b.size);
//we imitate the noodle unit tests }
std::string str; );
const size_t char_len = 5; }
str.resize(char_len + 1);
for (size_t j=0; j < char_len; j++) { for (size_t i = 0; i < std::size(sizes); i++) {
srand (time(NULL)); //we imitate the noodle unit tests
int key = rand() % + 36 ; std::string str;
str[char_len] = charset[key]; const size_t char_len = 5;
str[char_len + 1] = '\0'; str.resize(char_len + 1);
} for (size_t j=0; j < char_len; j++) {
srand (time(NULL));
MicroBenchmark bench("Noodle", sizes[i]); int key = rand() % + 36 ;
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], MAX_MATCHES, false, bench, str[char_len] = charset[key];
[&](MicroBenchmark &b) { str[char_len + 1] = '\0';
ctxt.clear(); }
memset(b.buf.data(), 'a', b.size);
u32 id = 1000; MicroBenchmark bench("Noodle", sizes[i]);
ue2::hwlmLiteral lit(str, true, id); run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
b.nt = ue2::noodBuildTable(lit); [&](MicroBenchmark &b) {
assert(b.nt != nullptr); ctxt.clear();
}, memset(b.buf.data(), 'a', b.size);
[&](MicroBenchmark &b) { u32 id = 1000;
noodExec(b.nt.get(), b.buf.data(), b.size, 0, hlmSimpleCallback, &b.scratch); ue2::hwlmLiteral lit(str, true, id);
return b.buf.data() + b.size; b.nt = ue2::noodBuildTable(lit);
assert(b.nt != nullptr);
},
[&](MicroBenchmark &b) {
noodExec(b.nt.get(), b.buf.data(), b.size, 0, hlmSimpleCallback, &b.scratch);
return b.buf.data() + b.size;
}
);
} }
);
} }
return 0; return 0;