Commit 0b4580ee by FritzFlorian

Show worst case runtime in mini benchmark.

parent 5eef2e0e
Pipeline #1270 passed with stages in 4 minutes 0 seconds
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
#include <pls/internal/helpers/profiler.h> #include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h> #include <pls/internal/helpers/mini_benchmark.h>
#include <chrono>
const int MATRIX_SIZE = 128; const int MATRIX_SIZE = 128;
template<typename T, int SIZE> template<typename T, int SIZE>
...@@ -58,8 +60,8 @@ int main() { ...@@ -58,8 +60,8 @@ int main() {
//int main() { //int main() {
// PROFILE_ENABLE // PROFILE_ENABLE
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18}; // pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u};
// pls::scheduler scheduler{&my_scheduler_memory, 8}; // pls::scheduler scheduler{&my_scheduler_memory, 4};
// //
// matrix<double, MATRIX_SIZE> a; // matrix<double, MATRIX_SIZE> a;
// matrix<double, MATRIX_SIZE> b; // matrix<double, MATRIX_SIZE> b;
...@@ -67,13 +69,17 @@ int main() { ...@@ -67,13 +69,17 @@ int main() {
// fill_with_data(a, b); // fill_with_data(a, b);
// //
// scheduler.perform_work([&] { // scheduler.perform_work([&] {
// auto start_time = std::chrono::high_resolution_clock::now();
// PROFILE_MAIN_THREAD // PROFILE_MAIN_THREAD
// for (int i = 0; i < 5000; i++) { // for (int i = 0; i < 10000; i++) {
// PROFILE_WORK_BLOCK("Top Level") // PROFILE_WORK_BLOCK("Top Level")
// result.multiply(a, b); // result.multiply(a, b);
// } // }
// auto end_time = std::chrono::high_resolution_clock::now();
// long time = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count();
// std::cout << "Runtime: " << time << "us" << std::endl;
// }); // });
// //
// PROFILE_SAVE("test_profile.prof") // PROFILE_SAVE("test_profile.prof")
//} //}
//
...@@ -14,36 +14,46 @@ namespace helpers { ...@@ -14,36 +14,46 @@ namespace helpers {
// TODO: Clean up (separate into small functions and .cpp file) // TODO: Clean up (separate into small functions and .cpp file)
template<typename Function> template<typename Function>
void run_mini_benchmark(const Function &lambda, size_t max_threads, unsigned long max_runtime_ms = 1000) { void run_mini_benchmark(const Function &lambda,
size_t max_threads,
unsigned long max_runtime_ms = 1000,
unsigned long warmup_time_ms = 100) {
using namespace std; using namespace std;
using namespace pls::internal::scheduling; using namespace pls::internal::scheduling;
malloc_scheduler_memory scheduler_memory{max_threads, 2 << 12}; malloc_scheduler_memory scheduler_memory{max_threads, 2u << 14};
for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) { for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) {
scheduler local_scheduler{&scheduler_memory, num_threads}; scheduler local_scheduler{&scheduler_memory, num_threads};
chrono::high_resolution_clock::time_point start_time; chrono::high_resolution_clock::time_point start_time;
chrono::high_resolution_clock::time_point end_time; chrono::high_resolution_clock::time_point end_time;
long max_local_time = 0; long max_local_time = 0;
unsigned long iterations = 0; long total_time = 0;
long iterations = 0;
local_scheduler.perform_work([&] { local_scheduler.perform_work([&] {
start_time = chrono::high_resolution_clock::now(); start_time = chrono::high_resolution_clock::now();
end_time = start_time; end_time = start_time;
chrono::high_resolution_clock::time_point planned_end_time = start_time + chrono::milliseconds(max_runtime_ms); chrono::high_resolution_clock::time_point planned_end_time = start_time + chrono::milliseconds(max_runtime_ms);
chrono::high_resolution_clock::time_point planned_warmup_time = start_time + chrono::milliseconds(warmup_time_ms);
while (end_time < planned_end_time) { while (end_time < planned_end_time) {
auto local_start_time = chrono::high_resolution_clock::now(); if (end_time < planned_warmup_time) {
lambda(); lambda();
auto local_end_time = chrono::high_resolution_clock::now(); } else {
long local_time = chrono::duration_cast<chrono::microseconds>(local_end_time - local_start_time).count(); auto local_start_time = chrono::high_resolution_clock::now();
max_local_time = std::max(local_time, max_local_time); lambda();
auto local_end_time = chrono::high_resolution_clock::now();
long local_time = chrono::duration_cast<chrono::microseconds>(local_end_time - local_start_time).count();
total_time += local_time;
max_local_time = std::max(local_time, max_local_time);
iterations++;
}
end_time = chrono::high_resolution_clock::now(); end_time = chrono::high_resolution_clock::now();
iterations++;
} }
}); });
double time_per_iteration = (double) total_time / iterations;
long time = chrono::duration_cast<chrono::microseconds>(end_time - start_time).count();
double time_per_iteration = (double) time / iterations;
std::cout << (long) time_per_iteration << " (" << max_local_time << ")"; std::cout << (long) time_per_iteration << " (" << max_local_time << ")";
if (num_threads < max_threads) { if (num_threads < max_threads) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment