Commit 0b4580ee by FritzFlorian

Show worst-case runtime in mini benchmark.

parent 5eef2e0e
Pipeline #1270 passed with stages in 4 minutes 0 seconds
......@@ -2,6 +2,8 @@
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include <chrono>
const int MATRIX_SIZE = 128;
template<typename T, int SIZE>
......@@ -58,8 +60,8 @@ int main() {
//int main() {
// PROFILE_ENABLE
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
// pls::scheduler scheduler{&my_scheduler_memory, 8};
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u};
// pls::scheduler scheduler{&my_scheduler_memory, 4};
//
// matrix<double, MATRIX_SIZE> a;
// matrix<double, MATRIX_SIZE> b;
......@@ -67,13 +69,17 @@ int main() {
// fill_with_data(a, b);
//
// scheduler.perform_work([&] {
// auto start_time = std::chrono::high_resolution_clock::now();
// PROFILE_MAIN_THREAD
// for (int i = 0; i < 5000; i++) {
// for (int i = 0; i < 10000; i++) {
// PROFILE_WORK_BLOCK("Top Level")
// result.multiply(a, b);
// }
// auto end_time = std::chrono::high_resolution_clock::now();
// long time = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count();
// std::cout << "Runtime: " << time << "us" << std::endl;
// });
//
// PROFILE_SAVE("test_profile.prof")
//}
//
......@@ -14,36 +14,46 @@ namespace helpers {
// TODO: Clean up (separate into small functions and .cpp file)
template<typename Function>
void run_mini_benchmark(const Function &lambda, size_t max_threads, unsigned long max_runtime_ms = 1000) {
void run_mini_benchmark(const Function &lambda,
size_t max_threads,
unsigned long max_runtime_ms = 1000,
unsigned long warmup_time_ms = 100) {
using namespace std;
using namespace pls::internal::scheduling;
malloc_scheduler_memory scheduler_memory{max_threads, 2 << 12};
malloc_scheduler_memory scheduler_memory{max_threads, 2u << 14};
for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) {
scheduler local_scheduler{&scheduler_memory, num_threads};
chrono::high_resolution_clock::time_point start_time;
chrono::high_resolution_clock::time_point end_time;
long max_local_time = 0;
unsigned long iterations = 0;
long total_time = 0;
long iterations = 0;
local_scheduler.perform_work([&] {
start_time = chrono::high_resolution_clock::now();
end_time = start_time;
chrono::high_resolution_clock::time_point planned_end_time = start_time + chrono::milliseconds(max_runtime_ms);
chrono::high_resolution_clock::time_point planned_warmup_time = start_time + chrono::milliseconds(warmup_time_ms);
while (end_time < planned_end_time) {
auto local_start_time = chrono::high_resolution_clock::now();
lambda();
auto local_end_time = chrono::high_resolution_clock::now();
long local_time = chrono::duration_cast<chrono::microseconds>(local_end_time - local_start_time).count();
max_local_time = std::max(local_time, max_local_time);
if (end_time < planned_warmup_time) {
lambda();
} else {
auto local_start_time = chrono::high_resolution_clock::now();
lambda();
auto local_end_time = chrono::high_resolution_clock::now();
long local_time = chrono::duration_cast<chrono::microseconds>(local_end_time - local_start_time).count();
total_time += local_time;
max_local_time = std::max(local_time, max_local_time);
iterations++;
}
end_time = chrono::high_resolution_clock::now();
iterations++;
}
});
long time = chrono::duration_cast<chrono::microseconds>(end_time - start_time).count();
double time_per_iteration = (double) time / iterations;
double time_per_iteration = (double) total_time / iterations;
std::cout << (long) time_per_iteration << " (" << max_local_time << ")";
if (num_threads < max_threads) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment