From e2f584c480dd398170258480d957808ae4808e6c Mon Sep 17 00:00:00 2001 From: FritzFlorian Date: Thu, 11 Jun 2020 22:07:37 +0200 Subject: [PATCH] Allow flexible problem sizes and periodic execution for benchmarks. --- app/benchmark_fft/main.cpp | 53 +++++++++++++++++++++++++++++++---------------------- app/benchmark_fib/main.cpp | 51 ++++++++++++++++++++++++++++++--------------------- app/benchmark_matrix/main.cpp | 50 ++++++++++++++++++++++++++++++-------------------- app/benchmark_matrix_div_conquer/main.cpp | 50 +++++++++++++++++++++++++++++++------------------- app/benchmark_unbalanced/main.cpp | 80 +++++++++++++++++++++++++++++++++++++++----------------------------------------- extern/benchmark_runner/benchmark_runner.h | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- 6 files changed, 315 insertions(+), 136 deletions(-) diff --git a/app/benchmark_fft/main.cpp b/app/benchmark_fft/main.cpp index db22fee..50f4419 100644 --- a/app/benchmark_fft/main.cpp +++ b/app/benchmark_fft/main.cpp @@ -31,35 +31,44 @@ constexpr int MAX_NUM_TASKS = 16; constexpr int MAX_STACK_SIZE = 4096 * 1; int main(int argc, char **argv) { - int num_threads; - string directory; - benchmark_runner::read_args(argc, argv, num_threads, directory); + auto settings = benchmark_runner::parse_parameters(argc, argv); - string test_name = to_string(num_threads) + ".csv"; - string full_directory = directory + "/PLS_v3/"; + fft::complex_vector data(settings.size_); + fft::complex_vector swap_array(settings.size_); + fft::fill_input(data); + + string test_name = to_string(settings.num_threads_) + ".csv"; + string full_directory = settings.output_directory_ + "/PLS_v3/"; benchmark_runner runner{full_directory, test_name}; - runner.enable_memory_stats(); - runner.pre_allocate_stats(); - fft::complex_vector data(fft::SIZE); - fft::complex_vector swap_array(fft::SIZE); - 
fft::fill_input(data); + pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE}; - pls::scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; + if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) { + printf("Running isolated measurement...\n"); + runner.enable_memory_stats(); + runner.pre_allocate_stats(); -// scheduler.get_profiler().disable_memory_measure(); - runner.run_iterations(fft::NUM_ITERATIONS, [&]() { - scheduler.perform_work([&]() { - pls_conquer(data.begin(), swap_array.begin(), fft::SIZE);; + runner.run_iterations(settings.iterations_, [&]() { + scheduler.perform_work([&]() { + pls_conquer(data.begin(), swap_array.begin(), settings.size_);; + }); + }, [&]() { + fft::fill_input(data); // Reset data before each run }); -// scheduler.get_profiler().current_run().print_stats(); - }, fft::NUM_WARMUP_ITERATIONS, [&]() { - fft::fill_input(data); // Reset data before each run - }); -// scheduler.get_profiler().current_run().print_dag(std::cout); -// scheduler.get_profiler().current_run().print_stats(); + runner.commit_results(true); + } else { + printf("Running periodic measurement...\n"); + runner.enable_wall_time_stats(); + runner.pre_allocate_stats(); - runner.commit_results(true); + runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() { + scheduler.perform_work([&]() { + pls_conquer(data.begin(), swap_array.begin(), settings.size_);; + }); + fft::fill_input(data); // Reset data before each run + }); + runner.commit_results(true); + } return 0; } diff --git a/app/benchmark_fib/main.cpp b/app/benchmark_fib/main.cpp index ae5a2e4..6e3452b 100644 --- a/app/benchmark_fib/main.cpp +++ b/app/benchmark_fib/main.cpp @@ -8,7 +8,7 @@ using namespace comparison_benchmarks::base; constexpr int MAX_NUM_TASKS = 32; constexpr int MAX_STACK_SIZE = 4096 * 1; -int pls_fib(int n, int d) { +int pls_fib(int n) { if (n == 0) { return 0; } @@ -17,11 
+17,11 @@ int pls_fib(int n, int d) { } int a, b; - pls::spawn([n, d, &a]() { - a = pls_fib(n - 1, d + 1); + pls::spawn([n, &a]() { + a = pls_fib(n - 1); }); - pls::spawn([n, d, &b]() { - b = pls_fib(n - 2, d + 1); + pls::spawn([n, &b]() { + b = pls_fib(n - 2); }); pls::sync(); @@ -29,29 +29,38 @@ int pls_fib(int n, int d) { } int main(int argc, char **argv) { - int num_threads; - string directory; - benchmark_runner::read_args(argc, argv, num_threads, directory); + auto settings = benchmark_runner::parse_parameters(argc, argv); - string test_name = to_string(num_threads) + ".csv"; - string full_directory = directory + "/PLS_v3/"; + string test_name = to_string(settings.num_threads_) + ".csv"; + string full_directory = settings.output_directory_ + "/PLS_v3/"; benchmark_runner runner{full_directory, test_name}; - runner.enable_memory_stats(); - runner.pre_allocate_stats(); - pls::scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; + pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE}; volatile int res; -// scheduler.get_profiler().disable_memory_measure(); - runner.run_iterations(fib::NUM_ITERATIONS, [&]() { - scheduler.perform_work([&]() { - res = pls_fib(fib::INPUT_N, 0); + if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) { + printf("Running isolated measurement...\n"); + runner.enable_memory_stats(); + runner.pre_allocate_stats(); + + runner.run_iterations(settings.iterations_, [&]() { + scheduler.perform_work([&]() { + res = pls_fib(settings.size_); + }); }); - }, fib::NUM_WARMUP_ITERATIONS); -// scheduler.get_profiler().current_run().print_dag(std::cout); -// scheduler.get_profiler().current_run().print_stats(); + runner.commit_results(true); + } else { + printf("Running periodic measurement...\n"); + runner.enable_wall_time_stats(); + runner.pre_allocate_stats(); - runner.commit_results(true); + runner.run_periodic(settings.iterations_, settings.interval_period_, 
settings.interval_deadline_, [&]() { + scheduler.perform_work([&]() { + res = pls_fib(settings.size_); + }); + }); + runner.commit_results(true); + } return 0; } diff --git a/app/benchmark_matrix/main.cpp b/app/benchmark_matrix/main.cpp index 95cba2f..4ff4075 100644 --- a/app/benchmark_matrix/main.cpp +++ b/app/benchmark_matrix/main.cpp @@ -20,34 +20,44 @@ class pls_matrix : public matrix::matrix { } }; -constexpr int MAX_NUM_TASKS = 32; +constexpr int MAX_NUM_TASKS = 10; constexpr int MAX_STACK_SIZE = 4096 * 1; int main(int argc, char **argv) { - int num_threads; - string directory; - benchmark_runner::read_args(argc, argv, num_threads, directory); + auto settings = benchmark_runner::parse_parameters(argc, argv); - string test_name = to_string(num_threads) + ".csv"; - string full_directory = directory + "/PLS_v3/"; + pls_matrix a{settings.size_}; + pls_matrix b{settings.size_}; + pls_matrix result{settings.size_}; + + string test_name = to_string(settings.num_threads_) + ".csv"; + string full_directory = settings.output_directory_ + "/PLS_v3/"; benchmark_runner runner{full_directory, test_name}; - runner.enable_memory_stats(); - runner.pre_allocate_stats(); - pls_matrix a{matrix::MATRIX_SIZE}; - pls_matrix b{matrix::MATRIX_SIZE}; - pls_matrix result{matrix::MATRIX_SIZE}; + pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE}; - scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; + if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) { + printf("Running isolated measurement...\n"); + runner.enable_memory_stats(); + runner.pre_allocate_stats(); -// scheduler.get_profiler().disable_memory_measure(); - runner.run_iterations(matrix::NUM_ITERATIONS, [&]() { - scheduler.perform_work([&]() { - result.multiply(a, b); + runner.run_iterations(settings.iterations_, [&]() { + scheduler.perform_work([&]() { + result.multiply(a, b); + }); + }); + runner.commit_results(true); + } else { + 
printf("Running periodic measurement...\n"); + runner.enable_wall_time_stats(); + runner.pre_allocate_stats(); + + runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() { + scheduler.perform_work([&]() { + result.multiply(a, b); + }); }); - }, matrix::WARMUP_ITERATIONS); -// scheduler.get_profiler().current_run().print_dag(std::cout); -// scheduler.get_profiler().current_run().print_stats(); + runner.commit_results(true); + } - runner.commit_results(true); } diff --git a/app/benchmark_matrix_div_conquer/main.cpp b/app/benchmark_matrix_div_conquer/main.cpp index 8bf0e75..7c91ab0 100644 --- a/app/benchmark_matrix_div_conquer/main.cpp +++ b/app/benchmark_matrix_div_conquer/main.cpp @@ -99,21 +99,13 @@ void multiply_div_conquer(const std::vector result_data{new double[size * size]}; @@ -145,16 +137,36 @@ int main(int argc, char **argv) { max_depth++; remaining_size = remaining_size / 2; } - pls::strain_local_resource local_indices{(unsigned) num_threads, (unsigned) max_depth}; + pls::strain_local_resource local_indices{(unsigned) settings.num_threads_, (unsigned) max_depth}; + + string test_name = to_string(settings.num_threads_) + ".csv"; + string full_directory = settings.output_directory_ + "/PLS_v3/"; + benchmark_runner runner{full_directory, test_name}; - scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; + pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE}; - runner.run_iterations(1, [&]() { - scheduler.perform_work([&]() { - multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, result, a, b); + if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) { + printf("Running isolated measurement...\n"); + runner.enable_memory_stats(); + runner.pre_allocate_stats(); + + runner.run_iterations(settings.iterations_, [&]() { + scheduler.perform_work([&]() { + multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, 
result, a, b); + }); + }); + runner.commit_results(true); + } else { + printf("Running periodic measurement...\n"); + runner.enable_wall_time_stats(); + runner.pre_allocate_stats(); + + runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() { + scheduler.perform_work([&]() { + multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, result, a, b); + }); }); - }, 0); - runner.commit_results(true); + runner.commit_results(true); + } - scheduler.terminate(); } diff --git a/app/benchmark_unbalanced/main.cpp b/app/benchmark_unbalanced/main.cpp index 82a981f..d4936ac 100644 --- a/app/benchmark_unbalanced/main.cpp +++ b/app/benchmark_unbalanced/main.cpp @@ -1,4 +1,4 @@ -#include "pls/internal/scheduling/scheduler.h" +#include "pls/pls.h" using namespace pls::internal::scheduling; @@ -16,12 +16,12 @@ int count_child_nodes(unbalanced::node &node) { std::atomic count{1}; for (int i = 0; i < node.get_num_children(); i++) { - scheduler::spawn([i, &count, &node] { + pls::spawn([i, &count, &node] { unbalanced::node child_node = node.spawn_child_node(i); count.fetch_add(count_child_nodes(child_node)); }); } - scheduler::sync(); + pls::sync(); return count; } @@ -32,49 +32,47 @@ int unbalanced_tree_search(int seed, int root_children, double q, int normal_chi } constexpr int MAX_NUM_TASKS = 256; -constexpr int MAX_STACK_SIZE = 1024 * 2; +constexpr int MAX_STACK_SIZE = 4096 * 1; int main(int argc, char **argv) { - int num_threads; - string directory; - benchmark_runner::read_args(argc, argv, num_threads, directory); + auto settings = benchmark_runner::parse_parameters(argc, argv); - string test_name = to_string(num_threads) + ".csv"; - string full_directory = directory + "/PLS_v3/"; + string test_name = to_string(settings.num_threads_) + ".csv"; + string full_directory = settings.output_directory_ + "/PLS_v3/"; benchmark_runner runner{full_directory, test_name}; - runner.enable_memory_stats(); - 
runner.pre_allocate_stats(); - - scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; - -// scheduler.get_profiler().disable_memory_measure(); - runner.run_iterations(1, [&]() { - scheduler.perform_work([&]() { - unbalanced_tree_search(unbalanced::SEED, - unbalanced::ROOT_CHILDREN, - unbalanced::Q, - unbalanced::NORMAL_CHILDREN); + + pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE}; + + if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) { + printf("Running isolated measurement...\n"); + runner.enable_memory_stats(); + runner.pre_allocate_stats(); + + runner.run_iterations(settings.iterations_, [&]() { + scheduler.perform_work([&]() { + unbalanced_tree_search(unbalanced::SEED, + unbalanced::ROOT_CHILDREN, + unbalanced::Q, + unbalanced::NORMAL_CHILDREN); + }); }); - }, 0); -// scheduler.get_profiler().current_run().print_dag(std::cout); -// scheduler.get_profiler().current_run().print_stats(); - runner.commit_results(true); + runner.commit_results(true); + } else { + printf("Running periodic measurement...\n"); + runner.enable_wall_time_stats(); + runner.pre_allocate_stats(); + + runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() { + scheduler.perform_work([&]() { + unbalanced_tree_search(unbalanced::SEED, + unbalanced::ROOT_CHILDREN, + unbalanced::Q, + unbalanced::NORMAL_CHILDREN); + }); + }); + runner.commit_results(true); + } } -//int main() { -// PROFILE_ENABLE -// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18}; -// pls::scheduler scheduler{&my_scheduler_memory, 8}; -// -// scheduler.perform_work([&] { -// PROFILE_MAIN_THREAD -// for (int i = 0; i < 50; i++) { -// PROFILE_WORK_BLOCK("Top Level") -// int result = unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN); -// std::cout << result << std::endl; -// } -// }); -// -// PROFILE_SAVE("test_profile.prof") -//} + diff --git 
a/extern/benchmark_runner/benchmark_runner.h b/extern/benchmark_runner/benchmark_runner.h index f2c0cab..1a89990 100644 --- a/extern/benchmark_runner/benchmark_runner.h +++ b/extern/benchmark_runner/benchmark_runner.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,12 @@ class benchmark_runner { unsigned long memory_pre_run_; unsigned long memory_post_run_; + bool wall_time_enabled_{false}; + const string WALL_TIME_PRE_RUN = "wall_time_pre_run_us"; + const string WALL_TIME_POST_RUN = "wall_time_post_run_us"; + unsigned long wall_time_pre_run_; + unsigned long wall_time_post_run_; + map> custom_stats_; void print_statistics() { @@ -108,6 +115,12 @@ class benchmark_runner { add_custom_stats_field(MEMORY_POST_RUN); } + void enable_wall_time_stats() { + wall_time_enabled_ = true; + add_custom_stats_field(WALL_TIME_PRE_RUN); + add_custom_stats_field(WALL_TIME_POST_RUN); + } + void pre_allocate_stats(size_t num = 100000) { times_.reserve(num); memset(times_.data(), 'a', num * sizeof(long)); @@ -117,16 +130,54 @@ class benchmark_runner { } } - static void read_args(int argc, char **argv, int &num_threads, string &path) { - if (argc < 3) { - cout << "Must Specifiy concurrency and output directory! 
(usage: `benchmark `)" - << endl; + struct benchmark_settings { + enum TYPE { ISOLATED, PERIODIC }; + + string output_directory_; + size_t size_; + unsigned num_threads_; + + TYPE type_; + + size_t iterations_; + + unsigned long interval_period_; + unsigned long interval_deadline_; + }; + + static benchmark_settings parse_parameters(int argc, char **argv) { + benchmark_settings result; + string tmp; + + if (argc != 5 && argc != 7) { + printf("usage 1: `benchmark `\n"); + printf("usage 2: `benchmark `\n"); exit(1); } - string tmp = argv[1]; - path = tmp; - num_threads = atoi(argv[2]); + result.output_directory_ = argv[1]; + tmp = argv[2]; + result.size_ = std::stoi(tmp); + tmp = argv[3]; + result.num_threads_ = std::stoi(tmp); + + if (argc == 5) { + result.type_ = benchmark_settings::ISOLATED; + + tmp = argv[4]; + result.iterations_ = std::stoi(tmp); + } else { + result.type_ = benchmark_settings::PERIODIC; + + tmp = argv[4]; + result.iterations_ = std::stoi(tmp); + tmp = argv[5]; + result.interval_period_ = std::stoi(tmp); + tmp = argv[6]; + result.interval_deadline_ = std::stoi(tmp); + } + + return result; } void start_iteration() { @@ -134,6 +185,11 @@ class benchmark_runner { auto memory_stats = query_process_memory_pages(); memory_pre_run_ = memory_stats.first; } + if (wall_time_enabled_) { + wall_time_pre_run_ = + std::chrono::duration_cast(std::chrono::time_point_cast( + std::chrono::system_clock::now()).time_since_epoch()).count(); + } last_start_time_ = chrono::steady_clock::now(); } @@ -155,6 +211,13 @@ class benchmark_runner { custom_stats_[MEMORY_PRE_RUN][iteration_index] = memory_pre_run_; custom_stats_[MEMORY_POST_RUN][iteration_index] = memory_post_run_; } + if (wall_time_enabled_) { + wall_time_post_run_ = + std::chrono::duration_cast(std::chrono::time_point_cast( + std::chrono::system_clock::now()).time_since_epoch()).count(); + custom_stats_[WALL_TIME_PRE_RUN][iteration_index] = wall_time_pre_run_; + 
custom_stats_[WALL_TIME_POST_RUN][iteration_index] = wall_time_post_run_; + } } void store_custom_stat(const string &name, long value) { @@ -164,14 +227,8 @@ class benchmark_runner { void run_iterations(int count, const function measure, - int warmup_count, const function prepare = []() {}, const function finish = []() {}) { - for (int i = 0; i < warmup_count; i++) { - prepare(); - measure(); - } - for (int i = 0; i < count; i++) { using namespace std::literals; this_thread::sleep_for(100us); @@ -183,6 +240,90 @@ class benchmark_runner { } } + void add_to_timespec(timespec &timespec, size_t seconds, size_t nanoseconds) { + timespec.tv_sec += seconds; + timespec.tv_nsec += nanoseconds; + + while (timespec.tv_nsec > 1000l * 1000l * 1000l) { + timespec.tv_nsec -= 1000l * 1000l * 1000l; + timespec.tv_sec++; + } + } + + void run_periodic(size_t count, + size_t period_us, + size_t deadline_us, + const function measure) { + size_t period_nanoseconds = 1000lu * period_us; + size_t period_seconds = period_nanoseconds / (1000lu * 1000lu * 1000lu); + period_nanoseconds = period_nanoseconds - period_seconds * 1000lu * 1000lu * 1000lu; + + size_t deadline_nanoseconds = 1000lu * deadline_us; + size_t deadline_seconds = deadline_nanoseconds / (1000lu * 1000lu * 1000lu); + deadline_nanoseconds = deadline_nanoseconds - deadline_seconds * 1000lu * 1000lu * 1000lu; + + // Prepare basic time spec for first iteration + timespec iteration_start, iteration_end, deadline_end, finish_time; + if (clock_gettime(CLOCK_MONOTONIC, &iteration_start) == -1) { + perror("clock_gettime"); + exit(1); + } + add_to_timespec(iteration_start, period_seconds, period_nanoseconds); + + size_t current_iteration = 0; + while (current_iteration < count) { + // Sleep until the next iteration + long sleep_error = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &iteration_start, nullptr); + if (sleep_error) { + printf("Sleep Error %ld\n", sleep_error); + } + + // Invoke iteration + start_iteration(); + measure(); + 
end_iteration(); + + // Calculate all relevant time points for this iteration + if (clock_gettime(CLOCK_MONOTONIC, &finish_time) == -1) { + perror("clock_gettime"); + exit(1); + } + + iteration_end = iteration_start; + add_to_timespec(iteration_end, period_seconds, period_nanoseconds); + + deadline_end = iteration_start; + add_to_timespec(deadline_end, deadline_seconds, deadline_nanoseconds); + + // Store 'actual' wall time instead of iteration time (we want to include sleeping here!) + long wall_time_us = 0; + wall_time_us += (finish_time.tv_sec - iteration_start.tv_sec) * 1000l * 1000l; + wall_time_us += ((long) finish_time.tv_nsec - (long) iteration_start.tv_nsec) / 1000l; + printf("Difference: %d\n", wall_time_us - times_[current_iteration]); + times_[current_iteration] = wall_time_us; + + if (finish_time.tv_sec >= deadline_end.tv_sec && finish_time.tv_nsec > deadline_end.tv_nsec) { + printf("Deadline Miss!\n"); // TODO: Remove + } + + // Skip iterations if their start time is later than the current time (skipping) + while (finish_time.tv_sec >= iteration_end.tv_sec && finish_time.tv_nsec > iteration_end.tv_nsec) { + iteration_start = iteration_end; + + iteration_end = iteration_start; + add_to_timespec(iteration_end, period_seconds, period_nanoseconds); + + current_iteration++; + start_iteration(); + end_iteration(); + times_[current_iteration] = 0; + } + // Progress to next iteration (normally) + current_iteration++; + iteration_start = iteration_end; + } + } + void commit_results(bool print_stats) { if (print_stats) { print_statistics(); -- libgit2 0.26.0