Commit 7a8f320b by FritzFlorian

Add profiling information to benchmarks and matrix_then_fft benchmark.

parent 08bc7855
Pipeline #1588 passed with stages
in 4 minutes 47 seconds
......@@ -54,6 +54,7 @@ add_subdirectory(app/benchmark_matrix)
add_subdirectory(app/benchmark_matrix_div_conquer)
add_subdirectory(app/benchmark_fib)
add_subdirectory(app/context_switch)
add_subdirectory(app/benchmark_matrix_then_fft)
# Add optional tests
option(PACKAGE_TESTS "Build the tests" ON)
......
......@@ -43,6 +43,11 @@ int main(int argc, char **argv) {
pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
#if PLS_PROFILING_ENABLED
scheduler.get_profiler().disable_memory_measure();
runner.add_custom_stats_field("T_1");
runner.add_custom_stats_field("T_inf");
#endif
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
......@@ -53,6 +58,11 @@ int main(int argc, char **argv) {
});
}, [&]() {
fft::fill_input(data); // Reset data before each run
}, [&]() {
#if PLS_PROFILING_ENABLED
runner.store_custom_stat("T_1", scheduler.get_profiler().current_run().t_1_);
runner.store_custom_stat("T_inf", scheduler.get_profiler().current_run().t_inf_);
#endif
});
runner.commit_results(true);
} else {
......
......@@ -34,6 +34,11 @@ int main(int argc, char **argv) {
volatile int res;
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
#if PLS_PROFILING_ENABLED
scheduler.get_profiler().disable_memory_measure();
runner.add_custom_stats_field("T_1");
runner.add_custom_stats_field("T_inf");
#endif
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
......@@ -42,6 +47,11 @@ int main(int argc, char **argv) {
scheduler.perform_work([&]() {
res = pls_fib(settings.size_);
});
}, [&]() {}, [&]() {
#if PLS_PROFILING_ENABLED
runner.store_custom_stat("T_1", scheduler.get_profiler().current_run().t_1_);
runner.store_custom_stat("T_inf", scheduler.get_profiler().current_run().t_inf_);
#endif
});
runner.commit_results(true);
} else {
......
......@@ -125,6 +125,7 @@ int main(int argc, char **argv) {
// Fill data arrays as needed
a.fill_default_data();
b.fill_default_data();
result.fill_default_data();
// Strain local data
std::vector<std::vector<std::vector<std::unique_ptr<double[]>>>> div_conquer_temp_arrays;
......@@ -158,6 +159,11 @@ int main(int argc, char **argv) {
pls::scheduler scheduler{(unsigned) settings.num_threads_, max_depth + 2, MAX_STACK_SIZE};
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
#if PLS_PROFILING_ENABLED
scheduler.get_profiler().disable_memory_measure();
runner.add_custom_stats_field("T_1");
runner.add_custom_stats_field("T_inf");
#endif
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
......@@ -166,6 +172,11 @@ int main(int argc, char **argv) {
scheduler.perform_work([&]() {
multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, 0, result, a, b);
});
}, [&]() {}, [&]() {
#if PLS_PROFILING_ENABLED
runner.store_custom_stat("T_1", scheduler.get_profiler().current_run().t_1_);
runner.store_custom_stat("T_inf", scheduler.get_profiler().current_run().t_inf_);
#endif
});
runner.commit_results(true);
} else {
......
# Executable for the "matrix then FFT" benchmark, built from main.cpp.
add_executable(benchmark_matrix_then_fft_pls_v3 main.cpp)

# Libraries the benchmark links against.
target_link_libraries(benchmark_matrix_then_fft_pls_v3
        pls
        benchmark_runner
        benchmark_base)

# Building the benchmark.pls target also builds this executable.
add_dependencies(benchmark.pls benchmark_matrix_then_fft_pls_v3)

# Link easy_profiler only when EASY_PROFILER evaluates to true.
if (EASY_PROFILER)
    target_link_libraries(benchmark_matrix_then_fft_pls_v3 easy_profiler)
endif ()
#include "pls/pls.h"
#include "benchmark_runner.h"
#include "benchmark_base/matrix.h"
#include "benchmark_base/fft.h"
using namespace comparison_benchmarks::base;
/**
 * Recursive FFT step: divides the range, processes both halves, then combines them.
 *
 * Ranges larger than fft::RECURSIVE_CUTOFF hand their two halves to pls::spawn and
 * pls::spawn_and_sync; smaller ranges process both halves in place via fft::conquer.
 *
 * @param data       Iterator to the first element of the range to transform.
 * @param swap_array Iterator to the scratch range passed to fft::divide; offset in
 *                   lock-step with data for the upper half.
 * @param n          Number of elements in the range; ranges with n < 2 are left untouched.
 */
void pls_conquer(fft::complex_vector::iterator data, fft::complex_vector::iterator swap_array, int n) {
  // Fewer than two elements: nothing to divide or combine.
  if (n < 2) {
    return;
  }

  fft::divide(data, swap_array, n);

  const int half = n / 2;
  if (n > fft::RECURSIVE_CUTOFF) {
    // Large range: process the lower and upper half as separate pls tasks.
    // NOTE(review): spawn_and_sync presumably waits for both children before
    // returning, which the combine step below relies on - confirm against pls.
    pls::spawn([data, swap_array, half]() {
      pls_conquer(data, swap_array, half);
    });
    pls::spawn_and_sync([data, swap_array, half]() {
      pls_conquer(data + half, swap_array + half, half);
    });
  } else {
    // At or below the cutoff: handle both halves directly on this thread.
    fft::conquer(data, swap_array, half);
    fft::conquer(data + half, swap_array + half, half);
  }

  fft::combine(data, n);
}
// Scheduler configuration: passed as the second and third constructor arguments
// of pls::scheduler in main().
// NOTE(review): presumably the per-thread task limit and the per-task stack size
// in bytes - confirm against the pls::scheduler constructor.
constexpr int MAX_NUM_TASKS = 16;
constexpr int MAX_STACK_SIZE = 4096 * 1;
int main(int argc, char **argv) {
auto settings = benchmark_runner::parse_parameters(argc, argv);
size_t matrix_size = settings.size_;
size_t fft_size = 8192;
string test_name = to_string(settings.num_threads_) + ".csv";
string full_directory = settings.output_directory_ + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};
// Data Containers
fft::complex_vector fft_data(fft_size);
fft::complex_vector fft_swap_array(fft_size);
fft::fill_input(fft_data);
matrix::matrix<double> matrix_a{settings.size_};
matrix::matrix<double> matrix_b{settings.size_};
matrix::matrix<double> matrix_result{settings.size_};
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
#if PLS_PROFILING_ENABLED
scheduler.get_profiler().disable_memory_measure();
runner.add_custom_stats_field("T_1");
runner.add_custom_stats_field("T_inf");
#endif
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
runner.run_iterations(settings.iterations_, [&]() {
// Serial Matrix Multiplication
matrix_result.multiply(matrix_a, matrix_b);
// Parallel FFT
scheduler.perform_work([&]() {
pls_conquer(fft_data.begin(), fft_swap_array.begin(), fft_size);
});
}, [&]() {
fft::fill_input(fft_data); // Reset data before each run
}, [&]() {
#if PLS_PROFILING_ENABLED
runner.store_custom_stat("T_1", scheduler.get_profiler().current_run().t_1_);
runner.store_custom_stat("T_inf", scheduler.get_profiler().current_run().t_inf_);
#endif
});
runner.commit_results(true);
} else {
printf("Running periodic measurement...\n");
runner.enable_wall_time_stats();
runner.pre_allocate_stats();
runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() {
// Serial Matrix Multiplication
matrix_result.multiply(matrix_a, matrix_b);
// Parallel FFT
scheduler.perform_work([&]() {
pls_conquer(fft_data.begin(), fft_swap_array.begin(), fft_size);
});
// Reset data before each run
fft::fill_input(fft_data);
});
runner.commit_results(true);
}
return 0;
}
......@@ -44,6 +44,11 @@ int main(int argc, char **argv) {
pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
#if PLS_PROFILING_ENABLED
scheduler.get_profiler().disable_memory_measure();
runner.add_custom_stats_field("T_1");
runner.add_custom_stats_field("T_inf");
#endif
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
......@@ -55,6 +60,11 @@ int main(int argc, char **argv) {
unbalanced::Q,
unbalanced::NORMAL_CHILDREN);
});
}, [&]() {}, [&]() {
#if PLS_PROFILING_ENABLED
runner.store_custom_stat("T_1", scheduler.get_profiler().current_run().t_1_);
runner.store_custom_stat("T_inf", scheduler.get_profiler().current_run().t_inf_);
#endif
});
runner.commit_results(true);
} else {
......
......@@ -42,7 +42,10 @@ class benchmark_runner {
unsigned long wall_time_pre_run_;
unsigned long wall_time_post_run_;
map<string, vector<long>> custom_stats_;
const string WALL_TIME_ITERATION_START = "wall_time_iteration_start_us";
const string WALL_TIME_ITERATION_END = "wall_time_iteration_end_us";
map<string, vector<unsigned long>> custom_stats_;
void print_statistics() {
long time_sum = std::accumulate(times_.begin(), times_.end(), 0l);
......@@ -119,9 +122,11 @@ class benchmark_runner {
wall_time_enabled_ = true;
add_custom_stats_field(WALL_TIME_PRE_RUN);
add_custom_stats_field(WALL_TIME_POST_RUN);
add_custom_stats_field(WALL_TIME_ITERATION_START);
add_custom_stats_field(WALL_TIME_ITERATION_END);
}
void pre_allocate_stats(size_t num = 100000) {
void pre_allocate_stats(size_t num = 500000) {
times_.reserve(num);
memset(times_.data(), 'a', num * sizeof(long));
for (auto &iter : custom_stats_) {
......@@ -187,8 +192,7 @@ class benchmark_runner {
}
if (wall_time_enabled_) {
wall_time_pre_run_ =
std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::time_point_cast<std::chrono::microseconds>(
std::chrono::system_clock::now()).time_since_epoch()).count();
chrono::duration_cast<chrono::microseconds>(chrono::system_clock::now().time_since_epoch()).count();
}
last_start_time_ = chrono::steady_clock::now();
......@@ -213,8 +217,8 @@ class benchmark_runner {
}
if (wall_time_enabled_) {
wall_time_post_run_ =
std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::time_point_cast<std::chrono::microseconds>(
std::chrono::system_clock::now()).time_since_epoch()).count();
chrono::duration_cast<chrono::microseconds>(chrono::system_clock::now().time_since_epoch()).count();
custom_stats_[WALL_TIME_PRE_RUN][iteration_index] = wall_time_pre_run_;
custom_stats_[WALL_TIME_POST_RUN][iteration_index] = wall_time_post_run_;
}
......@@ -271,6 +275,7 @@ class benchmark_runner {
add_to_timespec(iteration_start, period_seconds, period_nanoseconds);
size_t current_iteration = 0;
size_t deadline_misses = 0;
while (current_iteration < count) {
// Sleep until the next iteration
long sleep_error = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &iteration_start, nullptr);
......@@ -295,19 +300,28 @@ class benchmark_runner {
deadline_end = iteration_start;
add_to_timespec(deadline_end, deadline_seconds, deadline_nanoseconds);
// Keep stats...
if (wall_time_enabled_) {
custom_stats_[WALL_TIME_ITERATION_START][current_iteration] =
iteration_start.tv_sec * 1000 * 1000 + iteration_start.tv_nsec / 1000;
custom_stats_[WALL_TIME_ITERATION_END][current_iteration] =
iteration_end.tv_sec * 1000 * 1000 + iteration_end.tv_nsec / 1000;
}
// Store 'actual' wall time instead of iteration time (we want to include sleeping here!)
long wall_time_us = 0;
unsigned long wall_time_us = 0;
wall_time_us += (finish_time.tv_sec - iteration_start.tv_sec) * 1000l * 1000l;
wall_time_us += ((long) finish_time.tv_nsec - (long) iteration_start.tv_nsec) / 1000l;
printf("Difference: %ld\n", wall_time_us - times_[current_iteration]);
long nano_second_difference = ((long) finish_time.tv_nsec - (long) iteration_start.tv_nsec) / 1000l;
wall_time_us += nano_second_difference;
times_[current_iteration] = wall_time_us;
if (finish_time.tv_sec >= deadline_end.tv_sec && finish_time.tv_nsec > deadline_end.tv_nsec) {
printf("Deadline Miss!\n"); // TODO: Remove
deadline_misses++;
}
// Skip iterations if their start time is later than the current time (skipping)
while (finish_time.tv_sec >= iteration_end.tv_sec && finish_time.tv_nsec > iteration_end.tv_nsec) {
while (finish_time.tv_sec > iteration_end.tv_sec
|| (finish_time.tv_sec == iteration_end.tv_sec && finish_time.tv_nsec > iteration_end.tv_nsec)) {
iteration_start = iteration_end;
iteration_end = iteration_start;
......@@ -322,6 +336,8 @@ class benchmark_runner {
current_iteration++;
iteration_start = iteration_end;
}
printf("%ld deadline misses!\n", deadline_misses);
}
void commit_results(bool print_stats) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment