diff --git a/app/benchmark_fft/main.cpp b/app/benchmark_fft/main.cpp index a78a0ce..db22fee 100644 --- a/app/benchmark_fft/main.cpp +++ b/app/benchmark_fft/main.cpp @@ -39,6 +39,7 @@ int main(int argc, char **argv) { string full_directory = directory + "/PLS_v3/"; benchmark_runner runner{full_directory, test_name}; runner.enable_memory_stats(); + runner.pre_allocate_stats(); fft::complex_vector data(fft::SIZE); fft::complex_vector swap_array(fft::SIZE); @@ -47,12 +48,12 @@ int main(int argc, char **argv) { pls::scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; // scheduler.get_profiler().disable_memory_measure(); - runner.run_iterations(10, [&]() { + runner.run_iterations(fft::NUM_ITERATIONS, [&]() { scheduler.perform_work([&]() { pls_conquer(data.begin(), swap_array.begin(), fft::SIZE);; }); // scheduler.get_profiler().current_run().print_stats(); - }, 1, [&]() { + }, fft::NUM_WARMUP_ITERATIONS, [&]() { fft::fill_input(data); // Reset data before each run }); // scheduler.get_profiler().current_run().print_dag(std::cout); diff --git a/app/benchmark_fib/main.cpp b/app/benchmark_fib/main.cpp index 3df5218..ae5a2e4 100644 --- a/app/benchmark_fib/main.cpp +++ b/app/benchmark_fib/main.cpp @@ -37,6 +37,7 @@ int main(int argc, char **argv) { string full_directory = directory + "/PLS_v3/"; benchmark_runner runner{full_directory, test_name}; runner.enable_memory_stats(); + runner.pre_allocate_stats(); pls::scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; diff --git a/app/benchmark_matrix/main.cpp b/app/benchmark_matrix/main.cpp index 4ce92a8..ba02904 100644 --- a/app/benchmark_matrix/main.cpp +++ b/app/benchmark_matrix/main.cpp @@ -32,6 +32,7 @@ int main(int argc, char **argv) { string full_directory = directory + "/PLS_v3/"; benchmark_runner runner{full_directory, test_name}; runner.enable_memory_stats(); + runner.pre_allocate_stats(); pls_matrix a; pls_matrix b; diff --git a/app/benchmark_matrix_div_conquer/main.cpp b/app/benchmark_matrix_div_conquer/main.cpp index 8532657..8bf0e75 100644 --- a/app/benchmark_matrix_div_conquer/main.cpp +++ b/app/benchmark_matrix_div_conquer/main.cpp @@ -113,6 +113,7 @@ int main(int argc, char **argv) { string full_directory = directory + "/PLS_v3/"; benchmark_runner runner{full_directory, test_name}; runner.enable_memory_stats(); + runner.pre_allocate_stats(); // Only run on one version to avoid copy std::unique_ptr result_data{new double[size * size]}; diff --git a/app/benchmark_unbalanced/main.cpp b/app/benchmark_unbalanced/main.cpp index 7e151f9..82a981f 100644 --- a/app/benchmark_unbalanced/main.cpp +++ b/app/benchmark_unbalanced/main.cpp @@ -43,6 +43,7 @@ int main(int argc, char **argv) { string full_directory = directory + "/PLS_v3/"; benchmark_runner runner{full_directory, test_name}; runner.enable_memory_stats(); + runner.pre_allocate_stats(); scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; diff --git a/extern/benchmark_base/include/benchmark_base/fft.h b/extern/benchmark_base/include/benchmark_base/fft.h index d1c4c9d..77c1bcd 100644 --- a/extern/benchmark_base/include/benchmark_base/fft.h +++ b/extern/benchmark_base/include/benchmark_base/fft.h @@ -11,7 +11,7 @@ namespace fft { const int SIZE = 8192; const int NUM_ITERATIONS = 1000; -const int NUM_WARMUP_ITERATIONS = 100; +const int NUM_WARMUP_ITERATIONS = 0; const int RECURSIVE_CUTOFF = 32; typedef std::vector> complex_vector; diff --git a/extern/benchmark_runner/benchmark_runner.h b/extern/benchmark_runner/benchmark_runner.h index fc61b6a..f2c0cab 100644 --- a/extern/benchmark_runner/benchmark_runner.h +++ b/extern/benchmark_runner/benchmark_runner.h @@ -108,6 +108,15 @@ class benchmark_runner { add_custom_stats_field(MEMORY_POST_RUN); } + void pre_allocate_stats(size_t num = 100000) { + times_.reserve(num); + memset(times_.data(), 'a', num * sizeof(long)); + for (auto &iter : custom_stats_) { + iter.second.reserve(num); + memset(iter.second.data(), 'a', num * sizeof(long)); + } + } + static void read_args(int argc, char **argv, int &num_threads, string &path) { if (argc < 3) { cout << "Must Specifiy concurrency and output directory! (usage: `benchmark `)"