diff --git a/app/benchmark_fft/main.cpp b/app/benchmark_fft/main.cpp index 5a35e16..cbd5fb9 100644 --- a/app/benchmark_fft/main.cpp +++ b/app/benchmark_fft/main.cpp @@ -1,33 +1,28 @@ #include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/static_scheduler_memory.h" -#include "pls/internal/helpers/profiler.h" using namespace pls::internal::scheduling; -#include -#include -#include - #include "benchmark_runner.h" #include "benchmark_base/fft.h" using namespace comparison_benchmarks::base; -void conquer(fft::complex_vector::iterator data, int n) { +void pls_conquer(fft::complex_vector::iterator data, fft::complex_vector::iterator swap_array, int n) { if (n < 2) { return; } - fft::divide(data, n); + fft::divide(data, swap_array, n); if (n <= fft::RECURSIVE_CUTOFF) { - fft::conquer(data, n / 2); - fft::conquer(data + n / 2, n / 2); + fft::conquer(data, swap_array, n / 2); + fft::conquer(data + n / 2, swap_array + n / 2, n / 2); } else { - scheduler::spawn([data, n]() { - conquer(data, n / 2); + scheduler::spawn([data, n, swap_array]() { + pls_conquer(data, swap_array, n / 2); }); - scheduler::spawn([data, n]() { - conquer(data + n / 2, n / 2); + scheduler::spawn([data, n, swap_array]() { + pls_conquer(data + n / 2, swap_array + n / 2, n / 2); }); scheduler::sync(); } @@ -37,11 +32,7 @@ void conquer(fft::complex_vector::iterator data, int n) { constexpr int MAX_NUM_THREADS = 8; constexpr int MAX_NUM_TASKS = 32; -constexpr int MAX_STACK_SIZE = 1024 * 32; - -static_scheduler_memory global_scheduler_memory; +constexpr int MAX_STACK_SIZE = 1024 * 8; int main(int argc, char **argv) { int num_threads; @@ -53,12 +44,16 @@ int main(int argc, char **argv) { benchmark_runner runner{full_directory, test_name}; fft::complex_vector data = fft::generate_input(); + fft::complex_vector swap_array(data.size()); + static_scheduler_memory global_scheduler_memory; scheduler scheduler{global_scheduler_memory, (unsigned) num_threads}; runner.run_iterations(fft::NUM_ITERATIONS, [&]() { scheduler.perform_work([&]() { - conquer(data.begin(), fft::SIZE);; + pls_conquer(data.begin(), swap_array.begin(), fft::SIZE);; }); }, fft::NUM_WARMUP_ITERATIONS); runner.commit_results(true); diff --git a/extern/benchmark_base/include/benchmark_base/fft.h b/extern/benchmark_base/include/benchmark_base/fft.h index f778b43..c7be893 100644 --- a/extern/benchmark_base/include/benchmark_base/fft.h +++ b/extern/benchmark_base/include/benchmark_base/fft.h @@ -18,8 +18,8 @@ typedef std::vector> complex_vector; complex_vector generate_input(); -void divide(complex_vector::iterator data, int n); -void conquer(complex_vector::iterator data, int n); +void divide(complex_vector::iterator data, complex_vector::iterator swap_array, int n); +void conquer(complex_vector::iterator data, complex_vector::iterator swap_array, int n); void combine(complex_vector::iterator data, int n); } diff --git a/extern/benchmark_base/src/fft.cpp b/extern/benchmark_base/src/fft.cpp index c85c0b3..95f6a64 100644 --- a/extern/benchmark_base/src/fft.cpp +++ b/extern/benchmark_base/src/fft.cpp @@ -19,8 +19,7 @@ complex_vector generate_input() { return data; } -void divide(complex_vector::iterator data, int n) { - complex_vector tmp_odd_elements(n / 2); +void divide(complex_vector::iterator data, complex_vector::iterator tmp_odd_elements, int n) { for (int i = 0; i < n / 2; i++) { tmp_odd_elements[i] = data[i * 2 + 1]; } @@ -47,14 +46,14 @@ void combine(complex_vector::iterator data, int n) { } } -void conquer(complex_vector::iterator data, int n) { +void conquer(complex_vector::iterator data, complex_vector::iterator swap_array, int n) { if (n < 2) { return; } - divide(data, n); - conquer(data, n / 2); - conquer(data + n / 2, n / 2); + divide(data, swap_array, n); + conquer(data, swap_array, n / 2); + conquer(data + n / 2, swap_array + n / 2, n / 2); combine(data, n); }