#include #include #include #include #include #include static constexpr int CUTOFF = 10; static constexpr int NUM_ITERATIONS = 1000; static constexpr int INPUT_SIZE = 2064; typedef std::vector> complex_vector; void divide(complex_vector::iterator data, int n) { complex_vector tmp_odd_elements(n / 2); for (int i = 0; i < n / 2; i++) { tmp_odd_elements[i] = data[i * 2 + 1]; } for (int i = 0; i < n / 2; i++) { data[i] = data[i * 2]; } for (int i = 0; i < n / 2; i++) { data[i + n / 2] = tmp_odd_elements[i]; } } void combine(complex_vector::iterator data, int n) { for (int i = 0; i < n / 2; i++) { std::complex even = data[i]; std::complex odd = data[i + n / 2]; // w is the "twiddle-factor". // this could be cached, but we run the same 'base' algorithm parallel/serial, // so it won't impact the performance comparison. std::complex w = exp(std::complex(0, -2. * M_PI * i / n)); data[i] = even + w * odd; data[i + n / 2] = even - w * odd; } } void fft(complex_vector::iterator data, int n) { if (n < 2) { return; } divide(data, n); if (n <= CUTOFF) { fft(data, n / 2); fft(data + n / 2, n / 2); } else { pls::invoke_parallel( [&] { fft(data, n / 2); }, [&] { fft(data + n / 2, n / 2); } ); } combine(data, n); } complex_vector prepare_input(int input_size) { std::vector known_frequencies{2, 11, 52, 88, 256}; complex_vector data(input_size); // Set our input data to match a time series of the known_frequencies. // When applying fft to this time-series we should find these frequencies. for (int i = 0; i < input_size; i++) { data[i] = std::complex(0.0, 0.0); for (auto frequencie : known_frequencies) { data[i] += sin(2 * M_PI * frequencie * i / input_size); } } return data; } int main() { PROFILE_ENABLE complex_vector initial_input = prepare_input(INPUT_SIZE); pls::internal::helpers::run_mini_benchmark([&] { complex_vector input = initial_input; fft(input.begin(), input.size()); }, 8); PROFILE_SAVE("test_profile.prof") }