#include #include #include #include #include static constexpr int CUTOFF = 16; static constexpr int INPUT_SIZE = 8192; typedef std::vector> complex_vector; void divide(complex_vector::iterator data, int n) { complex_vector tmp_odd_elements(n / 2); for (int i = 0; i < n / 2; i++) { tmp_odd_elements[i] = data[i * 2 + 1]; } for (int i = 0; i < n / 2; i++) { data[i] = data[i * 2]; } for (int i = 0; i < n / 2; i++) { data[i + n / 2] = tmp_odd_elements[i]; } } void combine(complex_vector::iterator data, int n) { for (int i = 0; i < n / 2; i++) { std::complex even = data[i]; std::complex odd = data[i + n / 2]; // w is the "twiddle-factor". // this could be cached, but we run the same 'data_structures' algorithm parallel/serial, // so it won't impact the performance comparison. std::complex w = exp(std::complex(0, -2. * M_PI * i / n)); data[i] = even + w * odd; data[i + n / 2] = even - w * odd; } } void fft(complex_vector::iterator data, int n) { if (n < 2) { return; } PROFILE_WORK_BLOCK("Divide") divide(data, n); PROFILE_END_BLOCK PROFILE_WORK_BLOCK("Invoke Parallel") if (n == CUTOFF) { PROFILE_WORK_BLOCK("FFT Serial") fft(data, n / 2); fft(data + n / 2, n / 2); } else if (n <= CUTOFF) { fft(data, n / 2); fft(data + n / 2, n / 2); } else { pls::invoke( [n, &data] { fft(data, n / 2); }, [n, &data] { fft(data + n / 2, n / 2); } ); } PROFILE_END_BLOCK PROFILE_WORK_BLOCK("Combine") combine(data, n); PROFILE_END_BLOCK } complex_vector prepare_input(int input_size) { std::vector known_frequencies{2, 11, 52, 88, 256}; complex_vector data(input_size); // Set our input data to match a time series of the known_frequencies. // When applying fft to this time-series we should find these frequencies. for (int i = 0; i < input_size; i++) { data[i] = std::complex(0.0, 0.0); for (auto frequencie : known_frequencies) { data[i] += sin(2 * M_PI * frequencie * i / input_size); } } return data; } int main() { PROFILE_ENABLE pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 14}; pls::scheduler scheduler{&my_scheduler_memory, 8}; complex_vector initial_input = prepare_input(INPUT_SIZE); scheduler.perform_work([&] { PROFILE_MAIN_THREAD // Call looks just the same, only requirement is // the enclosure in the perform_work lambda. for (int i = 0; i < 10; i++) { PROFILE_WORK_BLOCK("Top Level FFT") complex_vector input = initial_input; fft(input.begin(), input.size()); } }); PROFILE_SAVE("test_profile.prof") }