main.cpp 2.74 KB
Newer Older
1
#include <pls/pls.h>
2
#include <pls/internal/helpers/profiler.h>
3

4
#include <iostream>
5 6
#include <complex>
#include <vector>
7

8 9 10
static constexpr int CUTOFF = 16;
static constexpr int INPUT_SIZE = 2064;
typedef std::vector<std::complex<double>> complex_vector;
11

12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
void divide(complex_vector::iterator data, int n) {
  complex_vector tmp_odd_elements(n / 2);
  for (int i = 0; i < n / 2; i++) {
    tmp_odd_elements[i] = data[i * 2 + 1];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i] = data[i * 2];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i + n / 2] = tmp_odd_elements[i];
  }
}

void combine(complex_vector::iterator data, int n) {
  for (int i = 0; i < n / 2; i++) {
    std::complex<double> even = data[i];
    std::complex<double> odd = data[i + n / 2];

    // w is the "twiddle-factor".
    // this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
    // so it won't impact the performance comparison.
    std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
34

35 36
    data[i] = even + w * odd;
    data[i + n / 2] = even - w * odd;
37
  }
38 39 40 41 42
}

void fft(complex_vector::iterator data, int n) {
  if (n < 2) {
    return;
43 44
  }

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
  PROFILE_WORK_BLOCK("Divide")
  divide(data, n);
  PROFILE_END_BLOCK
  PROFILE_WORK_BLOCK("Invoke Parallel")
  if (n == CUTOFF) {
    PROFILE_WORK_BLOCK("FFT Serial")
    fft(data, n / 2);
    fft(data + n / 2, n / 2);
  } else if (n <= CUTOFF) {
    fft(data, n / 2);
    fft(data + n / 2, n / 2);
  } else {
    pls::invoke_parallel(
        [&] { fft(data, n / 2); },
        [&] { fft(data + n / 2, n / 2); }
    );
  }
  PROFILE_END_BLOCK
  PROFILE_WORK_BLOCK("Combine")
  combine(data, n);
  PROFILE_END_BLOCK
66 67
}

68 69 70 71 72 73 74 75 76 77 78
complex_vector prepare_input(int input_size) {
  std::vector<double> known_frequencies{2, 11, 52, 88, 256};
  complex_vector data(input_size);

  // Set our input data to match a time series of the known_frequencies.
  // When applying fft to this time-series we should find these frequencies.
  for (int i = 0; i < input_size; i++) {
    data[i] = std::complex<double>(0.0, 0.0);
    for (auto frequencie : known_frequencies) {
      data[i] += sin(2 * M_PI * frequencie * i / input_size);
    }
79 80
  }

81
  return data;
82 83 84
}

int main() {
85
  PROFILE_ENABLE
86
  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 14};
87 88
  pls::scheduler scheduler{&my_scheduler_memory, 8};

89
  complex_vector initial_input = prepare_input(INPUT_SIZE);
90 91 92 93 94
  scheduler.perform_work([&] {
    PROFILE_MAIN_THREAD
    // Call looks just the same, only requirement is
    // the enclosure in the perform_work lambda.
    for (int i = 0; i < 10; i++) {
95 96 97
      PROFILE_WORK_BLOCK("Top Level FFT")
      complex_vector input = initial_input;
      fft(input.begin(), input.size());
98 99 100 101
    }
  });

  PROFILE_SAVE("test_profile.prof")
102
}