main.cpp 2.78 KB
Newer Older
1
#include <pls/pls.h>
2
#include <pls/internal/helpers/profiler.h>
3

4
#include <iostream>
5 6
#include <complex>
#include <vector>
7

8 9 10 11
static constexpr int CUTOFF = 16;
static constexpr int NUM_ITERATIONS = 1000;
static constexpr int INPUT_SIZE = 2064;
typedef std::vector<std::complex<double>> complex_vector;
12

13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
void divide(complex_vector::iterator data, int n) {
  complex_vector tmp_odd_elements(n / 2);
  for (int i = 0; i < n / 2; i++) {
    tmp_odd_elements[i] = data[i * 2 + 1];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i] = data[i * 2];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i + n / 2] = tmp_odd_elements[i];
  }
}

void combine(complex_vector::iterator data, int n) {
  for (int i = 0; i < n / 2; i++) {
    std::complex<double> even = data[i];
    std::complex<double> odd = data[i + n / 2];

    // w is the "twiddle-factor".
    // this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
    // so it won't impact the performance comparison.
    std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
35

36 37
    data[i] = even + w * odd;
    data[i + n / 2] = even - w * odd;
38
  }
39 40 41 42 43
}

void fft(complex_vector::iterator data, int n) {
  if (n < 2) {
    return;
44 45
  }

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
  PROFILE_WORK_BLOCK("Divide")
  divide(data, n);
  PROFILE_END_BLOCK
  PROFILE_WORK_BLOCK("Invoke Parallel")
  if (n == CUTOFF) {
    PROFILE_WORK_BLOCK("FFT Serial")
    fft(data, n / 2);
    fft(data + n / 2, n / 2);
  } else if (n <= CUTOFF) {
    fft(data, n / 2);
    fft(data + n / 2, n / 2);
  } else {
    pls::invoke_parallel(
        [&] { fft(data, n / 2); },
        [&] { fft(data + n / 2, n / 2); }
    );
  }
  PROFILE_END_BLOCK
  PROFILE_WORK_BLOCK("Combine")
  combine(data, n);
  PROFILE_END_BLOCK
67 68
}

69 70 71 72 73 74 75 76 77 78 79
complex_vector prepare_input(int input_size) {
  std::vector<double> known_frequencies{2, 11, 52, 88, 256};
  complex_vector data(input_size);

  // Set our input data to match a time series of the known_frequencies.
  // When applying fft to this time-series we should find these frequencies.
  for (int i = 0; i < input_size; i++) {
    data[i] = std::complex<double>(0.0, 0.0);
    for (auto frequencie : known_frequencies) {
      data[i] += sin(2 * M_PI * frequencie * i / input_size);
    }
80 81
  }

82
  return data;
83 84 85
}

int main() {
86
  PROFILE_ENABLE
87
  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 14};
88 89
  pls::scheduler scheduler{&my_scheduler_memory, 8};

90
  complex_vector initial_input = prepare_input(INPUT_SIZE);
91 92 93 94 95
  scheduler.perform_work([&] {
    PROFILE_MAIN_THREAD
    // Call looks just the same, only requirement is
    // the enclosure in the perform_work lambda.
    for (int i = 0; i < 10; i++) {
96 97 98
      PROFILE_WORK_BLOCK("Top Level FFT")
      complex_vector input = initial_input;
      fft(input.begin(), input.size());
99 100 101 102
    }
  });

  PROFILE_SAVE("test_profile.prof")
103
}