main.cpp 4.01 KB
Newer Older
1 2 3
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/parallel_result.h"
#include "pls/internal/scheduling/scheduler_memory.h"
4
#include "pls/internal/helpers/profiler.h"
5
using namespace pls::internal::scheduling;
6 7 8 9

#include <iostream>
#include <complex>
#include <vector>
10 11
#include <atomic>

12
static constexpr int CUTOFF = 16;
13
static constexpr int INPUT_SIZE = 16384;
14 15 16
typedef std::vector<std::complex<double>> complex_vector;

void divide(complex_vector::iterator data, int n) {
17 18 19 20 21 22 23 24 25 26
  complex_vector tmp_odd_elements(n / 2);
  for (int i = 0; i < n / 2; i++) {
    tmp_odd_elements[i] = data[i * 2 + 1];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i] = data[i * 2];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i + n / 2] = tmp_odd_elements[i];
  }
27 28 29
}

void combine(complex_vector::iterator data, int n) {
30 31 32
  for (int i = 0; i < n / 2; i++) {
    std::complex<double> even = data[i];
    std::complex<double> odd = data[i + n / 2];
33

34 35 36 37
    // w is the "twiddle-factor".
    // this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
    // so it won't impact the performance comparison.
    std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
38

39 40 41
    data[i] = even + w * odd;
    data[i + n / 2] = even - w * odd;
  }
42 43
}

44
void fft_normal(complex_vector::iterator data, int n) {
45 46 47
  if (n < 2) {
    return;
  }
48

49
  divide(data, n);
50 51 52 53 54 55 56
  fft_normal(data, n / 2);
  fft_normal(data + n / 2, n / 2);
  combine(data, n);
}

parallel_result<short> fft(complex_vector::iterator data, int n) {
  if (n < 2) {
57
    return parallel_result<short>{0};
58 59 60
  }

  divide(data, n);
61
  if (n <= CUTOFF) {
62 63
    fft_normal(data, n / 2);
    fft_normal(data + n / 2, n / 2);
64 65
    combine(data, n);
    return parallel_result<short>{0};
66
  } else {
67 68 69 70 71 72
    return scheduler::par([=]() {
      return fft(data, n / 2);
    }, [=]() {
      return fft(data + n / 2, n / 2);
    }).then([=](int, int) {
      combine(data, n);
73
      return parallel_result<short>{0};
74
    });
75
  }
76 77 78
}

complex_vector prepare_input(int input_size) {
79 80
  std::vector<double> known_frequencies{2, 11, 52, 88, 256};
  complex_vector data(input_size);
81

82 83 84 85 86 87
  // Set our input data to match a time series of the known_frequencies.
  // When applying fft to this time-series we should find these frequencies.
  for (int i = 0; i < input_size; i++) {
    data[i] = std::complex<double>(0.0, 0.0);
    for (auto frequencie : known_frequencies) {
      data[i] += sin(2 * M_PI * frequencie * i / input_size);
88
    }
89
  }
90

91
  return data;
92 93
}

94 95
static constexpr int NUM_ITERATIONS = 500;
constexpr size_t NUM_THREADS = 2;
96

97
constexpr size_t NUM_TASKS = 128;
98

99 100
constexpr size_t NUM_CONTS = 128;
constexpr size_t MAX_CONT_SIZE = 512;
101

102
int main() {
103
  PROFILE_ENABLE;
104
  complex_vector initial_input = prepare_input(INPUT_SIZE);
105

106 107 108 109 110 111 112 113 114 115 116
  static_scheduler_memory<NUM_THREADS,
                          NUM_TASKS,
                          NUM_CONTS,
                          MAX_CONT_SIZE> static_scheduler_memory;

  scheduler scheduler{static_scheduler_memory, NUM_THREADS};

  auto start = std::chrono::steady_clock::now();
  for (int i = 0; i < NUM_ITERATIONS; i++) {
    complex_vector input_2(initial_input);
    scheduler.perform_work([&]() {
117
      PROFILE_MAIN_THREAD;
118 119 120 121 122
      return scheduler::par([&]() {
        return fft(input_2.begin(), INPUT_SIZE);
      }, []() {
        return parallel_result<int>{0};
      }).then([](int, int) {
123
        return parallel_result<int>{0};
124 125
      });
    });
126
    PROFILE_LOCK("DONE");
127 128 129 130
  }
  auto end = std::chrono::steady_clock::now();
  std::cout << "Framework:  " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
            << std::endl;
131
  PROFILE_SAVE("test_profile.prof");
132

133 134 135 136 137 138 139 140
  start = std::chrono::steady_clock::now();
  for (int i = 0; i < NUM_ITERATIONS; i++) {
    complex_vector input_1(initial_input);
    fft_normal(input_1.begin(), INPUT_SIZE);
  }
  end = std::chrono::steady_clock::now();
  std::cout << "Normal:     " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
            << std::endl;
141

142
  return 0;
143
}