// main.cpp
#include "pls/pls.h"

#include "benchmark_runner.h"
#include "benchmark_base/matrix.h"
#include "benchmark_base/fft.h"

using namespace comparison_benchmarks::base;

/**
 * Recursive FFT step over `n` elements starting at `data`, using the PLS
 * scheduler for the two half-size sub-problems once `n` is above
 * fft::RECURSIVE_CUTOFF.
 *
 * Sequence: fft::divide on the full range, then both halves are processed
 * (serially through fft::conquer at or below the cutoff, otherwise through
 * pls::spawn / pls::spawn_and_sync), and finally fft::combine on the full range.
 *
 * @param data        Iterator to the first element of the range to transform.
 * @param swap_array  Iterator into the scratch buffer handed to fft::divide;
 *                    it is offset in lock-step with `data` for the upper half.
 * @param n           Number of elements in the range; ranges with fewer than
 *                    two elements are left untouched.
 */
void pls_conquer(fft::complex_vector::iterator data, fft::complex_vector::iterator swap_array, int n) {
  // Nothing to split or combine for fewer than two elements.
  if (n < 2) {
    return;
  }

  fft::divide(data, swap_array, n);

  const int half = n / 2;
  if (n > fft::RECURSIVE_CUTOFF) {
    // Large problem: hand both halves to the scheduler.
    // NOTE(review): spawn_and_sync presumably waits for both children before
    // returning, so combine below sees finished halves - confirm in pls docs.
    pls::spawn([data, swap_array, half]() { pls_conquer(data, swap_array, half); });
    pls::spawn_and_sync([data, swap_array, half]() { pls_conquer(data + half, swap_array + half, half); });
  } else {
    // Small problem: stay on this task and use the serial implementation.
    fft::conquer(data, swap_array, half);
    fft::conquer(data + half, swap_array + half, half);
  }

  fft::combine(data, n);
}

// Scheduler configuration; main() passes these as the second and third
// arguments of the pls::scheduler constructor.
// NOTE(review): MAX_STACK_SIZE is presumably a per-task stack size in bytes
// (1 x 4096) - confirm against the pls::scheduler documentation.
constexpr int MAX_NUM_TASKS{16};
constexpr int MAX_STACK_SIZE{4096 * 1};

int main(int argc, char **argv) {
  auto settings = benchmark_runner::parse_parameters(argc, argv);
  size_t matrix_size = settings.size_;
  size_t fft_size = 8192;

  string test_name = to_string(settings.num_threads_) + ".csv";
  string full_directory = settings.output_directory_ + "/PLS_v3/";
  benchmark_runner runner{full_directory, test_name};

  pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};

  // Data Containers
  fft::complex_vector fft_data(fft_size);
  fft::complex_vector fft_swap_array(fft_size);
  fft::fill_input(fft_data);
FritzFlorian committed
48 49 50
  matrix::matrix<double> matrix_a{matrix_size};
  matrix::matrix<double> matrix_b{matrix_size};
  matrix::matrix<double> matrix_result{matrix_size};
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97

  if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
#if PLS_PROFILING_ENABLED
    scheduler.get_profiler().disable_memory_measure();
    runner.add_custom_stats_field("T_1");
    runner.add_custom_stats_field("T_inf");
#endif
    printf("Running isolated measurement...\n");
    runner.enable_memory_stats();
    runner.pre_allocate_stats();

    runner.run_iterations(settings.iterations_, [&]() {
      // Serial Matrix Multiplication
      matrix_result.multiply(matrix_a, matrix_b);
      // Parallel FFT
      scheduler.perform_work([&]() {
        pls_conquer(fft_data.begin(), fft_swap_array.begin(), fft_size);
      });
    }, [&]() {
      fft::fill_input(fft_data); // Reset data before each run
    }, [&]() {
#if PLS_PROFILING_ENABLED
      runner.store_custom_stat("T_1", scheduler.get_profiler().current_run().t_1_);
      runner.store_custom_stat("T_inf", scheduler.get_profiler().current_run().t_inf_);
#endif
    });
    runner.commit_results(true);
  } else {
    printf("Running periodic measurement...\n");
    runner.enable_wall_time_stats();
    runner.pre_allocate_stats();

    runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() {
      // Serial Matrix Multiplication
      matrix_result.multiply(matrix_a, matrix_b);
      // Parallel FFT
      scheduler.perform_work([&]() {
        pls_conquer(fft_data.begin(), fft_swap_array.begin(), fft_size);
      });
      // Reset data before each run
      fft::fill_input(fft_data);
    });
    runner.commit_results(true);
  }

  return 0;
}