main.cpp 7.43 KB
Newer Older
1 2
#include "pls/pls.h"
using namespace pls;
3 4

#include "benchmark_runner.h"
5 6 7
#include "benchmark_base/matrix_div_conquer.h"

using namespace comparison_benchmarks::base;
8 9 10

#include <memory>
#include <array>
11
#include <vector>
12

13 14 15 16 17 18 19
void multiply_div_conquer(const std::vector<std::vector<std::vector<std::unique_ptr<double[]>>>> &tmp_arrays,
                          pls::strain_local_resource &local_indices,
                          size_t size,
                          size_t depth,
                          matrix_div_conquer::blocked_matrix_view &result,
                          matrix_div_conquer::blocked_matrix_view &a,
                          matrix_div_conquer::blocked_matrix_view &b) {
20 21 22 23 24 25

  if (size <= 8) {
    multiply_naive(size, result, a, b);
    return;
  }
  // Temporary storage required for the intermediate results
26 27 28 29 30 31 32 33 34
  auto strain_local_index = local_indices.get_item(depth);
  std::unique_ptr<double[]> const &data_1_1_a = tmp_arrays[depth][strain_local_index.get_strain_index()][0];
  std::unique_ptr<double[]> const &data_1_1_b = tmp_arrays[depth][strain_local_index.get_strain_index()][1];
  std::unique_ptr<double[]> const &data_1_2_a = tmp_arrays[depth][strain_local_index.get_strain_index()][2];
  std::unique_ptr<double[]> const &data_1_2_b = tmp_arrays[depth][strain_local_index.get_strain_index()][3];
  std::unique_ptr<double[]> const &data_2_1_a = tmp_arrays[depth][strain_local_index.get_strain_index()][4];
  std::unique_ptr<double[]> const &data_2_1_b = tmp_arrays[depth][strain_local_index.get_strain_index()][5];
  std::unique_ptr<double[]> const &data_2_2_a = tmp_arrays[depth][strain_local_index.get_strain_index()][6];
  std::unique_ptr<double[]> const &data_2_2_b = tmp_arrays[depth][strain_local_index.get_strain_index()][7];
35 36

  // Handles to sub-matrices used
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
  matrix_div_conquer::blocked_matrix_view result_1_1 = result.quadrant_1_1();
  matrix_div_conquer::blocked_matrix_view result_1_2 = result.quadrant_1_2();
  matrix_div_conquer::blocked_matrix_view result_2_1 = result.quadrant_2_1();
  matrix_div_conquer::blocked_matrix_view result_2_2 = result.quadrant_2_2();

  matrix_div_conquer::blocked_matrix_view result_1_1_a{data_1_1_a.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_1_1_b{data_1_1_b.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_1_2_a{data_1_2_a.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_1_2_b{data_1_2_b.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_2_1_a{data_2_1_a.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_2_1_b{data_2_1_b.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_2_2_a{data_2_2_a.get(), size / 2};
  matrix_div_conquer::blocked_matrix_view result_2_2_b{data_2_2_b.get(), size / 2};

  matrix_div_conquer::blocked_matrix_view a_1_1 = a.quadrant_1_1();
  matrix_div_conquer::blocked_matrix_view a_1_2 = a.quadrant_1_2();
  matrix_div_conquer::blocked_matrix_view a_2_1 = a.quadrant_2_1();
  matrix_div_conquer::blocked_matrix_view a_2_2 = a.quadrant_2_2();

  matrix_div_conquer::blocked_matrix_view b_1_1 = b.quadrant_1_1();
  matrix_div_conquer::blocked_matrix_view b_1_2 = b.quadrant_1_2();
  matrix_div_conquer::blocked_matrix_view b_2_1 = b.quadrant_2_1();
  matrix_div_conquer::blocked_matrix_view b_2_2 = b.quadrant_2_2();
60 61

  // Divide Work Into Sub-Calls
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
  pls::spawn(
      [&]() { multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_1_1_a, a_1_1, b_1_1); }
  );
  pls::spawn(
      [&]() { multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_1_1_b, a_1_2, b_2_1); }
  );

  pls::spawn(
      [&]() { multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_1_2_a, a_1_1, b_1_2); }
  );
  pls::spawn(
      [&]() { multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_1_2_b, a_1_2, b_2_2); }
  );

  pls::spawn(
      [&]() { multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_2_1_a, a_2_1, b_1_1); }
  );
  pls::spawn(
      [&]() { multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_2_1_b, a_2_2, b_2_1); }
  );

  pls::spawn(
      [&]() { multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_2_2_a, a_2_1, b_1_2); }
  );
  pls::spawn(
      [&]() { multiply_div_conquer(tmp_arrays, local_indices, size / 2, depth + 1, result_2_2_b, a_2_2, b_2_2); }
  );

  pls::sync();
91 92

  // Combine results
93 94 95 96 97 98
  for (size_t i = 0; i < (size / 2) * (size / 2); i++) {
    // The layout is not important here, ass all have the same order, so just sum element wise
    result_1_1.get_data()[i] = result_1_1_a.get_data()[i] + result_1_1_b.get_data()[i];
    result_1_2.get_data()[i] = result_1_2_a.get_data()[i] + result_1_2_b.get_data()[i];
    result_2_1.get_data()[i] = result_2_1_a.get_data()[i] + result_2_1_b.get_data()[i];
    result_2_2.get_data()[i] = result_2_2_a.get_data()[i] + result_2_2_b.get_data()[i];
99 100 101
  }
}

102
constexpr int MAX_NUM_TASKS = 16;
103
constexpr int MAX_STACK_SIZE = 4096 * 2;
104 105

int main(int argc, char **argv) {
106 107
  auto settings = benchmark_runner::parse_parameters(argc, argv);
  const size_t size = settings.size_;
108

109 110 111

  // Only run on one version to avoid copy
  std::unique_ptr<double[]> result_data{new double[size * size]};
112 113 114
  std::unique_ptr<double[]> a_data{new double[size * size]};
  std::unique_ptr<double[]> b_data{new double[size * size]};

115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
  matrix_div_conquer::blocked_matrix_view a{a_data.get(), size};
  matrix_div_conquer::blocked_matrix_view b{b_data.get(), size};
  matrix_div_conquer::blocked_matrix_view result{result_data.get(), size};

  // Fill data arrays as needed
  a.fill_default_data();
  b.fill_default_data();
  matrix_div_conquer::fill_block_lookup(size);

  // Strain local data
  std::vector<std::vector<std::vector<std::unique_ptr<double[]>>>> div_conquer_temp_arrays;
  size_t max_depth = 0;
  size_t remaining_size = size;
  while (remaining_size > 1) {
    auto &depth_buffers = div_conquer_temp_arrays.emplace_back();
    for (int thread_id = 0; thread_id < 8; thread_id++) {
      auto &depth_thread_buffers = depth_buffers.emplace_back();
      for (int i = 0; i < 8; i++) {
        depth_thread_buffers.emplace_back(new double[(remaining_size / 2) * (remaining_size / 2)]);
      }
135
    }
136 137 138

    max_depth++;
    remaining_size = remaining_size / 2;
139
  }
140 141 142 143 144
  pls::strain_local_resource local_indices{(unsigned) settings.num_threads_, (unsigned) max_depth};

  string test_name = to_string(settings.num_threads_) + ".csv";
  string full_directory = settings.output_directory_ + "/PLS_v3/";
  benchmark_runner runner{full_directory, test_name};
145

146
  pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};
147

148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
  if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
    printf("Running isolated measurement...\n");
    runner.enable_memory_stats();
    runner.pre_allocate_stats();

    runner.run_iterations(settings.iterations_, [&]() {
      scheduler.perform_work([&]() {
        multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, result, a, b);
      });
    });
    runner.commit_results(true);
  } else {
    printf("Running periodic measurement...\n");
    runner.enable_wall_time_stats();
    runner.pre_allocate_stats();

    runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() {
      scheduler.perform_work([&]() {
        multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, result, a, b);
      });
168
    });
169 170
    runner.commit_results(true);
  }
171

172
}