Fix matrix multiplication benchmark for new scheduler.

7796022f · FritzFlorian · 01596ff3 · 7796022f · 7796022f · 7796022f
Commit 7796022f authored Jan 30, 2020 by FritzFlorian
Showing 4 changed files with 51 additions and 67 deletions

app/benchmark_matrix/main.cpp
+17 -35

lib/pls/include/pls/algorithms/for_each.h
+4 -6

lib/pls/include/pls/algorithms/for_each_impl.h
+18 -14

lib/pls/include/pls/internal/helpers/range.h
+12 -12

No files found.
--- a/app/benchmark_matrix/main.cpp
+++ b/app/benchmark_matrix/main.cpp
 #include "pls/internal/scheduling/scheduler.h"
-#include "pls/internal/scheduling/parallel_result.h"
-#include "pls/internal/scheduling/scheduler_memory.h"
+#include "pls/internal/scheduling/static_scheduler_memory.h"
 #include "pls/algorithms/for_each.h"

 using namespace pls::internal::scheduling;
@@ -15,17 +14,20 @@ class pls_matrix : public matrix::matrix<T, SIZE> {
 public:
  pls_matrix() : matrix::matrix<T, SIZE>() {}

-  parallel_result<int> pls_multiply(const matrix::matrix<T, SIZE> &a, const matrix::matrix<T, SIZE> &b) {
-    return pls::algorithm::for_each_range(0, SIZE, [this, &a, &b](int i) {
+  void pls_multiply(const matrix::matrix<T, SIZE> &a, const matrix::matrix<T, SIZE> &b) {
+    pls::algorithm::for_each_range(0, SIZE, [this, &a, &b](int i) {
      this->multiply_column(i, a, b);
    });
  }
 };

-constexpr size_t MAX_NUM_THREADS = 8;
-constexpr size_t MAX_NUM_TASKS = 32;
-constexpr size_t MAX_NUM_CONTS = 32;
-constexpr size_t MAX_CONT_SIZE = 512;
+constexpr int MAX_NUM_THREADS = 8;
+constexpr int MAX_NUM_TASKS = 32;
+constexpr int MAX_STACK_SIZE = 1024 * 1;
+
+static_scheduler_memory<MAX_NUM_THREADS,
+                        MAX_NUM_TASKS,
+                        MAX_STACK_SIZE> global_scheduler_memory;

 int main(int argc, char **argv) {
  int num_threads;
@@ -40,40 +42,20 @@ int main(int argc, char **argv) {
  pls_matrix<double, matrix::MATRIX_SIZE> b;
  pls_matrix<double, matrix::MATRIX_SIZE> result;

-  static_scheduler_memory<MAX_NUM_THREADS,
-                          MAX_NUM_TASKS,
-                          MAX_NUM_CONTS,
-                          MAX_CONT_SIZE> static_scheduler_memory;
-
-  scheduler scheduler{static_scheduler_memory, (unsigned int) num_threads};
-
-  for (int i = 0; i < matrix::WARMUP_ITERATIONS; i++) {
+  scheduler scheduler{global_scheduler_memory, (unsigned) num_threads};

  scheduler.perform_work([&]() {
-      return scheduler::par([&]() {
-        return result.pls_multiply(a, b);
-      }, []() {
-        return parallel_result<int>{0};
-      }).then([&](int, int) {
-        return parallel_result<int>{0};
-      });
-    });
+    for (int i = 0; i < matrix::WARMUP_ITERATIONS; i++) {
+      result.pls_multiply(a, b);
    }
+  });

-  for (int i = 0; i < matrix::NUM_ITERATIONS; i++) {
  scheduler.perform_work([&]() {
+    for (int i = 0; i < matrix::NUM_ITERATIONS; i++) {
      runner.start_iteration();
-
-      return scheduler::par([&]() {
-        return result.pls_multiply(a, b);
-      }, []() {
-        return parallel_result<int>{0};
-      }).then([&](int, int) {
+      result.pls_multiply(a, b);
      runner.end_iteration();
-        return parallel_result<int>{0};
-      });
-    });
    }
+  });
  runner.commit_results(true);
-
 }
--- a/lib/pls/include/pls/algorithms/for_each.h
+++ b/lib/pls/include/pls/algorithms/for_each.h
@@ -2,8 +2,6 @@
 #ifndef PLS_PARALLEL_FOR_H
 #define PLS_PARALLEL_FOR_H

-#include "pls/internal/scheduling/parallel_result.h"
-
 namespace pls {
 namespace algorithm {

@@ -11,24 +9,24 @@ class fixed_strategy;
 class dynamic_strategy;

 template<typename Function, typename ExecutionStrategy>
-pls::internal::scheduling::parallel_result<int> for_each_range(unsigned long first,
+void for_each_range(unsigned long first,
                    unsigned long last,
                    const Function &function,
                    ExecutionStrategy &execution_strategy);

 template<typename Function>
-pls::internal::scheduling::parallel_result<int> for_each_range(unsigned long first,
+void for_each_range(unsigned long first,
                    unsigned long last,
                    const Function &function);

 template<typename RandomIt, typename Function, typename ExecutionStrategy>
-pls::internal::scheduling::parallel_result<int> for_each(RandomIt first,
+void for_each(RandomIt first,
              RandomIt last,
              const Function &function,
              ExecutionStrategy execution_strategy);

 template<typename RandomIt, typename Function>
-pls::internal::scheduling::parallel_result<int> for_each(RandomIt first,
+void for_each(RandomIt first,
              RandomIt last,
              const Function &function);


--- a/lib/pls/include/pls/algorithms/for_each_impl.h
+++ b/lib/pls/include/pls/algorithms/for_each_impl.h
@@ -11,7 +11,7 @@ namespace algorithm {
 namespace internal {

 template<typename RandomIt, typename Function>
-pls::internal::scheduling::parallel_result<int> for_each(const RandomIt first,
+void for_each(const RandomIt first,
              const RandomIt last,
              const Function function,
              const long min_elements) {
@@ -23,25 +23,23 @@ pls::internal::scheduling::parallel_result<int> for_each(const RandomIt first,
    for (auto current = first; current != last; current++) {
      function(*current);
    }
-
-    return parallel_result<int>{0};
  } else {
    // Cut in half recursively
    const long middle_index = num_elements / 2;

-    return scheduler::par([first, middle_index, last, function, min_elements] {
+    scheduler::spawn([first, middle_index, last, &function, min_elements] {
      return internal::for_each(first,
                                first + middle_index,
                                function,
                                min_elements);
-    }, [first, middle_index, last, function, min_elements] {
+    });
+    scheduler::spawn([first, middle_index, last, &function, min_elements] {
      return internal::for_each(first + middle_index,
                                last,
                                function,
                                min_elements);
-    }).then([](int, int) {
-      return parallel_result<int>{0};
    });
+    scheduler::sync();
  }
 }

@@ -52,7 +50,7 @@ class dynamic_strategy {
  explicit dynamic_strategy(const unsigned int tasks_per_thread = 4) : tasks_per_thread_{tasks_per_thread} {};

  long calculate_min_elements(long num_elements) const {
-    const long num_threads = pls::internal::scheduling::thread_state::get().scheduler_->num_threads();
+    const long num_threads = pls::internal::scheduling::thread_state::get().get_scheduler().num_threads();
    return num_elements / (num_threads * tasks_per_thread_);
  }
 private:
@@ -71,21 +69,27 @@ class fixed_strategy {
 };

 template<typename RandomIt, typename Function, typename ExecutionStrategy>
-pls::internal::scheduling::parallel_result<int> for_each(RandomIt first,
+void for_each(RandomIt
+              first,
              RandomIt last,
              const Function &function,
-                                                         ExecutionStrategy execution_strategy) {
+              ExecutionStrategy
+              execution_strategy) {
  long num_elements = std::distance(first, last);
-  return internal::for_each(first, last, function, execution_strategy.calculate_min_elements(num_elements));
+  return
+      internal::for_each(first, last, function, execution_strategy
+          .
+              calculate_min_elements(num_elements)
+      );
 }

 template<typename RandomIt, typename Function>
-pls::internal::scheduling::parallel_result<int> for_each(RandomIt first, RandomIt last, const Function &function) {
+void for_each(RandomIt first, RandomIt last, const Function &function) {
  return for_each(first, last, function, dynamic_strategy{4});
 }

 template<typename Function, typename ExecutionStrategy>
-pls::internal::scheduling::parallel_result<int> for_each_range(unsigned long first,
+void for_each_range(unsigned long first,
                    unsigned long last,
                    const Function &function,
                    ExecutionStrategy execution_strategy) {
@@ -94,7 +98,7 @@ pls::internal::scheduling::parallel_result<int> for_each_range(unsigned long fir
 }

 template<typename Function>
-pls::internal::scheduling::parallel_result<int> for_each_range(unsigned long first,
+void for_each_range(unsigned long first,
                    unsigned long last,
                    const Function &function) {
  auto range = pls::internal::helpers::range(first, last);

--- a/lib/pls/include/pls/internal/helpers/range.h
+++ b/lib/pls/include/pls/internal/helpers/range.h
@@ -112,7 +112,7 @@ struct basic_range {
        : r(rhs.r), index(rhs.index) {}

    const_iterator_impl(basic_range<IntegerType> const *p_range, size_type p_index)
-        : r(*p_range), index(p_index) {}
+        : r(p_range), index(p_index) {}

    const_iterator_impl &operator=(const const_iterator_impl &rhs) {
      r = rhs.r;
@@ -121,7 +121,7 @@ struct basic_range {
    }

    bool operator==(const const_iterator_impl &rhs) const {
-      return r == rhs.r && index == rhs.index;
+      return *r == *(rhs.r) && index == rhs.index;
    }

    bool operator!=(const const_iterator_impl &rhs) const {
@@ -145,7 +145,7 @@ struct basic_range {
    }

    value_type operator*() const {
-      return r.m_first_element + r.m_step * index;
+      return r->m_first_element + r->m_step * index;
    }

    // operator->
@@ -212,11 +212,11 @@ struct basic_range {

    value_type operator[](difference_type offset) const {
      size_type new_index = index + offset;
-      return r.m_first_element + r.m_step * new_index;
+      return r->m_first_element + r->m_step * new_index;
    }

   private:
-    basic_range<IntegerType> r;
+    basic_range<IntegerType> const *r;
    size_type index;
  };

@@ -236,7 +236,7 @@ struct basic_range {
        : r(rhs.r), index(rhs.index) {}

    const_reverse_iterator_impl(basic_range<IntegerType> const *p_range, size_type p_index)
-        : r(*p_range), index(p_index) {}
+        : r(p_range), index(p_index) {}

    const_reverse_iterator_impl &operator=(const const_reverse_iterator_impl &rhs) {
      r = rhs.r;
@@ -245,7 +245,7 @@ struct basic_range {
    }

    bool operator==(const const_reverse_iterator_impl &rhs) const {
-      return r == rhs.r && index == rhs.index;
+      return *r == *(rhs.r) && index == rhs.index;
    }

    bool operator!=(const const_reverse_iterator_impl &rhs) const {
@@ -270,8 +270,8 @@ struct basic_range {

    value_type operator*() const {
      size_type reverse_index
-          = (r.m_element_count - 1) - index;
-      return r.m_first_element + r.m_step * reverse_index;
+          = (r->m_element_count - 1) - index;
+      return r->m_first_element + r->m_step * reverse_index;
    }

    // operator->
@@ -338,12 +338,12 @@ struct basic_range {

    value_type operator[](difference_type offset) const {
      size_type new_reverse_index
-          = (r.m_element_count - 1) - (index + offset);
-      return r.m_first_element + r.m_step * new_reverse_index;
+          = (r->m_element_count - 1) - (index + offset);
+      return r->m_first_element + r->m_step * new_reverse_index;
    }

   private:
-    basic_range<IntegerType> r;
+    basic_range<IntegerType> const *r;
    size_type index;
  };