Refactor: Use perfect forwarding instead of copy constructor.

a5bb074c · FritzFlorian · 0228aa92 · a5bb074c · a5bb074c · a5bb074c
Commit a5bb074c authored Jun 10, 2019 by FritzFlorian
11 changed files
--- a/lib/pls/CMakeLists.txt
+++ b/lib/pls/CMakeLists.txt
@@ -34,8 +34,10 @@ add_library(pls STATIC
        include/pls/internal/scheduling/scheduler_impl.h
        include/pls/internal/scheduling/task.h src/internal/scheduling/task.cpp
        include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp
-        include/pls/internal/scheduling/lambda_task.h include/pls/internal/data_structures/deque.h)
+        include/pls/internal/scheduling/lambda_task.h include/pls/internal/data_structures/deque.h

+        #        include/pls/algorithms/parallel_scan.h include/pls/algorithms/parallel_scan_impl.h)
+        )
 # Add everything in `./include` to be in the include path of this project
 target_include_directories(pls
        PUBLIC

--- a/lib/pls/include/pls/algorithms/invoke_parallel_impl.h
+++ b/lib/pls/include/pls/algorithms/invoke_parallel_impl.h
@@ -11,27 +11,27 @@ namespace pls {
 namespace algorithm {

 template<typename Function1, typename Function2>
-void invoke_parallel(const Function1 &function1, const Function2 &function2) {
+void invoke_parallel(Function1 &&function1, Function2 &&function2) {
  using namespace ::pls::internal::scheduling;

-  auto sub_task_1 = lambda_task_by_reference<Function1>(function1);
-  auto sub_task_2 = lambda_task_by_reference<Function2>(function2);
+  using task_1_t = lambda_task_by_value<Function1>;
+  using task_2_t = lambda_task_by_value<Function2>;

-  scheduler::spawn_child(sub_task_2);
-  scheduler::spawn_child_and_wait(sub_task_1);
+  scheduler::spawn_child<task_2_t>(std::forward<Function2>(function2));
+  scheduler::spawn_child_and_wait<task_1_t>(std::forward<Function1>(function1));
 }

 template<typename Function1, typename Function2, typename Function3>
-void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3) {
+void invoke_parallel(Function1 &&function1, Function2 &&function2, Function3 &&function3) {
  using namespace ::pls::internal::scheduling;

-  auto sub_task_1 = lambda_task_by_reference<Function1>(function1);
-  auto sub_task_2 = lambda_task_by_reference<Function2>(function2);
-  auto sub_task_3 = lambda_task_by_reference<Function3>(function3);
+  using task_1_t = lambda_task_by_value<Function1>;
+  using task_2_t = lambda_task_by_value<Function2>;
+  using task_3_t = lambda_task_by_value<Function3>;

-  scheduler::spawn_child(sub_task_3);
-  scheduler::spawn_child(sub_task_2);
-  scheduler::spawn_child_and_wait(sub_task_1);
+  scheduler::spawn_child<task_3_t>(std::forward<Function3>(function3));
+  scheduler::spawn_child<task_2_t>(std::forward<Function2>(function2));
+  scheduler::spawn_child_and_wait<task_1_t>(std::forward<Function1>(function1));
 }

 }

--- a/lib/pls/include/pls/algorithms/parallel_for_impl.h
+++ b/lib/pls/include/pls/algorithms/parallel_for_impl.h
@@ -25,14 +25,13 @@ void parallel_for(RandomIt first, RandomIt last, const Function &function) {
    // Cut in half recursively
    long middle_index = num_elements / 2;

-    auto body2 = [=] { parallel_for(first + middle_index, last, function); };
-    lambda_task_by_reference<decltype(body2)> second_half_task(body2);
-    scheduler::spawn_child(second_half_task);
-
-    auto body1 = [=] { parallel_for(first, first + middle_index, function); };
-    lambda_task_by_reference<decltype(body1)> first_half_task(body1);
-    scheduler::spawn_child(first_half_task);
-    scheduler::wait_for_all();
+    auto second_half_body = [=] { parallel_for(first + middle_index, last, function); };
+    using second_half_t = lambda_task_by_reference<decltype(second_half_body)>;
+    scheduler::spawn_child<second_half_t>(std::move(second_half_body));
+
+    auto first_half_body = [=] { parallel_for(first, first + middle_index, function); };
+    using first_half_t = lambda_task_by_reference<decltype(first_half_body)>;
+    scheduler::spawn_child_and_wait<first_half_t>(std::move(first_half_body));
  }
 }


--- a/lib/pls/include/pls/internal/data_structures/aligned_stack.h
+++ b/lib/pls/include/pls/internal/data_structures/aligned_stack.h
@@ -40,8 +40,8 @@ class aligned_stack {
  aligned_stack(pointer_t memory_region, std::size_t size);
  aligned_stack(char *memory_region, std::size_t size);

-  template<typename T>
-  T *push(const T &object);
+  template<typename T, typename ...ARGS>
+  T *push(ARGS &&... args);
  template<typename T>
  void *push();
  template<typename T>

--- a/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h
+++ b/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h
@@ -6,10 +6,10 @@ namespace pls {
 namespace internal {
 namespace data_structures {

-template<typename T>
-T *aligned_stack::push(const T &object) {
-  // Copy-Construct
-  return new(push < T > ())T(object);
+template<typename T, typename ...ARGS>
+T *aligned_stack::push(ARGS &&... args) {
+  // Perfect-Forward construct
+  return new(push < T > ())T(std::forward<ARGS>(args)...);
 }

 template<typename T>

--- a/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h
+++ b/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h
@@ -81,8 +81,8 @@ class work_stealing_deque {
                                                          tail_{other.tail_.load()},
                                                          previous_tail_{other.previous_tail_} {}

-  template<typename T>
-  T *push_tail(const T &new_item);
+  template<typename T, typename Function, typename ...ARGS>
+  T *push_tail(const Function &after_creation, ARGS &&... args);
  Item *pop_tail();
  Item *pop_head();

@@ -94,8 +94,8 @@ class work_stealing_deque {
  work_stealing_deque_item *item_at(offset_t offset);
  offset_t current_stack_offset();

-  template<typename T>
-  std::pair<work_stealing_deque_item, T> *allocate_item(const T &new_item);
+  template<typename T, typename ...ARGS>
+  std::pair<work_stealing_deque_item, T> *allocate_item(ARGS &&... args);
 };

 }

--- a/lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h
+++ b/lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h
@@ -23,28 +23,30 @@ offset_t work_stealing_deque<Item>::current_stack_offset() {
 }

 template<typename Item>
-template<typename T>
-std::pair<work_stealing_deque_item, T> *work_stealing_deque<Item>::allocate_item(const T &new_item) {
+template<typename T, typename ...ARGS>
+std::pair<work_stealing_deque_item, T> *work_stealing_deque<Item>::allocate_item(ARGS &&... args) {
  // 'Union' type to push both on stack
  using pair_t = std::pair<work_stealing_deque_item, T>;
  // Allocate space on stack
  auto new_pair = reinterpret_cast<pair_t *>(stack_->push<pair_t>());
  // Initialize memory on stack
  new((void *) &(new_pair->first)) work_stealing_deque_item();
-  new((void *) &(new_pair->second)) T(new_item);
+  new((void *) &(new_pair->second)) T(std::forward<ARGS>(args)...);

  return new_pair;
 }

 template<typename Item>
-template<typename T>
-T *work_stealing_deque<Item>::push_tail(const T &new_item) {
+template<typename T, typename Function, typename ...ARGS>
+T *work_stealing_deque<Item>::push_tail(const Function &after_creation, ARGS &&... args) {
  static_assert(std::is_same<Item, T>::value || std::is_base_of<Item, T>::value,
                "Must only push types of <Item> onto work_stealing_deque<Item>");

  offset_t local_tail = tail_;

-  auto new_pair = allocate_item(new_item);
+  auto new_pair = allocate_item<T>(std::forward<ARGS>(args)...);
+  after_creation(&(new_pair->second)); // invoked after the item is constructed, but before it becomes visible to others
+
  // Prepare current tail to point to correct next items
  auto tail_deque_item = item_at(local_tail);
  tail_deque_item->set_data(&(new_pair->second));

--- a/lib/pls/include/pls/internal/scheduling/scheduler.h
+++ b/lib/pls/include/pls/internal/scheduling/scheduler.h
@@ -79,19 +79,21 @@ class scheduler {
   * Helper to spawn a child on the currently running task.
   *
   * @tparam T type of the new task
-   * @param sub_task the new task to be spawned
+   * @tparam ARGS Constructor argument types
+   * @param args constructor arguments
   */
-  template<typename T>
-  static void spawn_child(T &sub_task);
+  template<typename T, typename ...ARGS>
+  static void spawn_child(ARGS &&... args);

  /**
   * Helper to spawn a child on the currently running task and waiting for it (skipping over the task-deque).
   *
   * @tparam T type of the new task
-   * @param sub_task the new task to be spawned
+   * @tparam ARGS Constructor argument types
+   * @param args constructor arguments
   */
-  template<typename T>
-  static void spawn_child_and_wait(T &sub_task);
+  template<typename T, typename ...ARGS>
+  static void spawn_child_and_wait(ARGS &&... args);

  /**
   * Helper to wait for all children of the currently executing task.

--- a/lib/pls/include/pls/internal/scheduling/scheduler_impl.h
+++ b/lib/pls/include/pls/internal/scheduling/scheduler_impl.h
@@ -40,14 +40,14 @@ void scheduler::perform_work(Function work_section) {
  }
 }

-template<typename T>
-void scheduler::spawn_child(T &sub_task) {
-  thread_state::get()->current_task_->spawn_child(sub_task);
+template<typename T, typename ...ARGS>
+void scheduler::spawn_child(ARGS &&... args) {
+  thread_state::get()->current_task_->spawn_child<T>(std::forward<ARGS>(args)...);
 }

-template<typename T>
-void scheduler::spawn_child_and_wait(T &sub_task) {
-  thread_state::get()->current_task_->spawn_child_and_wait(sub_task);
+template<typename T, typename ...ARGS>
+void scheduler::spawn_child_and_wait(ARGS &&... args) {
+  thread_state::get()->current_task_->spawn_child_and_wait<T>(std::forward<ARGS>(args)...);
 }

 }

--- a/lib/pls/include/pls/internal/scheduling/task.h
+++ b/lib/pls/include/pls/internal/scheduling/task.h
@@ -24,48 +24,48 @@ class task {
  data_structures::deque<task>::state deque_state_;

 protected:
-  // TODO: Double Check with copy and move constructors, try to minimize overhead while keeping a clean API.
  explicit task();
-  task(const task &other);

  /**
   * Overwrite this with the actual behaviour of concrete tasks.
   */
  virtual void execute_internal() = 0;

-  template<typename T>
-  void spawn_child(T &&sub_task);
-  template<typename T>
-  void spawn_child_and_wait(T &&sub_task);
+  template<typename T, typename ...ARGS>
+  void spawn_child(ARGS &&... args);
+  template<typename T, typename ...ARGS>
+  void spawn_child_and_wait(ARGS &&... args);
  void wait_for_all();

 private:
  void execute();
 };

-template<typename T>
-void task::spawn_child(T &&sub_task) {
+template<typename T, typename ...ARGS>
+void task::spawn_child(ARGS &&... args) {
  PROFILE_FORK_JOIN_STEALING("spawn_child")
  static_assert(std::is_base_of<task, typename std::remove_reference<T>::type>::value, "Only pass task subclasses!");

  // Keep our refcount up to date
  ref_count_++;

-  // Assign forced values (for stack and parent management)
-  sub_task.parent_ = this;
-  sub_task.deque_state_ = thread_state::get()->deque_.save_state();
-
  // Push on our deque
-  const T const_task = sub_task;
-  thread_state::get()->deque_.push_tail(const_task);
+  auto deque_state = thread_state::get()->deque_.save_state();
+  thread_state::get()->deque_.push_tail<T>([this, deque_state](T *item) {
+    // Assign forced values (for stack and parent management)
+    item->parent_ = this;
+    item->deque_state_ = deque_state;
+  }, std::forward<ARGS>(args)...);
 }

-template<typename T>
-void task::spawn_child_and_wait(T &&sub_task) {
+template<typename T, typename ...ARGS>
+void task::spawn_child_and_wait(ARGS &&... args) {
  PROFILE_FORK_JOIN_STEALING("spawn_child_wait")
  static_assert(std::is_base_of<task, typename std::remove_reference<T>::type>::value, "Only pass task subclasses!");

  // Assign forced values (for stack and parent management)
+  // TODO: Move this after construction
+  T sub_task{std::forward<ARGS>(args)...};
  sub_task.parent_ = nullptr;
  sub_task.deque_state_ = thread_state::get()->deque_.save_state();
  PROFILE_END_BLOCK

--- a/lib/pls/src/internal/scheduling/task.cpp
+++ b/lib/pls/src/internal/scheduling/task.cpp
@@ -13,11 +13,6 @@ task::task() :
    parent_{nullptr},
    deque_state_{0} {}

-task::task(const task &other) :
-    ref_count_{0},
-    parent_{other.parent_},
-    deque_state_{other.deque_state_} {}
-
 void task::execute() {
  PROFILE_WORK_BLOCK("execute task")
  auto last_executing = thread_state::get()->current_task_;