Commit a5bb074c by FritzFlorian

Refactor: Use perfect forwarding instead of copy constructor.

parent 0228aa92
Pipeline #1260 failed with stages
in 43 seconds
......@@ -34,8 +34,10 @@ add_library(pls STATIC
include/pls/internal/scheduling/task.h src/internal/scheduling/task.cpp
include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp
include/pls/internal/scheduling/lambda_task.h include/pls/internal/data_structures/deque.h)
include/pls/internal/scheduling/lambda_task.h include/pls/internal/data_structures/deque.h
# include/pls/algorithms/parallel_scan.h include/pls/algorithms/parallel_scan_impl.h)
# Add everything in `./include` to the include path of this project
......@@ -11,27 +11,27 @@ namespace pls {
namespace algorithm {
template<typename Function1, typename Function2>
void invoke_parallel(const Function1 &function1, const Function2 &function2) {
void invoke_parallel(Function1 &&function1, Function2 &&function2) {
using namespace ::pls::internal::scheduling;
auto sub_task_1 = lambda_task_by_reference<Function1>(function1);
auto sub_task_2 = lambda_task_by_reference<Function2>(function2);
using task_1_t = lambda_task_by_value<Function1>;
using task_2_t = lambda_task_by_value<Function2>;
template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3) {
void invoke_parallel(Function1 &&function1, Function2 &&function2, Function3 &&function3) {
using namespace ::pls::internal::scheduling;
auto sub_task_1 = lambda_task_by_reference<Function1>(function1);
auto sub_task_2 = lambda_task_by_reference<Function2>(function2);
auto sub_task_3 = lambda_task_by_reference<Function3>(function3);
using task_1_t = lambda_task_by_value<Function1>;
using task_2_t = lambda_task_by_value<Function2>;
using task_3_t = lambda_task_by_value<Function3>;
......@@ -25,14 +25,13 @@ void parallel_for(RandomIt first, RandomIt last, const Function &function) {
// Cut in half recursively
long middle_index = num_elements / 2;
auto body2 = [=] { parallel_for(first + middle_index, last, function); };
lambda_task_by_reference<decltype(body2)> second_half_task(body2);
auto body1 = [=] { parallel_for(first, first + middle_index, function); };
lambda_task_by_reference<decltype(body1)> first_half_task(body1);
auto second_half_body = [=] { parallel_for(first + middle_index, last, function); };
using second_half_t = lambda_task_by_reference<decltype(second_half_body)>;
auto first_half_body = [=] { parallel_for(first, first + middle_index, function); };
using first_half_t = lambda_task_by_reference<decltype(first_half_body)>;
......@@ -40,8 +40,8 @@ class aligned_stack {
aligned_stack(pointer_t memory_region, std::size_t size);
aligned_stack(char *memory_region, std::size_t size);
template<typename T>
T *push(const T &object);
template<typename T, typename ...ARGS>
T *push(ARGS &&... args);
template<typename T>
void *push();
template<typename T>
......@@ -6,10 +6,10 @@ namespace pls {
namespace internal {
namespace data_structures {
template<typename T>
T *aligned_stack::push(const T &object) {
// Copy-Construct
return new(push < T > ())T(object);
template<typename T, typename ...ARGS>
T *aligned_stack::push(ARGS &&... args) {
// Perfect-Forward construct
return new(push < T > ())T(std::forward<ARGS>(args)...);
template<typename T>
......@@ -81,8 +81,8 @@ class work_stealing_deque {
previous_tail_{other.previous_tail_} {}
template<typename T>
T *push_tail(const T &new_item);
template<typename T, typename Function, typename ...ARGS>
T *push_tail(const Function &after_creation, ARGS &&... args);
Item *pop_tail();
Item *pop_head();
......@@ -94,8 +94,8 @@ class work_stealing_deque {
work_stealing_deque_item *item_at(offset_t offset);
offset_t current_stack_offset();
template<typename T>
std::pair<work_stealing_deque_item, T> *allocate_item(const T &new_item);
template<typename T, typename ...ARGS>
std::pair<work_stealing_deque_item, T> *allocate_item(ARGS &&... args);
......@@ -23,28 +23,30 @@ offset_t work_stealing_deque<Item>::current_stack_offset() {
template<typename Item>
template<typename T>
std::pair<work_stealing_deque_item, T> *work_stealing_deque<Item>::allocate_item(const T &new_item) {
template<typename T, typename ...ARGS>
std::pair<work_stealing_deque_item, T> *work_stealing_deque<Item>::allocate_item(ARGS &&... args) {
// 'Union' type to push both on stack
using pair_t = std::pair<work_stealing_deque_item, T>;
// Allocate space on stack
auto new_pair = reinterpret_cast<pair_t *>(stack_->push<pair_t>());
// Initialize memory on stack
new((void *) &(new_pair->first)) work_stealing_deque_item();
new((void *) &(new_pair->second)) T(new_item);
new((void *) &(new_pair->second)) T(std::forward<ARGS>(args)...);
return new_pair;
template<typename Item>
template<typename T>
T *work_stealing_deque<Item>::push_tail(const T &new_item) {
template<typename T, typename Function, typename ...ARGS>
T *work_stealing_deque<Item>::push_tail(const Function &after_creation, ARGS &&... args) {
static_assert(std::is_same<Item, T>::value || std::is_base_of<Item, T>::value,
"Must only push types of <Item> onto work_stealing_deque<Item>");
offset_t local_tail = tail_;
auto new_pair = allocate_item(new_item);
auto new_pair = allocate_item<T>(std::forward<ARGS>(args)...);
after_creation(&(new_pair->second)); // callback for time after creation but before being visible to others
// Prepare current tail to point to correct next items
auto tail_deque_item = item_at(local_tail);
......@@ -79,19 +79,21 @@ class scheduler {
* Helper to spawn a child on the currently running task.
* @tparam T type of the new task
* @param sub_task the new task to be spawned
* @tparam ARGS Constructor argument types
* @param args constructor arguments
template<typename T>
static void spawn_child(T &sub_task);
template<typename T, typename ...ARGS>
static void spawn_child(ARGS &&... args);
* Helper to spawn a child on the currently running task and wait for it (skipping over the task-deque).
* @tparam T type of the new task
* @param sub_task the new task to be spawned
* @tparam ARGS Constructor argument types
* @param args constructor arguments
template<typename T>
static void spawn_child_and_wait(T &sub_task);
template<typename T, typename ...ARGS>
static void spawn_child_and_wait(ARGS &&... args);
* Helper to wait for all children of the currently executing task.
......@@ -40,14 +40,14 @@ void scheduler::perform_work(Function work_section) {
template<typename T>
void scheduler::spawn_child(T &sub_task) {
template<typename T, typename ...ARGS>
void scheduler::spawn_child(ARGS &&... args) {
template<typename T>
void scheduler::spawn_child_and_wait(T &sub_task) {
template<typename T, typename ...ARGS>
void scheduler::spawn_child_and_wait(ARGS &&... args) {
......@@ -24,48 +24,48 @@ class task {
data_structures::deque<task>::state deque_state_;
// TODO: Double Check with copy and move constructors, try to minimize overhead while keeping a clean API.
explicit task();
task(const task &other);
* Override this with the actual behaviour of concrete tasks.
virtual void execute_internal() = 0;
template<typename T>
void spawn_child(T &&sub_task);
template<typename T>
void spawn_child_and_wait(T &&sub_task);
template<typename T, typename ...ARGS>
void spawn_child(ARGS &&... args);
template<typename T, typename ...ARGS>
void spawn_child_and_wait(ARGS &&... args);
void wait_for_all();
void execute();
template<typename T>
void task::spawn_child(T &&sub_task) {
template<typename T, typename ...ARGS>
void task::spawn_child(ARGS &&... args) {
static_assert(std::is_base_of<task, typename std::remove_reference<T>::type>::value, "Only pass task subclasses!");
// Keep our refcount up to date
// Assign forced values (for stack and parent management)
sub_task.parent_ = this;
sub_task.deque_state_ = thread_state::get()->deque_.save_state();
// Push on our deque
const T const_task = sub_task;
auto deque_state = thread_state::get()->deque_.save_state();
thread_state::get()->deque_.push_tail<T>([this, deque_state](T *item) {
// Assign forced values (for stack and parent management)
item->parent_ = this;
item->deque_state_ = deque_state;
}, std::forward<ARGS>(args)...);
template<typename T>
void task::spawn_child_and_wait(T &&sub_task) {
template<typename T, typename ...ARGS>
void task::spawn_child_and_wait(ARGS &&... args) {
static_assert(std::is_base_of<task, typename std::remove_reference<T>::type>::value, "Only pass task subclasses!");
// Assign forced values (for stack and parent management)
// TODO: Move this after construction
T sub_task{std::forward<ARGS>(args)...};
sub_task.parent_ = nullptr;
sub_task.deque_state_ = thread_state::get()->deque_.save_state();
......@@ -13,11 +13,6 @@ task::task() :
deque_state_{0} {}
task::task(const task &other) :
deque_state_{other.deque_state_} {}
void task::execute() {
PROFILE_WORK_BLOCK("execute task")
auto last_executing = thread_state::get()->current_task_;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment