diff --git a/lib/pls/CMakeLists.txt b/lib/pls/CMakeLists.txt index f30cd59..4b3201d 100644 --- a/lib/pls/CMakeLists.txt +++ b/lib/pls/CMakeLists.txt @@ -34,8 +34,10 @@ add_library(pls STATIC include/pls/internal/scheduling/scheduler_impl.h include/pls/internal/scheduling/task.h src/internal/scheduling/task.cpp include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp - include/pls/internal/scheduling/lambda_task.h include/pls/internal/data_structures/deque.h) + include/pls/internal/scheduling/lambda_task.h include/pls/internal/data_structures/deque.h + # include/pls/algorithms/parallel_scan.h include/pls/algorithms/parallel_scan_impl.h) + ) # Add everything in `./include` to be in the include path of this project target_include_directories(pls PUBLIC diff --git a/lib/pls/include/pls/algorithms/invoke_parallel_impl.h b/lib/pls/include/pls/algorithms/invoke_parallel_impl.h index ef634bd..a25336e 100644 --- a/lib/pls/include/pls/algorithms/invoke_parallel_impl.h +++ b/lib/pls/include/pls/algorithms/invoke_parallel_impl.h @@ -11,27 +11,27 @@ namespace pls { namespace algorithm { template -void invoke_parallel(const Function1 &function1, const Function2 &function2) { +void invoke_parallel(Function1 &&function1, Function2 &&function2) { using namespace ::pls::internal::scheduling; - auto sub_task_1 = lambda_task_by_reference(function1); - auto sub_task_2 = lambda_task_by_reference(function2); + using task_1_t = lambda_task_by_value; + using task_2_t = lambda_task_by_value; - scheduler::spawn_child(sub_task_2); - scheduler::spawn_child_and_wait(sub_task_1); + scheduler::spawn_child(std::forward(function2)); + scheduler::spawn_child_and_wait(std::forward(function1)); } template -void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3) { +void invoke_parallel(Function1 &&function1, Function2 &&function2, Function3 &&function3) { using namespace ::pls::internal::scheduling; - auto sub_task_1 = lambda_task_by_reference(function1); - auto sub_task_2 = lambda_task_by_reference(function2); - auto sub_task_3 = lambda_task_by_reference(function3); + using task_1_t = lambda_task_by_value; + using task_2_t = lambda_task_by_value; + using task_3_t = lambda_task_by_value; - scheduler::spawn_child(sub_task_3); - scheduler::spawn_child(sub_task_2); - scheduler::spawn_child_and_wait(sub_task_1); + scheduler::spawn_child(std::forward(function3)); + scheduler::spawn_child(std::forward(function2)); + scheduler::spawn_child_and_wait(std::forward(function1)); } } diff --git a/lib/pls/include/pls/algorithms/parallel_for_impl.h b/lib/pls/include/pls/algorithms/parallel_for_impl.h index b787b44..32f9aa5 100644 --- a/lib/pls/include/pls/algorithms/parallel_for_impl.h +++ b/lib/pls/include/pls/algorithms/parallel_for_impl.h @@ -25,14 +25,13 @@ void parallel_for(RandomIt first, RandomIt last, const Function &function) { // Cut in half recursively long middle_index = num_elements / 2; - auto body2 = [=] { parallel_for(first + middle_index, last, function); }; - lambda_task_by_reference second_half_task(body2); - scheduler::spawn_child(second_half_task); - - auto body1 = [=] { parallel_for(first, first + middle_index, function); }; - lambda_task_by_reference first_half_task(body1); - scheduler::spawn_child(first_half_task); - scheduler::wait_for_all(); + auto second_half_body = [=] { parallel_for(first + middle_index, last, function); }; + using second_half_t = lambda_task_by_reference; + scheduler::spawn_child(std::move(second_half_body)); + + auto first_half_body = [=] { parallel_for(first, first + middle_index, function); }; + using first_half_t = lambda_task_by_reference; + scheduler::spawn_child_and_wait(std::move(first_half_body)); } } diff --git a/lib/pls/include/pls/internal/data_structures/aligned_stack.h b/lib/pls/include/pls/internal/data_structures/aligned_stack.h index 2e04702..f1ab8eb 100644 --- a/lib/pls/include/pls/internal/data_structures/aligned_stack.h +++ b/lib/pls/include/pls/internal/data_structures/aligned_stack.h @@ -40,8 +40,8 @@ class aligned_stack { aligned_stack(pointer_t memory_region, std::size_t size); aligned_stack(char *memory_region, std::size_t size); - template - T *push(const T &object); + template + T *push(ARGS &&... args); template void *push(); template diff --git a/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h b/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h index 849971a..ed7d25c 100644 --- a/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h +++ b/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h @@ -6,10 +6,10 @@ namespace pls { namespace internal { namespace data_structures { -template -T *aligned_stack::push(const T &object) { - // Copy-Construct - return new(push < T > ())T(object); +template +T *aligned_stack::push(ARGS &&... args) { + // Perfect-Forward construct + return new(push < T > ())T(std::forward(args)...); } template diff --git a/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h b/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h index 4c89b6b..caec50f 100644 --- a/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h +++ b/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h @@ -81,8 +81,8 @@ class work_stealing_deque { tail_{other.tail_.load()}, previous_tail_{other.previous_tail_} {} - template - T *push_tail(const T &new_item); + template + T *push_tail(const Function &after_creation, ARGS &&... args); Item *pop_tail(); Item *pop_head(); @@ -94,8 +94,8 @@ class work_stealing_deque { work_stealing_deque_item *item_at(offset_t offset); offset_t current_stack_offset(); - template - std::pair *allocate_item(const T &new_item); + template + std::pair *allocate_item(ARGS &&... args); }; } diff --git a/lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h b/lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h index 95987a5..45cebdf 100644 --- a/lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h +++ b/lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h @@ -23,28 +23,30 @@ offset_t work_stealing_deque::current_stack_offset() { } template -template -std::pair *work_stealing_deque::allocate_item(const T &new_item) { +template +std::pair *work_stealing_deque::allocate_item(ARGS &&... args) { // 'Union' type to push both on stack using pair_t = std::pair; // Allocate space on stack auto new_pair = reinterpret_cast(stack_->push()); // Initialize memory on stack new((void *) &(new_pair->first)) work_stealing_deque_item(); - new((void *) &(new_pair->second)) T(new_item); + new((void *) &(new_pair->second)) T(std::forward(args)...); return new_pair; } template -template -T *work_stealing_deque::push_tail(const T &new_item) { +template +T *work_stealing_deque::push_tail(const Function &after_creation, ARGS &&... args) { static_assert(std::is_same::value || std::is_base_of::value, "Must only push types of onto work_stealing_deque"); offset_t local_tail = tail_; - auto new_pair = allocate_item(new_item); + auto new_pair = allocate_item(std::forward(args)...); + after_creation(&(new_pair->second)); // callback for time after creation but before being visible to others + // Prepare current tail to point to correct next items auto tail_deque_item = item_at(local_tail); tail_deque_item->set_data(&(new_pair->second)); diff --git a/lib/pls/include/pls/internal/scheduling/scheduler.h b/lib/pls/include/pls/internal/scheduling/scheduler.h index fb9eb20..242b912 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler.h @@ -79,19 +79,21 @@ class scheduler { * Helper to spawn a child on the currently running task. * * @tparam T type of the new task - * @param sub_task the new task to be spawned + * @tparam ARGS Constructor argument types + * @param args constructor arguments */ - template - static void spawn_child(T &sub_task); + template + static void spawn_child(ARGS &&... args); /** * Helper to spawn a child on the currently running task and waiting for it (skipping over the task-deque). * * @tparam T type of the new task - * @param sub_task the new task to be spawned + * @tparam ARGS Constructor argument types + * @param args constructor arguments */ - template - static void spawn_child_and_wait(T &sub_task); + template + static void spawn_child_and_wait(ARGS &&... args); /** * Helper to wait for all children of the currently executing task. diff --git a/lib/pls/include/pls/internal/scheduling/scheduler_impl.h b/lib/pls/include/pls/internal/scheduling/scheduler_impl.h index 50f7b04..1eb8c95 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler_impl.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler_impl.h @@ -40,14 +40,14 @@ void scheduler::perform_work(Function work_section) { } } -template -void scheduler::spawn_child(T &sub_task) { - thread_state::get()->current_task_->spawn_child(sub_task); +template +void scheduler::spawn_child(ARGS &&... args) { + thread_state::get()->current_task_->spawn_child(std::forward(args)...); } -template -void scheduler::spawn_child_and_wait(T &sub_task) { - thread_state::get()->current_task_->spawn_child_and_wait(sub_task); +template +void scheduler::spawn_child_and_wait(ARGS &&... args) { + thread_state::get()->current_task_->spawn_child_and_wait(std::forward(args)...); } } diff --git a/lib/pls/include/pls/internal/scheduling/task.h b/lib/pls/include/pls/internal/scheduling/task.h index 7bb4a4b..c29cd3d 100644 --- a/lib/pls/include/pls/internal/scheduling/task.h +++ b/lib/pls/include/pls/internal/scheduling/task.h @@ -24,48 +24,48 @@ class task { data_structures::deque::state deque_state_; protected: - // TODO: Double Check with copy and move constructors, try to minimize overhead while keeping a clean API. explicit task(); - task(const task &other); /** * Overwrite this with the actual behaviour of concrete tasks. */ virtual void execute_internal() = 0; - template - void spawn_child(T &&sub_task); - template - void spawn_child_and_wait(T &&sub_task); + template + void spawn_child(ARGS &&... args); + template + void spawn_child_and_wait(ARGS &&... args); void wait_for_all(); private: void execute(); }; -template -void task::spawn_child(T &&sub_task) { +template +void task::spawn_child(ARGS &&... args) { PROFILE_FORK_JOIN_STEALING("spawn_child") static_assert(std::is_base_of::type>::value, "Only pass task subclasses!"); // Keep our refcount up to date ref_count_++; - // Assign forced values (for stack and parent management) - sub_task.parent_ = this; - sub_task.deque_state_ = thread_state::get()->deque_.save_state(); - // Push on our deque - const T const_task = sub_task; - thread_state::get()->deque_.push_tail(const_task); + auto deque_state = thread_state::get()->deque_.save_state(); + thread_state::get()->deque_.push_tail([this, deque_state](T *item) { + // Assign forced values (for stack and parent management) + item->parent_ = this; + item->deque_state_ = deque_state; + }, std::forward(args)...); } -template -void task::spawn_child_and_wait(T &&sub_task) { +template +void task::spawn_child_and_wait(ARGS &&... args) { PROFILE_FORK_JOIN_STEALING("spawn_child_wait") static_assert(std::is_base_of::type>::value, "Only pass task subclasses!"); // Assign forced values (for stack and parent management) + // TODO: Move this after construction + T sub_task{std::forward(args)...}; sub_task.parent_ = nullptr; sub_task.deque_state_ = thread_state::get()->deque_.save_state(); PROFILE_END_BLOCK diff --git a/lib/pls/src/internal/scheduling/task.cpp b/lib/pls/src/internal/scheduling/task.cpp index 5a4b1f4..8b20667 100644 --- a/lib/pls/src/internal/scheduling/task.cpp +++ b/lib/pls/src/internal/scheduling/task.cpp @@ -13,11 +13,6 @@ task::task() : parent_{nullptr}, deque_state_{0} {} -task::task(const task &other) : - ref_count_{0}, - parent_{other.parent_}, - deque_state_{other.deque_state_} {} - void task::execute() { PROFILE_WORK_BLOCK("execute task") auto last_executing = thread_state::get()->current_task_;