From cbdc5e4f370a2b7543ca36a05632de624ceaec37 Mon Sep 17 00:00:00 2001
From: FritzFlorian
Date: Fri, 29 Mar 2019 15:57:39 +0100
Subject: [PATCH] Add internal stealing for TBB-like scheduler.

---
 NOTES.md                                            |  25 +++++++++++++++++++++++++
 lib/pls/CMakeLists.txt                              |   3 ++-
 lib/pls/include/pls/internal/base/aligned_stack.h   |  19 +++++++++++++++++--
 lib/pls/include/pls/internal/scheduling/scheduler.h |   2 +-
 lib/pls/include/pls/internal/scheduling/tbb_task.h  |  79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
 lib/pls/src/internal/scheduling/tbb_task.cpp        | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
 6 files changed, 196 insertions(+), 42 deletions(-)

diff --git a/NOTES.md b/NOTES.md
index 3015736..3e66c3d 100644
--- a/NOTES.md
+++ b/NOTES.md
@@ -4,6 +4,31 @@ A collection of stuff that we noticed during development.
 Useful later on to write a project report and to go back in time
 to find out why certain decisions were made.
 
+## 28.03.2019 - custom new operators
+
+When initializing sub_tasks we want to place them on our custom
+'stack like' data structure per thread. We looked at TBB's API
+and noticed that it somehow sets the parent relationship implicitly
+in the new operator. After further investigation we saw that
+initializing tasks in this manner is a 'hack' that avoids
+explicitly passing references and counters around.
+
+It can be found at the bottom of the `task.h` file:
+
+```C++
+inline void *operator new( size_t bytes, const tbb::internal::allocate_child_proxy& p ) {
+    return &p.allocate(bytes);
+}
+
+inline void operator delete( void* task, const tbb::internal::allocate_child_proxy& p ) {
+    p.free( *static_cast<tbb::task*>(task) );
+}
+```
+
+It simply constructs a temporary 'allocator type' that is passed
+as the second argument to new. This type is then called inside
+new and allocates the required memory.
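+
+To make the mechanism concrete, here is a minimal stand-alone sketch
+of the same pattern (illustrative only; the proxy and task types are
+made up for this note and are not TBB's real classes):
+
+```C++
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+
+// Stand-in for a per-thread task allocator.
+struct my_allocator_proxy {
+    void* allocate(std::size_t bytes) const { return std::malloc(bytes); }
+    void free(void* memory) const { std::free(memory); }
+};
+
+// The 'hack': a placement new that routes the allocation through the
+// proxy object passed as the second argument.
+inline void* operator new(std::size_t bytes, const my_allocator_proxy& proxy) {
+    return proxy.allocate(bytes);
+}
+
+// Matching delete, only called automatically if the constructor throws.
+inline void operator delete(void* memory, const my_allocator_proxy& proxy) {
+    proxy.free(memory);
+}
+
+struct my_task {
+    int id;
+    explicit my_task(int id): id{id} { std::printf("constructed task %d\n", id); }
+};
+
+int main() {
+    my_allocator_proxy proxy;
+    my_task* task = new (proxy) my_task{42}; // allocation goes through the proxy
+    task->~my_task();                        // destroy manually...
+    proxy.free(task);                        // ...and release through the proxy
+}
+```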
+
 ## 27.03.2019 - atomics
 
 C++ 11 offers atomics, however these require careful usage
diff --git a/lib/pls/CMakeLists.txt b/lib/pls/CMakeLists.txt
index eca5449..bba3227 100644
--- a/lib/pls/CMakeLists.txt
+++ b/lib/pls/CMakeLists.txt
@@ -12,7 +12,8 @@ add_library(pls STATIC
         src/internal/base/aligned_stack.cpp include/pls/internal/base/aligned_stack.h include/pls/internal/base/system_details.h
         src/internal/scheduling/run_on_n_threads_task.cpp include/pls/internal/scheduling/run_on_n_threads_task.h
-        src/internal/scheduling/tbb_task.cpp include/pls/internal/scheduling/tbb_task.h)
+        src/internal/scheduling/tbb_task.cpp include/pls/internal/scheduling/tbb_task.h
+        )
 
 # Add everything in `./include` to be in the include path of this project
 target_include_directories(pls
diff --git a/lib/pls/include/pls/internal/base/aligned_stack.h b/lib/pls/include/pls/internal/base/aligned_stack.h
index 9ff993c..8480fe1 100644
--- a/lib/pls/include/pls/internal/base/aligned_stack.h
+++ b/lib/pls/include/pls/internal/base/aligned_stack.h
@@ -19,6 +19,8 @@ namespace pls {
                 static std::uintptr_t next_alignment(std::uintptr_t size);
                 static char* next_alignment(char* pointer);
             public:
+                typedef char* state;
+
                 aligned_stack():
                         memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {};
                 aligned_stack(char* memory_region, const std::size_t size):
@@ -27,7 +29,13 @@ namespace pls {
                         head_{next_alignment(memory_start_)} {}
 
                 template<typename T>
-                T* push(T object) {
+                T* push(const T& object) {
+                    // Copy-Construct into desired memory location
+                    return new (push<T>())T(object);
+                }
+
+                template<typename T>
+                T* push() {
                     T* result = reinterpret_cast<T*>(head_);
 
                     // Move head to next aligned position after new object
@@ -36,7 +44,6 @@ namespace pls {
                         exit(1); // TODO: Exception Handling
                     }
 
-                    *result = object;
                     return result;
                 }
 
@@ -46,6 +53,14 @@ namespace pls {
                     return *reinterpret_cast<T*>(head_);
                 }
+
+                state save_state() {
+                    return head_;
+                }
+
+                void reset_state(state new_state) {
+                    head_ = new_state;
+                }
             };
         }
     }
diff --git a/lib/pls/include/pls/internal/scheduling/scheduler.h b/lib/pls/include/pls/internal/scheduling/scheduler.h
index e1493ad..0726c06 100644
--- a/lib/pls/include/pls/internal/scheduling/scheduler.h
+++ b/lib/pls/include/pls/internal/scheduling/scheduler.h
@@ -82,7 +82,7 @@ namespace pls {
                 // TODO: See if we should place this differently (only for performance reasons)
                 template<typename Task>
-                static void execute_task(Task task, int depth=-1) {
+                static void execute_task(Task& task, int depth=-1) {
                     static_assert(std::is_base_of<abstract_task, Task>::value, "Only pass abstract_task subclasses!");
 
                     auto my_state = base::this_thread::state();
diff --git a/lib/pls/include/pls/internal/scheduling/tbb_task.h b/lib/pls/include/pls/internal/scheduling/tbb_task.h
index 71c7187..396bcf7 100644
--- a/lib/pls/include/pls/internal/scheduling/tbb_task.h
+++ b/lib/pls/include/pls/internal/scheduling/tbb_task.h
@@ -2,7 +2,9 @@
 #ifndef PLS_TBB_LIKE_TASK_H
 #define PLS_TBB_LIKE_TASK_H
 
+#include <iostream>
 #include "abstract_task.h"
+#include "thread_state.h"
 
 namespace pls {
     namespace internal {
@@ -11,60 +13,89 @@ namespace pls {
             class tbb_sub_task {
                 friend class tbb_task;
 
+                // Coordinate finishing of sub_tasks
                 std::atomic_uint32_t ref_count_;
                 tbb_sub_task* parent_;
+
+                // Access to TBB scheduling environment
                 tbb_task* tbb_task_;
 
-            public:
-                explicit tbb_sub_task(tbb_sub_task* parent, tbb_task* tbb_task);
-                ~tbb_sub_task();
-                void execute();
+                // Double-Ended Queue management
+                tbb_sub_task* below_;
+                tbb_sub_task* above_;
+                // Stack Management (reset stack pointer after wait_for_all() calls)
+                base::aligned_stack::state stack_state_;
 
             protected:
+                explicit tbb_sub_task();
+                tbb_sub_task(const tbb_sub_task& other);
+
                 virtual void execute_internal() = 0;
                 // SubClass Implementations:
                 // Do Work
-                // |-- Spawn Sub Task
+                // |-- Spawn Sub Task (new subtask; spawn(subtask);)
                 // |-- Spawn Sub task
                 // Do Work
                 // |-- Wait For All
                 // Do Work
                 // |-- Spawn Sub Task
 
-                // Currently required to construct child...
-                // TODO: Allocate child with custom new(...) on stack
-                tbb_sub_task* parent() { return parent_; }
-                tbb_task* tbb_task() { return tbb_task_; }
-
+                template<typename T>
+                void spawn_child(const T& sub_task);
                 void wait_for_all();
 
             private:
-                tbb_sub_task* get_local_task();
+                void spawn_child_internal(tbb_sub_task* sub_task);
+                void execute();
+
+            public:
+                virtual void test() {
+                    std::cout << "Test" << std::endl;
+                }
             };
 
             class tbb_task: public abstract_task {
                 friend class tbb_sub_task;
+
                 tbb_sub_task* root_task_;
 
-                // TODO: hold stuff for double ended sub-task queue
+                base::aligned_stack* my_stack_;
+
+                // Double-Ended Queue management
+                tbb_sub_task* top_;
+                tbb_sub_task* bottom_;
+
+                // Steal Management
+                tbb_sub_task* last_stolen_;
+
+                tbb_sub_task* get_local_sub_task();
+                tbb_sub_task* get_stolen_sub_task();
+
+                bool internal_stealing(abstract_task* other_task) override;
+                bool split_task() override;
+
            public:
                 explicit tbb_task(tbb_sub_task* root_task):
-                        abstract_task{0, 0},
-                        root_task_{root_task} {};
+                        abstract_task{0, 0},
+                        root_task_{root_task},
+                        top_{nullptr},
+                        bottom_{nullptr},
+                        last_stolen_{nullptr} {
+                    my_stack_ = base::this_thread::state()->task_stack_;
+                    root_task_->tbb_task_ = this;
+                    root_task_->stack_state_ = my_stack_->save_state();
+                };
 
                 void execute() override {
                     root_task_->execute();
                 }
+            };
 
-                bool internal_stealing(abstract_task* other_task) override {
-                    auto cast_other_task = reinterpret_cast<tbb_task*>(other_task);
-                    // TODO: Try to steal from the other sub-task queue
-                    return false;
-                }
+            template<typename T>
+            void tbb_sub_task::spawn_child(const T& task) {
+                static_assert(std::is_base_of<tbb_sub_task, T>::value, "Only pass tbb_sub_task subclasses!");
 
-                bool split_task() override {
-                    // TODO: Take an internal task and create a new tbb task from it
-                    return false;
-                }
-            };
+                T* new_task = tbb_task_->my_stack_->push(task);
+                spawn_child_internal(new_task);
+            }
         }
     }
 }
diff --git a/lib/pls/src/internal/scheduling/tbb_task.cpp b/lib/pls/src/internal/scheduling/tbb_task.cpp
index 543e7b1..81c35f6 100644
--- a/lib/pls/src/internal/scheduling/tbb_task.cpp
+++ b/lib/pls/src/internal/scheduling/tbb_task.cpp
@@ -1,42 +1,124 @@
+#include <iostream>
 #include "pls/internal/scheduling/tbb_task.h"
 
 namespace pls {
     namespace internal {
         namespace scheduling {
-            tbb_sub_task::tbb_sub_task(tbb_sub_task *parent, class tbb_task *tbb_task):
-                    ref_count_{0},
-                    parent_{parent},
-                    tbb_task_{tbb_task} {
-                parent->ref_count_++;
-            }
+            tbb_sub_task::tbb_sub_task():
+                    ref_count_{0},
+                    parent_{nullptr},
+                    tbb_task_{nullptr},
+                    below_{nullptr},
+                    above_{nullptr} {}
 
-            tbb_sub_task::~tbb_sub_task() {
-                wait_for_all();
+            tbb_sub_task::tbb_sub_task(const tbb_sub_task& other) {
+                // Do nothing, will be initialized right after this anyway
             }
 
             void tbb_sub_task::execute() {
                 execute_internal();
                 wait_for_all();
+
+                if (parent_ != nullptr) {
+                    parent_->ref_count_--;
+                }
             }
 
-            tbb_sub_task* tbb_sub_task::get_local_task() {
-                // TODO: get a task from the bottom of our sub-task queue
+            void tbb_sub_task::spawn_child_internal(tbb_sub_task* sub_task) {
+                // Keep our refcount up to date
+                ref_count_++;
+
+                // Assign forced values
+                sub_task->parent_ = this;
+                sub_task->tbb_task_ = tbb_task_;
+                sub_task->stack_state_ = tbb_task_->my_stack_->save_state();
+
+                // Put sub_task into stealing queue
+                if (tbb_task_->bottom_ != nullptr) {
+                    tbb_task_->bottom_->below_ = sub_task;
+                } else {
+                    tbb_task_->top_ = sub_task;
+                }
+                sub_task->above_ = tbb_task_->bottom_;
+                sub_task->below_ = nullptr;
+                tbb_task_->bottom_ = sub_task;
             }
 
             void tbb_sub_task::wait_for_all() {
                 while (ref_count_ > 0) {
-                    tbb_sub_task* local_task = get_local_task();
+                    tbb_sub_task* local_task = tbb_task_->get_local_sub_task();
                     if (local_task != nullptr) {
                         local_task->execute();
-                        continue;
                     } else {
                         // Try to steal work.
-                        // External steal will be executed explicitly
+                        // An external steal will be executed implicitly on success
                         if (tbb_task_->steal_work()) {
-                            // TODO: Internal Success, execute stolen task
+                            tbb_task_->last_stolen_->execute();
                         }
                     }
                 }
+                tbb_task_->my_stack_->reset_state(stack_state_);
             }
 
+            tbb_sub_task* tbb_task::get_local_sub_task() {
+                if (bottom_ == nullptr) {
+                    return nullptr;
+                }
+
+                // Remove from bottom of queue
+                tbb_sub_task* result = bottom_;
+                bottom_ = bottom_->above_;
+                if (bottom_ == nullptr) {
+                    top_ = nullptr;
+                } else {
+                    bottom_->below_ = nullptr;
+                }
+
+                return result;
+            }
+
+            tbb_sub_task* tbb_task::get_stolen_sub_task() {
+                if (top_ == nullptr) {
+                    return nullptr;
+                }
+
+                tbb_sub_task* result = top_;
+                top_ = top_->below_;
+                if (top_ == nullptr) {
+                    bottom_ = nullptr;
+                } else {
+                    top_->above_ = nullptr;
+                }
+
+                return result;
+            }
+
+            bool tbb_task::internal_stealing(abstract_task* other_task) {
+                auto cast_other_task = reinterpret_cast<tbb_task*>(other_task);
+
+                auto stolen_sub_task = cast_other_task->get_stolen_sub_task();
+                if (stolen_sub_task == nullptr) {
+                    return false;
+                } else {
+                    // Make the sub-task belong to our tbb_task instance
+                    stolen_sub_task->tbb_task_ = this;
+                    stolen_sub_task->stack_state_ = my_stack_->save_state();
+                    // We will execute this next without explicitly moving it onto our stack storage
+                    last_stolen_ = stolen_sub_task;
+
+                    return true;
+                }
+            }
+
+            bool tbb_task::split_task() {
+                tbb_sub_task* stolen_sub_task = get_stolen_sub_task();
+                if (stolen_sub_task == nullptr) {
+                    return false;
+                }
+
+                tbb_task task{stolen_sub_task};
+                scheduler::execute_task(task, depth());
+                return true;
+            }
         }
     }
 }
-- 
libgit2 0.26.0
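For readers unfamiliar with the TBB-style fork-join API this patch introduces, the intended usage of `spawn_child()` and `wait_for_all()` looks roughly like the sketch below. It is written against the declarations added in `tbb_task.h`; the `fib_task` subclass and its result pointers are made up for illustration, and the scheduler and worker-thread setup is omitted.

```C++
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/tbb_task.h"

using namespace pls::internal::scheduling;

// Made-up example sub-task: computes fib(n) by spawning two children.
class fib_task: public tbb_sub_task {
    int n_;
    int* result_;

protected:
    void execute_internal() override {
        if (n_ <= 1) {
            *result_ = n_;
            return;
        }

        int left = 0, right = 0;
        // Children are copy-constructed onto this thread's task stack and
        // appended to the bottom of the tbb_task's double-ended queue.
        spawn_child(fib_task{n_ - 1, &left});
        spawn_child(fib_task{n_ - 2, &right});
        // Work local children from the bottom while idle threads may steal
        // from the top; afterwards the task stack is reset to the saved state.
        wait_for_all();

        *result_ = left + right;
    }

public:
    fib_task(int n, int* result): n_{n}, result_{result} {}
};

// On a scheduler worker thread (setup omitted) the root sub-task would be
// wrapped in a tbb_task and handed to the scheduler:
//
//     int result = 0;
//     fib_task root{10, &result};
//     tbb_task task{&root};
//     scheduler::execute_task(task);
```

Because `spawn_child()` copy-constructs the passed task onto the per-thread `aligned_stack` and `wait_for_all()` later resets that stack to the saved state, spawned children must not be used after their parent's `wait_for_all()` call returns.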