Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
las3_pub
/
predictable_parallel_patterns
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
cbdc5e4f
authored
Mar 29, 2019
by
FritzFlorian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add internal stealing for TBB-like scheduler.
parent
32135984
Pipeline
#1115
passed with stages
in 3 minutes 20 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
196 additions
and
42 deletions
+196
-42
NOTES.md
+25
-0
lib/pls/CMakeLists.txt
+2
-1
lib/pls/include/pls/internal/base/aligned_stack.h
+17
-2
lib/pls/include/pls/internal/scheduling/scheduler.h
+1
-1
lib/pls/include/pls/internal/scheduling/tbb_task.h
+55
-24
lib/pls/src/internal/scheduling/tbb_task.cpp
+96
-14
No files found.
NOTES.md
View file @
cbdc5e4f
...
...
@@ -4,6 +4,31 @@ A collection of stuff that we noticed during development.
Useful later on to write a project report and to go back
in time to find out why certain decisions were made.
## 28.03.2019 - custom new operators
When initializing sub_tasks we want to place them on our custom
'stack like' data structure per thread. We looked at TBB's API
and noticed them somehow implicitly setting parent relationships
in the new operator. After further investigation we see that the
initialization in this manner is a 'hack' to avoid passing
of references and counters.
It can be found at the bottom of the
`task.h`
file:
```
C++
inline void *operator new( size_t bytes, const tbb::internal::allocate_child_proxy& p ) {
return &p.allocate(bytes);
}
inline void operator delete( void* task, const tbb::internal::allocate_child_proxy& p ) {
p.free( *static_cast<tbb::task*>(task) );
}
```
It simply constructs a temporary 'allocator type' passed as the second
argument to new. This type is then called inside new and
allocates the memory required.
## 27.03.2019 - atomics
C++ 11 offers atomics, however these require careful usage
...
...
lib/pls/CMakeLists.txt
View file @
cbdc5e4f
...
...
@@ -12,7 +12,8 @@ add_library(pls STATIC
src/internal/base/aligned_stack.cpp include/pls/internal/base/aligned_stack.h
include/pls/internal/base/system_details.h
src/internal/scheduling/run_on_n_threads_task.cpp include/pls/internal/scheduling/run_on_n_threads_task.h
src/internal/scheduling/tbb_task.cpp include/pls/internal/scheduling/tbb_task.h
)
src/internal/scheduling/tbb_task.cpp include/pls/internal/scheduling/tbb_task.h
)
# Add everything in `./include` to be in the include path of this project
target_include_directories
(
pls
...
...
lib/pls/include/pls/internal/base/aligned_stack.h
View file @
cbdc5e4f
...
...
@@ -19,6 +19,8 @@ namespace pls {
static
std
::
uintptr_t
next_alignment
(
std
::
uintptr_t
size
);
static
char
*
next_alignment
(
char
*
pointer
);
public
:
typedef
char
*
state
;
aligned_stack
()
:
memory_start_
{
nullptr
},
memory_end_
{
nullptr
},
head_
{
nullptr
}
{};
aligned_stack
(
char
*
memory_region
,
const
std
::
size_t
size
)
:
...
...
@@ -27,7 +29,13 @@ namespace pls {
head_
{
next_alignment
(
memory_start_
)}
{}
template
<
typename
T
>
T
*
push
(
T
object
)
{
T
*
push
(
const
T
&
object
)
{
// Copy-Construct into desired memory location
return
new
(
push
<
T
>
())
T
(
object
);
}
template
<
typename
T
>
T
*
push
()
{
T
*
result
=
reinterpret_cast
<
T
*>
(
head_
);
// Move head to next aligned position after new object
...
...
@@ -36,7 +44,6 @@ namespace pls {
exit
(
1
);
// TODO: Exception Handling
}
*
result
=
object
;
return
result
;
}
...
...
@@ -46,6 +53,14 @@ namespace pls {
return
*
reinterpret_cast
<
T
*>
(
head_
);
}
state
save_state
()
{
return
head_
;
}
void
reset_state
(
state
new_state
)
{
head_
=
new_state
;
}
};
}
}
...
...
lib/pls/include/pls/internal/scheduling/scheduler.h
View file @
cbdc5e4f
...
...
@@ -82,7 +82,7 @@ namespace pls {
// TODO: See if we should place this differently (only for performance reasons)
template
<
typename
Task
>
static
void
execute_task
(
Task
task
,
int
depth
=-
1
)
{
static
void
execute_task
(
Task
&
task
,
int
depth
=-
1
)
{
static_assert
(
std
::
is_base_of
<
abstract_task
,
Task
>::
value
,
"Only pass abstract_task subclasses!"
);
auto
my_state
=
base
::
this_thread
::
state
<
thread_state
>
();
...
...
lib/pls/include/pls/internal/scheduling/tbb_task.h
View file @
cbdc5e4f
...
...
@@ -2,7 +2,9 @@
#ifndef PLS_TBB_LIKE_TASK_H
#define PLS_TBB_LIKE_TASK_H
#include <pls/internal/base/aligned_stack.h>
#include "abstract_task.h"
#include "thread_state.h"
namespace
pls
{
namespace
internal
{
...
...
@@ -11,60 +13,89 @@ namespace pls {
class
tbb_sub_task
{
friend
class
tbb_task
;
// Coordinate finishing of sub_tasks
std
::
atomic_uint32_t
ref_count_
;
tbb_sub_task
*
parent_
;
// Access to TBB scheduling environment
tbb_task
*
tbb_task_
;
public
:
explicit
tbb_sub_task
(
tbb_sub_task
*
parent
,
tbb_task
*
tbb_task
);
~
tbb_sub_task
();
void
execute
();
// Double-Ended Queue management
tbb_sub_task
*
below_
;
tbb_sub_task
*
above_
;
// Stack Management (reset stack pointer after wait_for_all() calls)
base
::
aligned_stack
::
state
stack_state_
;
protected
:
explicit
tbb_sub_task
();
tbb_sub_task
(
const
tbb_sub_task
&
other
);
virtual
void
execute_internal
()
=
0
;
// SubClass Implementations:
// Do Work
// |-- Spawn Sub Task
// |-- Spawn Sub Task
(new subtask; spawn(subtask);)
// |-- Spawn Sub task
// Do Work
// |-- Wait For All
// Do Work
// |-- Spawn Sub Task
// Currently required to construct child...
// TODO: Allocate child with custom new(...) on stack
tbb_sub_task
*
parent
()
{
return
parent_
;
}
tbb_task
*
tbb_task
()
{
return
tbb_task_
;
}
template
<
typename
T
>
void
spawn_child
(
const
T
&
sub_task
);
void
wait_for_all
();
private
:
tbb_sub_task
*
get_local_task
();
void
spawn_child_internal
(
tbb_sub_task
*
sub_task
);
void
execute
();
public
:
virtual
void
test
()
{
std
::
cout
<<
"Test"
<<
std
::
endl
;
}
};
class
tbb_task
:
public
abstract_task
{
friend
class
tbb_sub_task
;
tbb_sub_task
*
root_task_
;
// TODO: hold stuff for double ended sub-task queue
base
::
aligned_stack
*
my_stack_
;
// Double-Ended Queue management
tbb_sub_task
*
top_
;
tbb_sub_task
*
bottom_
;
// Steal Management
tbb_sub_task
*
last_stolen_
;
tbb_sub_task
*
get_local_sub_task
();
tbb_sub_task
*
get_stolen_sub_task
();
bool
internal_stealing
(
abstract_task
*
other_task
)
override
;
bool
split_task
()
override
;
public
:
explicit
tbb_task
(
tbb_sub_task
*
root_task
)
:
abstract_task
{
0
,
0
},
root_task_
{
root_task
}
{};
abstract_task
{
0
,
0
},
root_task_
{
root_task
},
top_
{
nullptr
},
bottom_
{
nullptr
},
last_stolen_
{
nullptr
}
{
my_stack_
=
base
::
this_thread
::
state
<
thread_state
>
()
->
task_stack_
;
root_task_
->
tbb_task_
=
this
;
root_task_
->
stack_state_
=
my_stack_
->
save_state
();
};
void
execute
()
override
{
root_task_
->
execute
();
}
};
bool
internal_stealing
(
abstract_task
*
other_task
)
override
{
auto
cast_other_task
=
reinterpret_cast
<
tbb_task
*>
(
other_task
);
// TODO: Try to steal from the other sub-task queue
return
false
;
}
template
<
typename
T
>
void
tbb_sub_task
::
spawn_child
(
const
T
&
task
)
{
static_assert
(
std
::
is_base_of
<
tbb_sub_task
,
T
>::
value
,
"Only pass tbb_sub_task subclasses!"
);
bool
split_task
()
override
{
// TODO: Take an internal task and create a new tbb task from it
return
false
;
}
};
T
*
new_task
=
tbb_task_
->
my_stack_
->
push
(
task
);
spawn_child_internal
(
new_task
);
}
}
}
}
...
...
lib/pls/src/internal/scheduling/tbb_task.cpp
View file @
cbdc5e4f
#include <pls/internal/scheduling/scheduler.h>
#include "pls/internal/scheduling/tbb_task.h"
namespace
pls
{
namespace
internal
{
namespace
scheduling
{
tbb_sub_task
::
tbb_sub_task
(
tbb_sub_task
*
parent
,
class
tbb_task
*
tbb_task
)
:
ref_count_
{
0
},
parent_
{
parent
},
tbb_task_
{
tbb_task
}
{
parent
->
ref_count_
++
;
}
tbb_sub_task
::
tbb_sub_task
()
:
ref_count_
{
0
},
parent_
{
nullptr
},
tbb_task_
{
nullptr
},
below_
{
nullptr
},
above_
{
nullptr
}
{
}
tbb_sub_task
::
~
tbb_sub_task
()
{
wait_for_all
();
tbb_sub_task
::
tbb_sub_task
(
const
tbb_sub_task
&
other
)
{
// Do Nothing, will be inited after this anyways
}
void
tbb_sub_task
::
execute
()
{
execute_internal
();
wait_for_all
();
if
(
parent_
!=
nullptr
)
{
parent_
->
ref_count_
--
;
}
}
tbb_sub_task
*
tbb_sub_task
::
get_local_task
()
{
// TODO: get a task from the bottom of our sub-task queue
void
tbb_sub_task
::
spawn_child_internal
(
tbb_sub_task
*
sub_task
)
{
// Keep our refcount up to date
ref_count_
++
;
// Assign forced values
sub_task
->
parent_
=
this
;
sub_task
->
tbb_task_
=
tbb_task_
;
sub_task
->
stack_state_
=
tbb_task_
->
my_stack_
->
save_state
();
// Put sub_task into stealing queue
if
(
tbb_task_
->
bottom_
!=
nullptr
)
{
tbb_task_
->
bottom_
->
below_
=
sub_task
;
}
else
{
tbb_task_
->
top_
=
sub_task
;
}
sub_task
->
above_
=
tbb_task_
->
bottom_
;
sub_task
->
below_
=
nullptr
;
tbb_task_
->
bottom_
=
sub_task
;
}
void
tbb_sub_task
::
wait_for_all
()
{
while
(
ref_count_
>
0
)
{
tbb_sub_task
*
local_task
=
get_local
_task
();
tbb_sub_task
*
local_task
=
tbb_task_
->
get_local_sub
_task
();
if
(
local_task
!=
nullptr
)
{
local_task
->
execute
();
continue
;
}
else
{
// Try to steal work.
// External steal will be executed
explicitly
// External steal will be executed
implicitly if success
if
(
tbb_task_
->
steal_work
())
{
// TODO: Internal Success, execute stolen task
tbb_task_
->
last_stolen_
->
execute
();
}
}
}
tbb_task_
->
my_stack_
->
reset_state
(
stack_state_
);
}
tbb_sub_task
*
tbb_task
::
get_local_sub_task
()
{
if
(
bottom_
==
nullptr
)
{
return
nullptr
;
}
// Remove from bottom of queue
tbb_sub_task
*
result
=
bottom_
;
bottom_
=
bottom_
->
above_
;
if
(
bottom_
==
nullptr
)
{
top_
=
nullptr
;
}
else
{
bottom_
->
below_
=
nullptr
;
}
return
result
;
}
tbb_sub_task
*
tbb_task
::
get_stolen_sub_task
()
{
if
(
top_
==
nullptr
)
{
return
nullptr
;
}
tbb_sub_task
*
result
=
top_
;
top_
=
top_
->
below_
;
if
(
top_
==
nullptr
)
{
bottom_
=
nullptr
;
}
else
{
top_
->
above_
=
nullptr
;
}
return
result
;
}
bool
tbb_task
::
internal_stealing
(
abstract_task
*
other_task
)
{
auto
cast_other_task
=
reinterpret_cast
<
tbb_task
*>
(
other_task
);
auto
stolen_sub_task
=
cast_other_task
->
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
return
false
;
}
else
{
// Make sub-task belong to our tbb_task instance
stolen_sub_task
->
tbb_task_
=
this
;
stolen_sub_task
->
stack_state_
=
my_stack_
->
save_state
();
// We will execute this next without explicitly moving it onto our stack storage
last_stolen_
=
stolen_sub_task
;
return
true
;
}
}
bool
tbb_task
::
split_task
()
{
tbb_sub_task
*
stolen_sub_task
=
get_stolen_sub_task
();
if
(
stolen_sub_task
==
nullptr
)
{
return
false
;
}
tbb_task
task
{
stolen_sub_task
};
scheduler
::
execute_task
(
task
,
depth
());
return
true
;
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment