diff --git a/lib/pls/include/pls/internal/base/spin_lock.h b/lib/pls/include/pls/internal/base/spin_lock.h index cfd3143..c2b98c8 100644 --- a/lib/pls/include/pls/internal/base/spin_lock.h +++ b/lib/pls/include/pls/internal/base/spin_lock.h @@ -17,9 +17,7 @@ namespace pls { public: spin_lock(): flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{1024} {}; - spin_lock(const spin_lock& other): flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{other.yield_at_tries_} { - std::cout << "Spinlock Moved!" << std::endl; - } + spin_lock(const spin_lock& other): flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{other.yield_at_tries_} {} void lock(); void unlock(); diff --git a/lib/pls/include/pls/internal/scheduling/scheduler.h b/lib/pls/include/pls/internal/scheduling/scheduler.h index 0726c06..164eaff 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler.h @@ -65,19 +65,28 @@ namespace pls { template<typename Function> void perform_work(Function work_section) { root_task<Function> master{work_section}; - root_worker_task<Function> worker{&master}; // Push root task on stacks - memory_->thread_state_for(0)->root_task_ = &master; - memory_->thread_state_for(0)->current_task_ = &master; + auto new_master = memory_->task_stack_for(0)->push(master); + memory_->thread_state_for(0)->root_task_ = new_master; + memory_->thread_state_for(0)->current_task_ = new_master; for (unsigned int i = 1; i < num_threads_; i++) { - memory_->thread_state_for(i)->root_task_ = &worker; - memory_->thread_state_for(i)->current_task_ = &worker; + root_worker_task<Function> worker{new_master}; + auto new_worker = memory_->task_stack_for(0)->push(worker); + memory_->thread_state_for(i)->root_task_ = new_worker; + memory_->thread_state_for(i)->current_task_ = new_worker; } // Perform and wait for work sync_barrier_.wait(); // Trigger threads to wake up sync_barrier_.wait(); // Wait for threads to finish + + // Clean up stack + memory_->task_stack_for(0)->pop(); + for (unsigned int i = 1; i < num_threads_; i++) { + 
root_worker_task<Function> worker{new_master}; + memory_->task_stack_for(0)->pop(); + } } // TODO: See if we should place this differently (only for performance reasons) @@ -86,24 +95,31 @@ namespace pls { static_assert(std::is_base_of<abstract_task, Task>::value, "Only pass abstract_task subclasses!"); auto my_state = base::this_thread::state(); - auto current_task = my_state->current_task_; + abstract_task* old_task; + abstract_task* new_task; // Init Task { std::lock_guard<base::spin_lock> lock{my_state->lock_}; - task.set_depth(depth >= 0 ? depth : current_task->depth() + 1); - my_state->current_task_ = &task; - current_task->set_child(&task); + old_task = my_state->current_task_; + new_task = my_state->task_stack_->push(task); + + new_task->set_depth(depth >= 0 ? depth : old_task->depth() + 1); + my_state->current_task_ = new_task; + old_task->set_child(new_task); } // Run Task - task.execute(); + new_task->execute(); // Teardown state back to before the task was executed { std::lock_guard<base::spin_lock> lock{my_state->lock_}; - current_task->set_child(nullptr); - my_state->current_task_ = current_task; + + old_task->set_child(nullptr); + my_state->current_task_ = old_task; + + my_state->task_stack_->pop(); } } diff --git a/lib/pls/include/pls/internal/scheduling/tbb_task.h b/lib/pls/include/pls/internal/scheduling/tbb_task.h index 70d34da..fd70430 100644 --- a/lib/pls/include/pls/internal/scheduling/tbb_task.h +++ b/lib/pls/include/pls/internal/scheduling/tbb_task.h @@ -74,13 +74,15 @@ namespace pls { abstract_task{0, 0}, root_task_{root_task}, deque_{}, - last_stolen_{nullptr} { + last_stolen_{nullptr} {}; + + void execute() override { + // Bind this instance to our OS thread my_stack_ = base::this_thread::state()->task_stack_; root_task_->tbb_task_ = this; root_task_->stack_state_ = my_stack_->save_state(); - }; - void execute() override { + // Execute it on our OS thread until its finished root_task_->execute(); } }; diff --git a/lib/pls/src/internal/base/spin_lock.cpp b/lib/pls/src/internal/base/spin_lock.cpp index 
4c61216..c99019b 100644 --- a/lib/pls/src/internal/base/spin_lock.cpp +++ b/lib/pls/src/internal/base/spin_lock.cpp @@ -8,18 +8,16 @@ namespace pls { // also act as a strict memory fence. void spin_lock::lock() { int tries = 0; - while (flag_.test_and_set(std::memory_order_acquire)) { + while (flag_.test_and_set(std::memory_order_seq_cst)) { tries++; if (tries % yield_at_tries_ == 0) { this_thread::yield(); } } - std::atomic_thread_fence(std::memory_order_seq_cst); } void spin_lock::unlock() { - flag_.clear(std::memory_order_release); - std::atomic_thread_fence(std::memory_order_seq_cst); + flag_.clear(std::memory_order_seq_cst); } } } diff --git a/lib/pls/src/internal/scheduling/abstract_task.cpp b/lib/pls/src/internal/scheduling/abstract_task.cpp index 3cd709d..0694f6f 100644 --- a/lib/pls/src/internal/scheduling/abstract_task.cpp +++ b/lib/pls/src/internal/scheduling/abstract_task.cpp @@ -42,7 +42,8 @@ namespace pls { // Execute 'top level task steal' if possible // (only try deeper tasks to keep depth restricted stealing) while (current_task != nullptr) { - if (current_task->split_task(&target_state->lock_)) { + auto lock = &target_state->lock_; + if (current_task->split_task(lock)) { // internal steal was no success (we did a top level task steal) return false; } diff --git a/lib/pls/src/internal/scheduling/tbb_task.cpp b/lib/pls/src/internal/scheduling/tbb_task.cpp index 87c3cab..4708a62 100644 --- a/lib/pls/src/internal/scheduling/tbb_task.cpp +++ b/lib/pls/src/internal/scheduling/tbb_task.cpp @@ -1,4 +1,4 @@ -#include <pls/internal/scheduling/scheduler.h> +#include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/tbb_task.h" namespace pls { @@ -82,11 +82,12 @@ namespace pls { if (stolen_sub_task == nullptr) { return false; } + tbb_task task{stolen_sub_task}; + // In success case, unlock. // TODO: this locking is complicated and error prone. 
lock->unlock(); - tbb_task task{stolen_sub_task}; scheduler::execute_task(task, depth()); return true; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4c5fa7c..dbe5d58 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ add_executable(tests main.cpp - base_tests.cpp) + base_tests.cpp scheduling_tests.cpp) target_link_libraries(tests catch2 pls) diff --git a/test/scheduling_tests.cpp b/test/scheduling_tests.cpp new file mode 100644 index 0000000..47ff5e2 --- /dev/null +++ b/test/scheduling_tests.cpp @@ -0,0 +1,79 @@ +#include <catch.hpp> + +#include <pls/pls.h> + +using namespace pls; + +class once_sub_task: public tbb_sub_task { + std::atomic<int>* counter_; + int children_; + +protected: + void execute_internal() override { + (*counter_)++; + for (int i = 0; i < children_; i++) { + spawn_child(once_sub_task(counter_, children_ - 1)); + } + } + +public: + explicit once_sub_task(std::atomic<int>* counter, int children): + tbb_sub_task(), + counter_{counter}, + children_{children} {} +}; + +class force_steal_sub_task: public tbb_sub_task { + std::atomic<int>* parent_counter_; + std::atomic<int>* overall_counter_; + +protected: + void execute_internal() override { + (*overall_counter_)--; + if (overall_counter_->load() > 0) { + std::atomic<int> counter{1}; + spawn_child(force_steal_sub_task(&counter, overall_counter_)); + while (counter.load() > 0) + ; // Spin... 
+ } + + (*parent_counter_)--; + } + +public: + explicit force_steal_sub_task(std::atomic<int>* parent_counter, std::atomic<int>* overall_counter): + tbb_sub_task(), + parent_counter_{parent_counter}, + overall_counter_{overall_counter} {} +}; + +TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/tbb_task.h]") { + static static_scheduler_memory<8, 2 << 12> my_scheduler_memory; + + SECTION("tasks are executed exactly once") { + scheduler my_scheduler{&my_scheduler_memory, 2}; + int start_counter = 4; + int total_tasks = 1 + 4 + 4 * 3 + 4 * 3 * 2 + 4 * 3 * 2 * 1; + std::atomic<int> counter{0}; + + my_scheduler.perform_work([&] (){ + once_sub_task sub_task{&counter, start_counter}; + tbb_task task{&sub_task}; + scheduler::execute_task(task); + }); + + REQUIRE(counter.load() == total_tasks); + my_scheduler.terminate(true); + } + + SECTION("tasks can be stolen") { + scheduler my_scheduler{&my_scheduler_memory, 8}; + my_scheduler.perform_work([&] (){ + std::atomic<int> dummy_parent{1}, overall_counter{8}; + force_steal_sub_task sub_task{&dummy_parent, &overall_counter}; + tbb_task task{&sub_task}; + scheduler::execute_task(task); + }); + my_scheduler.terminate(true); + } +}