Commit 6802681d by FritzFlorian

Move functions using templates to *_impl.h header files.

parent a8a35479
Pipeline #1148 passed with stages in 3 minutes 34 seconds
...@@ -4,7 +4,19 @@ A collection of stuff that we noticed during development.
Useful later on to write a project report and to go back
in time to find out why certain decisions were made.

## 09.02.2019 - Cache Alignment
## 11.04.2019 - Notes on C++ Templating

After working more with templating and talking to Mike,
it seems like the common way to go is the following:

- If possible, add template arguments to
  data containers only (separate from logic).
- If logic and data are coupled (as is often the case with lambdas),
  add the declaration of the interface to the normal header
  some_class.h and put its implementation into an extra implementation
  file some_class_impl.h that is included at the end of the header
  (a minimal sketch of this pattern is shown below).
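A minimal sketch (hypothetical `some_class` example for illustration, not actual pls code):

```cpp
// some_class.h -- interface only, no template bodies.
#ifndef SOME_CLASS_H
#define SOME_CLASS_H

template<typename T>
class some_class {
 public:
  void set(const T& value);
  T get() const;

 private:
  T value_{};
};

// Pull the template implementations in at the end of the header, so every
// translation unit that includes some_class.h can still instantiate them.
#include "some_class_impl.h"

#endif // SOME_CLASS_H
```

```cpp
// some_class_impl.h -- template implementations, included only by some_class.h.
#ifndef SOME_CLASS_IMPL_H
#define SOME_CLASS_IMPL_H

template<typename T>
void some_class<T>::set(const T& value) { value_ = value; }

template<typename T>
T some_class<T>::get() const { return value_; }

#endif // SOME_CLASS_IMPL_H
```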
## 09.04.2019 - Cache Alignment

Aligning for the cache needs all parts to be correct: both the data types must
have the correct alignment and the base memory they are placed in must have the
correct alignment (see the sketch below).
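A hedged illustration in plain C++17, using std::aligned_alloc instead of the project's own allocate_aligned helper; the 64-byte cache line size is an assumption:

```cpp
#include <cstddef>   // std::size_t
#include <cstdlib>   // std::aligned_alloc, std::free
#include <new>       // placement new

constexpr std::size_t kCacheLineSize = 64;  // assumed cache line size

// Part 1: the type itself asks for cache line alignment...
struct alignas(kCacheLineSize) counter {
  long value;
};

int main() {
  // Part 2: ...and the backing memory must actually be cache line aligned,
  // otherwise the alignas request on the type cannot be honored.
  void* raw = std::aligned_alloc(kCacheLineSize, sizeof(counter));
  auto* c = new (raw) counter{0};

  c->value++;  // object now sits alone on its own cache line

  c->~counter();
  std::free(raw);
  return 0;
}
```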
...
...@@ -2,16 +2,19 @@
add_library(pls STATIC
include/pls/pls.h src/pls.cpp
include/pls/algorithms/invoke_parallel.h src/algorithms/invoke_parallel.cpp
include/pls/algorithms/invoke_parallel.h
include/pls/algorithms/invoke_parallel_impl.h
include/pls/internal/base/spin_lock.h src/internal/base/spin_lock.cpp
include/pls/internal/base/thread.h src/internal/base/thread.cpp
include/pls/internal/base/thread_impl.h
include/pls/internal/base/barrier.h src/internal/base/barrier.cpp
include/pls/internal/base/system_details.h
include/pls/internal/base/error_handling.h
include/pls/internal/base/alignment.h src/internal/base/alignment.cpp
include/pls/internal/data_structures/aligned_stack.h src/internal/data_structures/aligned_stack.cpp
include/pls/internal/data_structures/aligned_stack_impl.h
include/pls/internal/data_structures/deque.h src/internal/data_structures/deque.cpp
include/pls/internal/helpers/prohibit_new.h
...@@ -22,6 +25,7 @@ add_library(pls STATIC
include/pls/internal/scheduling/thread_state.h src/internal/scheduling/thread_state.cpp
include/pls/internal/scheduling/abstract_task.h src/internal/scheduling/abstract_task.cpp
include/pls/internal/scheduling/scheduler.h src/internal/scheduling/scheduler.cpp
include/pls/internal/scheduling/scheduler_impl.h
include/pls/internal/scheduling/run_on_n_threads_task.h src/internal/scheduling/run_on_n_threads_task.cpp
include/pls/internal/scheduling/fork_join_task.h src/internal/scheduling/fork_join_task.cpp
include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp
...
...@@ -7,44 +7,15 @@
namespace pls {
namespace algorithm {
namespace internal {
using namespace ::pls::internal::scheduling;
template<typename Body>
inline void run_body(const Body& internal_body, const abstract_task::id& id) {
// Make sure we are in the context of this invoke_parallel instance,
// if not we will spawn it as a new 'fork-join-style' task.
auto current_task = scheduler::current_task();
if (current_task->unique_id() == id) {
auto current_sub_task = reinterpret_cast<fork_join_task*>(current_task)->currently_executing();
internal_body(current_sub_task);
} else {
fork_join_lambda<Body> root_body(&internal_body);
fork_join_task root_task{&root_body, id};
scheduler::execute_task(root_task);
}
}
}
template<typename Function1, typename Function2>
void invoke_parallel(const Function1& function1, const Function2& function2) {
using namespace ::pls::internal::scheduling;
static abstract_task::id id{PLS_UNIQUE_ID, true};
auto internal_body = [&] (fork_join_sub_task* this_task){
auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); };
auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
this_task->spawn_child(sub_task_1);
function2(); // Execute last function 'inline' without spawning a sub_task object
this_task->wait_for_all();
};
internal::run_body(internal_body, id);
}
void invoke_parallel(const Function1& function1, const Function2& function2);

template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1& function1, const Function2& function2, const Function3& function3);

// ...and so on, add more if we decide to keep this design
}
}

#include "invoke_parallel_impl.h"

#endif //PLS_PARALLEL_INVOKE_H
#ifndef PLS_INVOKE_PARALLEL_IMPL_H
#define PLS_INVOKE_PARALLEL_IMPL_H
#include "pls/internal/scheduling/fork_join_task.h"
#include "pls/internal/scheduling/scheduler.h"
namespace pls {
namespace algorithm {
namespace internal {
using namespace ::pls::internal::scheduling;
template<typename Body>
inline void run_body(const Body& internal_body, const abstract_task::id& id) {
// Make sure we are in the context of this invoke_parallel instance,
// if not we will spawn it as a new 'fork-join-style' task.
auto current_task = scheduler::current_task();
if (current_task->unique_id() == id) {
auto current_sub_task = reinterpret_cast<fork_join_task*>(current_task)->currently_executing();
internal_body(current_sub_task);
} else {
fork_join_lambda<Body> root_body(&internal_body);
fork_join_task root_task{&root_body, id};
scheduler::execute_task(root_task);
}
}
}
template<typename Function1, typename Function2>
void invoke_parallel(const Function1& function1, const Function2& function2) {
using namespace ::pls::internal::scheduling;
static abstract_task::id id{PLS_UNIQUE_ID, true};
auto internal_body = [&] (fork_join_sub_task* this_task){
auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); };
auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
this_task->spawn_child(sub_task_1);
function2(); // Execute last function 'inline' without spawning a sub_task object
this_task->wait_for_all();
};
internal::run_body(internal_body, id);
}
template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1& function1, const Function2& function2, const Function3& function3) {
using namespace ::pls::internal::scheduling;
static abstract_task::id id{PLS_UNIQUE_ID, true};
auto internal_body = [&] (fork_join_sub_task* this_task){
auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); };
auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
auto sub_task_body_2 = [&] (fork_join_sub_task*){ function2(); };
auto sub_task_2 = fork_join_lambda<decltype(sub_task_body_2)>(&sub_task_body_2);
this_task->spawn_child(sub_task_1);
this_task->spawn_child(sub_task_2);
function3(); // Execute last function 'inline' without spawning a sub_task object
this_task->wait_for_all();
};
internal::run_body(internal_body, id);
}
}
}
#endif //PLS_INVOKE_PARALLEL_IMPL_H
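For orientation, a minimal usage sketch of the API declared above, based only on the signatures visible in this commit (the scheduler_memory setup is simplified and the thread count is arbitrary):

```cpp
#include "pls/algorithms/invoke_parallel.h"
#include "pls/internal/scheduling/scheduler.h"

void example(pls::internal::scheduling::scheduler_memory* memory) {
  using pls::internal::scheduling::scheduler;

  scheduler my_scheduler{memory, 4};  // 4 worker threads
  my_scheduler.perform_work([] {
    // Inside perform_work we are in the scheduler's context,
    // so the parallel algorithms can be used.
    pls::algorithm::invoke_parallel(
        [] { /* first piece of work */ },
        [] { /* second piece of work, potentially run in parallel */ });
  });
  my_scheduler.terminate();
}
```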
...@@ -14,7 +14,6 @@ namespace pls {
template<typename T>
struct aligned_wrapper {
alignas(system_details::CACHE_LINE_SIZE) unsigned char data[sizeof(T)];
T* pointer() { return reinterpret_cast<T*>(data); }
};

void* allocate_aligned(size_t size);
...
...@@ -49,14 +49,7 @@ namespace pls {
* @return The state pointer held for this thread.
*/
template<typename T>
static T* state() {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
return reinterpret_cast<T*>(pthread_getspecific(local_storage_key_));
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
return reinterpret_cast<T*>(local_state_);
#endif
}
static T* state();

/**
* Stores a pointer to the thread local state object.
...@@ -67,18 +60,11 @@
* @param state_pointer A pointer to the thread's state object.
*/
template<typename T>
static void set_state(T* state_pointer) {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_setspecific(this_thread::local_storage_key_, (void*)state_pointer);
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
local_state_ = state_pointer;
#endif
}
static void set_state(T* state_pointer);
};

/**
* Abstraction for starting a function in a separate thread.
*
* @tparam Function Lambda being started on the new thread.
* @tparam State State type held for this thread.
...@@ -108,52 +94,13 @@
// Keep handle to native implementation
pthread_t pthread_thread_;

static void* start_pthread_internal(void* thread_pointer) {
auto my_thread = reinterpret_cast<thread*>(thread_pointer);
Function my_function_copy = my_thread->function_;
State* my_state_pointer_copy = my_thread->state_pointer_;
// Now we have copies of everything we need on the stack.
// The original thread object can be moved freely (no more
// references to its memory location).
my_thread->startup_flag_->clear();
this_thread::set_state(my_state_pointer_copy);
my_function_copy();
// Finished executing the user function
pthread_exit(nullptr);
}
static void* start_pthread_internal(void* thread_pointer);

public:
thread(): function_{}, state_pointer_{nullptr}, startup_flag_{nullptr}, pthread_thread_{} {}

explicit thread(const Function& function, State* state_pointer):
function_{function},
state_pointer_{state_pointer},
startup_flag_{nullptr},
pthread_thread_{} {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
if (!this_thread::local_storage_key_initialized_) {
pthread_key_create(&this_thread::local_storage_key_, nullptr);
this_thread::local_storage_key_initialized_ = true;
}
#endif
// We only need this during startup, will be destroyed when out of scope
std::atomic_flag startup_flag{ATOMIC_FLAG_INIT};
startup_flag_ = &startup_flag;
startup_flag.test_and_set(); // Set the flag, pthread will clear it when it is safe to return
pthread_create(&pthread_thread_, nullptr, start_pthread_internal, (void *)(this));
while (startup_flag.test_and_set())
; // Busy waiting for the starting flag to clear
}
explicit thread(const Function& function, State* state_pointer);

public:
void join() {
pthread_join(pthread_thread_, nullptr);
}
void join();

// make object move only
thread(thread&&) noexcept = default;
...@@ -164,17 +111,12 @@
};

template<typename Function, typename State>
thread<Function, State> start_thread(const Function& function, State* state_pointer) {
return thread<Function, State>(function, state_pointer);
}
thread<Function, State> start_thread(const Function& function, State* state_pointer);

template<typename Function>
thread<Function, void> start_thread(const Function& function) {
return thread<Function, void>(function, nullptr);
}
thread<Function, void> start_thread(const Function& function);
}
}
}

#include "thread_impl.h"

#endif //PLS_THREAD_H
#ifndef PLS_THREAD_IMPL_H
#define PLS_THREAD_IMPL_H
namespace pls {
namespace internal {
namespace base {
template<typename T>
T* this_thread::state() {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
return reinterpret_cast<T*>(pthread_getspecific(local_storage_key_));
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
return reinterpret_cast<T*>(local_state_);
#endif
}
template<typename T>
void this_thread::set_state(T* state_pointer) {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_setspecific(this_thread::local_storage_key_, (void*)state_pointer);
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
local_state_ = state_pointer;
#endif
}
template<typename Function, typename State>
void* thread<Function, State>::start_pthread_internal(void* thread_pointer) {
auto my_thread = reinterpret_cast<thread*>(thread_pointer);
Function my_function_copy = my_thread->function_;
State* my_state_pointer_copy = my_thread->state_pointer_;
// Now we have copies of everything we need on the stack.
// The original thread object can be moved freely (no more
// references to its memory location).
my_thread->startup_flag_->clear();
this_thread::set_state(my_state_pointer_copy);
my_function_copy();
// Finished executing the user function
pthread_exit(nullptr);
}
template<typename Function, typename State>
thread<Function, State>::thread(const Function& function, State* state_pointer):
function_{function},
state_pointer_{state_pointer},
startup_flag_{nullptr},
pthread_thread_{} {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
if (!this_thread::local_storage_key_initialized_) {
pthread_key_create(&this_thread::local_storage_key_, nullptr);
this_thread::local_storage_key_initialized_ = true;
}
#endif
// We only need this during startup, will be destroyed when out of scope
std::atomic_flag startup_flag{ATOMIC_FLAG_INIT};
startup_flag_ = &startup_flag;
startup_flag.test_and_set(); // Set the flag, pthread will clear it when it is safe to return
pthread_create(&pthread_thread_, nullptr, start_pthread_internal, (void *)(this));
while (startup_flag.test_and_set())
; // Busy waiting for the starting flag to clear
}
template<typename Function, typename State>
void thread<Function, State>::join() {
pthread_join(pthread_thread_, nullptr);
}
template<typename Function, typename State>
thread<Function, State> start_thread(const Function& function, State* state_pointer) {
return thread<Function, State>(function, state_pointer);
}
template<typename Function>
thread<Function, void> start_thread(const Function& function) {
return thread<Function, void>(function, nullptr);
}
}
}
}
#endif //PLS_THREAD_IMPL_H
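As a quick illustration, a hedged usage sketch of the thread API above (hypothetical example, not part of this commit; it assumes the pls headers are on the include path):

```cpp
#include "pls/internal/base/thread.h"

void example() {
  using namespace pls::internal::base;

  int my_state = 42;  // per-thread state handed to the new thread

  auto my_thread = start_thread([] {
    // Inside the thread, the state pointer set by start_thread is available.
    int* state = this_thread::state<int>();
    *state += 1;
  }, &my_state);

  my_thread.join();  // my_state is now 43
}
```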
...@@ -37,40 +37,18 @@
aligned_stack(char* memory_region, std::size_t size);

template<typename T>
T* push(const T& object) {
// Copy-Construct
return new ((void*)push<T>())T(object);
}
T* push(const T& object);

template<typename T>
void* push() {
void* result = reinterpret_cast<T*>(head_);
// Move head to next aligned position after new object
head_ = base::alignment::next_alignment(head_ + sizeof(T));
if (head_ >= memory_end_) {
PLS_ERROR("Tried to allocate object on aligned_stack without sufficient memory!");
}
return result;
}
void* push();

template<typename T>
T pop() {
head_ = head_ - base::alignment::next_alignment(sizeof(T));
return *reinterpret_cast<T*>(head_);
}
T pop();

state save_state() {
return head_;
}
void reset_state(state new_state) {
head_ = new_state;
}

state save_state() const { return head_; }
void reset_state(state new_state) { head_ = new_state; }
};
}
}
}

#include "aligned_stack_impl.h"

#endif //PLS_ALIGNED_STACK_H
#ifndef PLS_ALIGNED_STACK_IMPL_H
#define PLS_ALIGNED_STACK_IMPL_H
namespace pls {
namespace internal {
namespace data_structures {
template<typename T>
T* aligned_stack::push(const T& object) {
// Copy-Construct
return new ((void*)push<T>())T(object);
}
template<typename T>
void* aligned_stack::push() {
void* result = reinterpret_cast<T*>(head_);
// Move head to next aligned position after new object
head_ = base::alignment::next_alignment(head_ + sizeof(T));
if (head_ >= memory_end_) {
PLS_ERROR("Tried to allocate object on aligned_stack without sufficient memory!");
}
return result;
}
template<typename T>
T aligned_stack::pop() {
head_ = head_ - base::alignment::next_alignment(sizeof(T));
return *reinterpret_cast<T*>(head_);
}
}
}
}
#endif //PLS_ALIGNED_STACK_IMPL_H
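Similarly, a hedged usage sketch of the aligned_stack API above (the buffer size and its alignment are illustrative assumptions; the real code feeds it memory from allocate_aligned):

```cpp
#include "pls/internal/data_structures/aligned_stack.h"

void example() {
  using pls::internal::data_structures::aligned_stack;

  alignas(64) static char memory[4096];   // backing buffer for the stack
  aligned_stack stack{memory, sizeof(memory)};

  auto checkpoint = stack.save_state();   // remember the current head

  int* value = stack.push(42);            // copy-construct an int on the stack
  *value += 1;

  stack.reset_state(checkpoint);          // roll back everything pushed since the checkpoint
}
```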
...@@ -77,27 +77,9 @@
bool split_task(base::spin_lock* /*lock*/) override;

public:
explicit fork_join_task(fork_join_sub_task* root_task, const abstract_task::id& id):
abstract_task{0, id},
root_task_{root_task},
currently_executing_{nullptr},
my_stack_{nullptr},
deque_{},
last_stolen_{nullptr} {};
void execute() override {
PROFILE_WORK_BLOCK("execute fork_join_task");
// Bind this instance to our OS thread
my_stack_ = base::this_thread::state<thread_state>()->task_stack_;
root_task_->tbb_task_ = this;
root_task_->stack_state_ = my_stack_->save_state();
// Execute it on our OS thread until it's finished
root_task_->execute();
}
fork_join_sub_task* currently_executing() const { return currently_executing_; }
explicit fork_join_task(fork_join_sub_task* root_task, const abstract_task::id& id);
void execute() override;
fork_join_sub_task* currently_executing() const;
};

template<typename T>
...
...@@ -34,76 +34,34 @@
explicit scheduler(scheduler_memory* memory, unsigned int num_threads);
~scheduler();
/**
* Wakes up the thread pool.
* Code inside the Function lambda can invoke all parallel APIs.
*
* @param work_section generic function or lambda to be executed in the scheduler's context.
*/
template<typename Function>
void perform_work(Function work_section) {
PROFILE_WORK_BLOCK("scheduler::perform_work")
root_task<Function> master{work_section};

// Push root task on stacks
auto new_master = memory_->task_stack_for(0)->push(master);
memory_->thread_state_for(0)->root_task_ = new_master;
memory_->thread_state_for(0)->current_task_ = new_master;
for (unsigned int i = 1; i < num_threads_; i++) {
root_worker_task<Function> worker{new_master};
auto new_worker = memory_->task_stack_for(0)->push(worker);
memory_->thread_state_for(i)->root_task_ = new_worker;
memory_->thread_state_for(i)->current_task_ = new_worker;
}
// Perform and wait for work
sync_barrier_.wait(); // Trigger threads to wake up
sync_barrier_.wait(); // Wait for threads to finish
// Clean up stack
memory_->task_stack_for(0)->pop<typeof(master)>();
for (unsigned int i = 1; i < num_threads_; i++) {
root_worker_task<Function> worker{new_master};
memory_->task_stack_for(0)->pop<typeof(worker)>();
}
}
// TODO: See if we should place this differently (only for performance reasons)
void perform_work(Function work_section);

/**
* Executes a top-level-task (children of abstract_task) on this thread.
*
* @param task The task to be executed.
* @param depth Optional: depth of the new task, otherwise set implicitly.
*/
template<typename Task>
static void execute_task(Task& task, int depth=-1) {
static_assert(std::is_base_of<abstract_task, Task>::value, "Only pass abstract_task subclasses!");
auto my_state = base::this_thread::state<thread_state>();
abstract_task* old_task;
abstract_task* new_task;
// Init Task
{
std::lock_guard<base::spin_lock> lock{my_state->lock_};
old_task = my_state->current_task_;
new_task = my_state->task_stack_->push(task);
new_task->set_depth(depth >= 0 ? depth : old_task->depth() + 1);
my_state->current_task_ = new_task;
old_task->set_child(new_task);
}
// Run Task
new_task->execute();
// Teardown state back to before the task was executed
{
std::lock_guard<base::spin_lock> lock{my_state->lock_};
old_task->set_child(nullptr);
my_state->current_task_ = old_task;
my_state->task_stack_->pop<Task>();
}
}
static void execute_task(Task& task, int depth=-1);

static abstract_task* current_task() { return base::this_thread::state<thread_state>()->current_task_; }

void terminate(bool wait_for_workers=true);
unsigned int num_threads() const { return num_threads_; }
thread_state* thread_state_for(size_t id) { return memory_->thread_state_for(id); }
};
}
}
}

#include "scheduler_impl.h"

#endif //PLS_SCHEDULER_H
#ifndef PLS_SCHEDULER_IMPL_H
#define PLS_SCHEDULER_IMPL_H
namespace pls {
namespace internal {
namespace scheduling {
template<typename Function>
void scheduler::perform_work(Function work_section) {
PROFILE_WORK_BLOCK("scheduler::perform_work")
root_task<Function> master{work_section};
// Push root task on stacks
auto new_master = memory_->task_stack_for(0)->push(master);
memory_->thread_state_for(0)->root_task_ = new_master;
memory_->thread_state_for(0)->current_task_ = new_master;
for (unsigned int i = 1; i < num_threads_; i++) {
root_worker_task<Function> worker{new_master};
auto new_worker = memory_->task_stack_for(0)->push(worker);
memory_->thread_state_for(i)->root_task_ = new_worker;
memory_->thread_state_for(i)->current_task_ = new_worker;
}
// Perform and wait for work
sync_barrier_.wait(); // Trigger threads to wake up
sync_barrier_.wait(); // Wait for threads to finish
// Clean up stack
memory_->task_stack_for(0)->pop<typeof(master)>();
for (unsigned int i = 1; i < num_threads_; i++) {
root_worker_task<Function> worker{new_master};
memory_->task_stack_for(0)->pop<typeof(worker)>();
}
}
template<typename Task>
void scheduler::execute_task(Task& task, int depth) {
static_assert(std::is_base_of<abstract_task, Task>::value, "Only pass abstract_task subclasses!");
auto my_state = base::this_thread::state<thread_state>();
abstract_task* old_task;
abstract_task* new_task;
// Init Task
{
std::lock_guard<base::spin_lock> lock{my_state->lock_};
old_task = my_state->current_task_;
new_task = my_state->task_stack_->push(task);
new_task->set_depth(depth >= 0 ? depth : old_task->depth() + 1);
my_state->current_task_ = new_task;
old_task->set_child(new_task);
}
// Run Task
new_task->execute();
// Teardown state back to before the task was executed
{
std::lock_guard<base::spin_lock> lock{my_state->lock_};
old_task->set_child(nullptr);
my_state->current_task_ = old_task;
my_state->task_stack_->pop<Task>();
}
}
}
}
}
#endif //PLS_SCHEDULER_IMPL_H
#include "pls/algorithms/invoke_parallel.h"
...@@ -107,6 +107,28 @@ namespace pls {
scheduler::execute_task(task, depth());
return true;
}
void fork_join_task::execute() {
PROFILE_WORK_BLOCK("execute fork_join_task");
// Bind this instance to our OS thread
my_stack_ = base::this_thread::state<thread_state>()->task_stack_;
root_task_->tbb_task_ = this;
root_task_->stack_state_ = my_stack_->save_state();
// Execute it on our OS thread until it's finished
root_task_->execute();
}
fork_join_sub_task* fork_join_task::currently_executing() const { return currently_executing_; }
fork_join_task::fork_join_task(fork_join_sub_task* root_task, const abstract_task::id& id):
abstract_task{0, id},
root_task_{root_task},
currently_executing_{nullptr},
my_stack_{nullptr},
deque_{},
last_stolen_{nullptr} {};
}
}
}