Commit aa270645 by FritzFlorian

Reformat code to fit GNU code formatting style.

parent 3ff10baa
Pipeline #1157 passed with stages
in 3 minutes 36 seconds
...@@ -73,7 +73,6 @@ complex_vector prepare_input(int input_size) { ...@@ -73,7 +73,6 @@ complex_vector prepare_input(int input_size) {
return data; return data;
} }
int main() { int main() {
PROFILE_ENABLE PROFILE_ENABLE
complex_vector initial_input = prepare_input(INPUT_SIZE); complex_vector initial_input = prepare_input(INPUT_SIZE);
......
...@@ -10,8 +10,9 @@ ...@@ -10,8 +10,9 @@
#include <pls/internal/scheduling/root_task.h> #include <pls/internal/scheduling/root_task.h>
#include <pls/internal/helpers/unique_id.h> #include <pls/internal/helpers/unique_id.h>
int main() { int main() {
std::cout << pls::internal::scheduling::root_task<void(*)>::create_id().type_.hash_code() << std::endl; std::cout << pls::internal::scheduling::root_task<void (*)>::create_id().type_.hash_code() << std::endl;
std::cout << pls::internal::helpers::unique_id::create<pls::internal::scheduling::root_task<void(*)>>().type_.hash_code() << std::endl; std::cout
<< pls::internal::helpers::unique_id::create<pls::internal::scheduling::root_task<void (*)>>().type_.hash_code()
<< std::endl;
} }
...@@ -5,9 +5,8 @@ using namespace pls::internal::base; ...@@ -5,9 +5,8 @@ using namespace pls::internal::base;
int global = 0; int global = 0;
int main() { int main() {
// Try to use every feature, to trigger the prohibited use of new if found somewhere // Try to use every feature, to trigger the prohibited use of new if found somewhere
auto t1 = start_thread([] (){}); auto t1 = start_thread([]() {});
t1.join(); t1.join();
} }
...@@ -6,15 +6,17 @@ ...@@ -6,15 +6,17 @@
#include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/scheduler.h"
namespace pls { namespace pls {
namespace algorithm { namespace algorithm {
template<typename Function1, typename Function2>
void invoke_parallel(const Function1& function1, const Function2& function2);
template<typename Function1, typename Function2, typename Function3> template<typename Function1, typename Function2>
void invoke_parallel(const Function1& function1, const Function2& function2, const Function3& function3); void invoke_parallel(const Function1 &function1, const Function2 &function2);
// ...and so on, add more if we decide to keep this design template<typename Function1, typename Function2, typename Function3>
} void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3);
// ...and so on, add more if we decide to keep this design
}
} }
#include "invoke_parallel_impl.h" #include "invoke_parallel_impl.h"
......
...@@ -7,34 +7,35 @@ ...@@ -7,34 +7,35 @@
#include "pls/internal/helpers/unique_id.h" #include "pls/internal/helpers/unique_id.h"
namespace pls { namespace pls {
namespace algorithm { namespace algorithm {
namespace internal { namespace internal {
using namespace ::pls::internal::scheduling;
using namespace ::pls::internal::scheduling;
template<typename Body> template<typename Body>
inline void run_body(const Body& internal_body, const abstract_task::id& id) { inline void run_body(const Body &internal_body, const abstract_task::id &id) {
// Make sure we are in the context of this invoke_parallel instance, // Make sure we are in the context of this invoke_parallel instance,
// if not we will spawn it as a new 'fork-join-style' task. // if not we will spawn it as a new 'fork-join-style' task.
auto current_task = scheduler::current_task(); auto current_task = scheduler::current_task();
if (current_task->unique_id() == id) { if (current_task->unique_id() == id) {
auto current_sub_task = reinterpret_cast<fork_join_task*>(current_task)->currently_executing(); auto current_sub_task = reinterpret_cast<fork_join_task *>(current_task)->currently_executing();
internal_body(current_sub_task); internal_body(current_sub_task);
} else { } else {
fork_join_lambda<Body> root_body(&internal_body); fork_join_lambda<Body> root_body(&internal_body);
fork_join_task root_task{&root_body, id}; fork_join_task root_task{&root_body, id};
scheduler::execute_task(root_task); scheduler::execute_task(root_task);
} }
} }
} }
template<typename Function1, typename Function2> template<typename Function1, typename Function2>
void invoke_parallel(const Function1& function1, const Function2& function2) { void invoke_parallel(const Function1 &function1, const Function2 &function2) {
using namespace ::pls::internal::scheduling; using namespace ::pls::internal::scheduling;
using namespace ::pls::internal::helpers; using namespace ::pls::internal::helpers;
static abstract_task::id id = unique_id::create<Function1, Function2>(); static abstract_task::id id = unique_id::create<Function1, Function2>();
auto internal_body = [&] (fork_join_sub_task* this_task){ auto internal_body = [&](fork_join_sub_task *this_task) {
auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); }; auto sub_task_body_1 = [&](fork_join_sub_task *) { function1(); };
auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1); auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
this_task->spawn_child(sub_task_1); this_task->spawn_child(sub_task_1);
...@@ -43,18 +44,18 @@ namespace pls { ...@@ -43,18 +44,18 @@ namespace pls {
}; };
internal::run_body(internal_body, id); internal::run_body(internal_body, id);
} }
template<typename Function1, typename Function2, typename Function3> template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1& function1, const Function2& function2, const Function3& function3) { void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3) {
using namespace ::pls::internal::scheduling; using namespace ::pls::internal::scheduling;
using namespace ::pls::internal::helpers; using namespace ::pls::internal::helpers;
static abstract_task::id id = unique_id::create<Function1, Function2, Function3>(); static abstract_task::id id = unique_id::create<Function1, Function2, Function3>();
auto internal_body = [&] (fork_join_sub_task* this_task){ auto internal_body = [&](fork_join_sub_task *this_task) {
auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); }; auto sub_task_body_1 = [&](fork_join_sub_task *) { function1(); };
auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1); auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
auto sub_task_body_2 = [&] (fork_join_sub_task*){ function2(); }; auto sub_task_body_2 = [&](fork_join_sub_task *) { function2(); };
auto sub_task_2 = fork_join_lambda<decltype(sub_task_body_2)>(&sub_task_body_2); auto sub_task_2 = fork_join_lambda<decltype(sub_task_body_2)>(&sub_task_body_2);
this_task->spawn_child(sub_task_1); this_task->spawn_child(sub_task_1);
...@@ -64,8 +65,9 @@ namespace pls { ...@@ -64,8 +65,9 @@ namespace pls {
}; };
internal::run_body(internal_body, id); internal::run_body(internal_body, id);
} }
}
}
} }
#endif //PLS_INVOKE_PARALLEL_IMPL_H #endif //PLS_INVOKE_PARALLEL_IMPL_H
...@@ -8,21 +8,23 @@ ...@@ -8,21 +8,23 @@
#include "system_details.h" #include "system_details.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
namespace alignment { namespace alignment {
template<typename T>
struct aligned_wrapper { template<typename T>
struct aligned_wrapper {
alignas(system_details::CACHE_LINE_SIZE) unsigned char data[sizeof(T)]; alignas(system_details::CACHE_LINE_SIZE) unsigned char data[sizeof(T)];
T* pointer() { return reinterpret_cast<T*>(data); } T *pointer() { return reinterpret_cast<T *>(data); }
}; };
void* allocate_aligned(size_t size); void *allocate_aligned(size_t size);
std::uintptr_t next_alignment(std::uintptr_t size); std::uintptr_t next_alignment(std::uintptr_t size);
char* next_alignment(char* pointer); char *next_alignment(char *pointer);
}
} }
} }
}
} }
#endif //PLS_ALIGNMENT_H #endif //PLS_ALIGNMENT_H
...@@ -5,9 +5,10 @@ ...@@ -5,9 +5,10 @@
#include <pthread.h> #include <pthread.h>
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
/**
/**
* Provides standard barrier behaviour. * Provides standard barrier behaviour.
* `count` threads have to call `wait()` before any of the `wait()` calls returns, * `count` threads have to call `wait()` before any of the `wait()` calls returns,
* thus blocking all threads until everyone reached the barrier. * thus blocking all threads until everyone reached the barrier.
...@@ -15,7 +16,7 @@ namespace pls { ...@@ -15,7 +16,7 @@ namespace pls {
* PORTABILITY: * PORTABILITY:
* Current implementation is based on pthreads. * Current implementation is based on pthreads.
*/ */
class barrier { class barrier {
pthread_barrier_t barrier_; pthread_barrier_t barrier_;
public: public:
...@@ -23,9 +24,10 @@ namespace pls { ...@@ -23,9 +24,10 @@ namespace pls {
~barrier(); ~barrier();
void wait(); void wait();
}; };
}
} }
}
} }
#endif //PLS_BARRIER_H #endif //PLS_BARRIER_H
...@@ -6,12 +6,14 @@ ...@@ -6,12 +6,14 @@
#include "ttas_spin_lock.h" #include "ttas_spin_lock.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
// Default Spin-Lock implementation for this project.
using spin_lock = tas_spin_lock; // Default Spin-Lock implementation for this project.
} using spin_lock = tas_spin_lock;
}
}
}
} }
#endif //PLS_SPINLOCK_H #endif //PLS_SPINLOCK_H
...@@ -5,29 +5,31 @@ ...@@ -5,29 +5,31 @@
#include <cstdint> #include <cstdint>
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
/**
/**
* Collection of system details, e.g. hardware cache line size. * Collection of system details, e.g. hardware cache line size.
* *
* PORTABILITY: * PORTABILITY:
* Currently sane default values for x86. * Currently sane default values for x86.
*/ */
namespace system_details { namespace system_details {
/** /**
* Most processors have 64 byte cache lines * Most processors have 64 byte cache lines
*/ */
constexpr std::uintptr_t CACHE_LINE_SIZE = 64; constexpr std::uintptr_t CACHE_LINE_SIZE = 64;
/** /**
* Choose one of the following ways to store thread specific data. * Choose one of the following ways to store thread specific data.
* Try to choose the fastest available on this processor/system. * Try to choose the fastest available on this processor/system.
*/ */
// #define PLS_THREAD_SPECIFIC_PTHREAD // #define PLS_THREAD_SPECIFIC_PTHREAD
#define PLS_THREAD_SPECIFIC_COMPILER #define PLS_THREAD_SPECIFIC_COMPILER
}
} }
} }
}
} }
#endif //PLS_SYSTEM_DETAILS_H #endif //PLS_SYSTEM_DETAILS_H
...@@ -10,30 +10,30 @@ ...@@ -10,30 +10,30 @@
#include "pls/internal/base/thread.h" #include "pls/internal/base/thread.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
/**
/**
* A simple set and test_and_set based spin lock implementation. * A simple set and test_and_set based spin lock implementation.
* *
* PORTABILITY: * PORTABILITY:
* Current implementation is based on C++ 11 atomic_flag. * Current implementation is based on C++ 11 atomic_flag.
*/ */
class tas_spin_lock { class tas_spin_lock {
std::atomic_flag flag_; std::atomic_flag flag_;
unsigned int yield_at_tries_; unsigned int yield_at_tries_;
public: public:
tas_spin_lock(): flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{1024} {}; tas_spin_lock() : flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{1024} {};
tas_spin_lock(const tas_spin_lock& other): flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{other.yield_at_tries_} {} tas_spin_lock(const tas_spin_lock &other) : flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{other.yield_at_tries_} {}
void lock(); void lock();
bool try_lock(unsigned int num_tries=1); bool try_lock(unsigned int num_tries = 1);
void unlock(); void unlock();
}; };
}
}
}
}
}
}
#endif //PLS_TAS_SPIN_LOCK_H #endif //PLS_TAS_SPIN_LOCK_H
...@@ -13,11 +13,12 @@ ...@@ -13,11 +13,12 @@
#include "system_details.h" #include "system_details.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
using thread_entrypoint = void();
/** using thread_entrypoint = void();
/**
* Static methods that can be performed on the current thread. * Static methods that can be performed on the current thread.
* *
* usage: * usage:
...@@ -27,15 +28,16 @@ namespace pls { ...@@ -27,15 +28,16 @@ namespace pls {
* PORTABILITY: * PORTABILITY:
* Current implementation is based on pthreads. * Current implementation is based on pthreads.
*/ */
class this_thread { class this_thread {
template<typename Function, typename State> template<typename Function, typename State>
friend class thread; friend
class thread;
#ifdef PLS_THREAD_SPECIFIC_PTHREAD #ifdef PLS_THREAD_SPECIFIC_PTHREAD
static pthread_key_t local_storage_key_; static pthread_key_t local_storage_key_;
static bool local_storage_key_initialized_; static bool local_storage_key_initialized_;
#endif #endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER #ifdef PLS_THREAD_SPECIFIC_COMPILER
static __thread void* local_state_; static __thread void *local_state_;
#endif #endif
public: public:
static void yield() { static void yield() {
...@@ -49,7 +51,7 @@ namespace pls { ...@@ -49,7 +51,7 @@ namespace pls {
* @return The state pointer held for this thread. * @return The state pointer held for this thread.
*/ */
template<typename T> template<typename T>
static T* state(); static T *state();
/** /**
* Stores a pointer to the thread local state object. * Stores a pointer to the thread local state object.
...@@ -60,10 +62,10 @@ namespace pls { ...@@ -60,10 +62,10 @@ namespace pls {
* @param state_pointer A pointer to the threads state object. * @param state_pointer A pointer to the threads state object.
*/ */
template<typename T> template<typename T>
static void set_state(T* state_pointer); static void set_state(T *state_pointer);
}; };
/** /**
* Abstraction for starting a function in a separate thread. * Abstraction for starting a function in a separate thread.
* *
* @tparam Function Lambda being started on the new thread. * @tparam Function Lambda being started on the new thread.
...@@ -79,43 +81,44 @@ namespace pls { ...@@ -79,43 +81,44 @@ namespace pls {
* PORTABILITY: * PORTABILITY:
* Current implementation is based on pthreads. * Current implementation is based on pthreads.
*/ */
template<typename Function, typename State> template<typename Function, typename State>
class thread { class thread {
friend class this_thread; friend class this_thread;
// Keep a copy of the function (lambda) in this object to make sure it is valid when called! // Keep a copy of the function (lambda) in this object to make sure it is valid when called!
Function function_; Function function_;
State* state_pointer_; State *state_pointer_;
// We need to wait for the started function to read // We need to wait for the started function to read
// the function_ and state_pointer_ property before returning // the function_ and state_pointer_ property before returning
// from the constructor, as the object might be moved after this. // from the constructor, as the object might be moved after this.
std::atomic_flag* startup_flag_; std::atomic_flag *startup_flag_;
// Keep handle to native implementation // Keep handle to native implementation
pthread_t pthread_thread_; pthread_t pthread_thread_;
static void* start_pthread_internal(void* thread_pointer); static void *start_pthread_internal(void *thread_pointer);
public: public:
explicit thread(const Function& function, State* state_pointer); explicit thread(const Function &function, State *state_pointer);
public: public:
void join(); void join();
// make object move only // make object move only
thread(thread&&) noexcept = default; thread(thread &&) noexcept = default;
thread& operator=(thread&&) noexcept = default; thread &operator=(thread &&) noexcept = default;
thread(const thread&) = delete; thread(const thread &) = delete;
thread& operator=(const thread&) = delete; thread &operator=(const thread &) = delete;
}; };
template<typename Function, typename State> template<typename Function, typename State>
thread<Function, State> start_thread(const Function& function, State* state_pointer); thread<Function, State> start_thread(const Function &function, State *state_pointer);
template<typename Function> template<typename Function>
thread<Function, void> start_thread(const Function& function); thread<Function, void> start_thread(const Function &function);
}
} }
}
} }
#include "thread_impl.h" #include "thread_impl.h"
......
...@@ -3,33 +3,34 @@ ...@@ -3,33 +3,34 @@
#define PLS_THREAD_IMPL_H #define PLS_THREAD_IMPL_H
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
template<typename T>
T* this_thread::state() { template<typename T>
T *this_thread::state() {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD #ifdef PLS_THREAD_SPECIFIC_PTHREAD
return reinterpret_cast<T*>(pthread_getspecific(local_storage_key_)); return reinterpret_cast<T*>(pthread_getspecific(local_storage_key_));
#endif #endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER #ifdef PLS_THREAD_SPECIFIC_COMPILER
return reinterpret_cast<T*>(local_state_); return reinterpret_cast<T *>(local_state_);
#endif #endif
} }
template<typename T> template<typename T>
void this_thread::set_state(T* state_pointer) { void this_thread::set_state(T *state_pointer) {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD #ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_setspecific(this_thread::local_storage_key_, (void*)state_pointer); pthread_setspecific(this_thread::local_storage_key_, (void*)state_pointer);
#endif #endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER #ifdef PLS_THREAD_SPECIFIC_COMPILER
local_state_ = state_pointer; local_state_ = state_pointer;
#endif #endif
} }
template<typename Function, typename State> template<typename Function, typename State>
void* thread<Function, State>::start_pthread_internal(void* thread_pointer) { void *thread<Function, State>::start_pthread_internal(void *thread_pointer) {
auto my_thread = reinterpret_cast<thread*>(thread_pointer); auto my_thread = reinterpret_cast<thread *>(thread_pointer);
Function my_function_copy = my_thread->function_; Function my_function_copy = my_thread->function_;
State* my_state_pointer_copy = my_thread->state_pointer_; State *my_state_pointer_copy = my_thread->state_pointer_;
// Now we have copies of everything we need on the stack. // Now we have copies of everything we need on the stack.
// The original thread object can be moved freely (no more // The original thread object can be moved freely (no more
...@@ -41,10 +42,10 @@ namespace pls { ...@@ -41,10 +42,10 @@ namespace pls {
// Finished executing the user function // Finished executing the user function
pthread_exit(nullptr); pthread_exit(nullptr);
} }
template<typename Function, typename State> template<typename Function, typename State>
thread<Function, State>::thread(const Function& function, State* state_pointer): thread<Function, State>::thread(const Function &function, State *state_pointer):
function_{function}, function_{function},
state_pointer_{state_pointer}, state_pointer_{state_pointer},
startup_flag_{nullptr}, startup_flag_{nullptr},
...@@ -62,27 +63,27 @@ namespace pls { ...@@ -62,27 +63,27 @@ namespace pls {
startup_flag_ = &startup_flag; startup_flag_ = &startup_flag;
startup_flag.test_and_set(); // Set the flag, pthread will clear it when it is safe to return startup_flag.test_and_set(); // Set the flag, pthread will clear it when it is safe to return
pthread_create(&pthread_thread_, nullptr, start_pthread_internal, (void *)(this)); pthread_create(&pthread_thread_, nullptr, start_pthread_internal, (void *) (this));
while (startup_flag.test_and_set()) while (startup_flag.test_and_set()); // Busy waiting for the starting flag to clear
; // Busy waiting for the starting flag to clear }
}
template<typename Function, typename State> template<typename Function, typename State>
void thread<Function, State>::join() { void thread<Function, State>::join() {
pthread_join(pthread_thread_, nullptr); pthread_join(pthread_thread_, nullptr);
} }
template<typename Function, typename State> template<typename Function, typename State>
thread<Function, State> start_thread(const Function& function, State* state_pointer) { thread<Function, State> start_thread(const Function &function, State *state_pointer) {
return thread<Function, State>(function, state_pointer); return thread<Function, State>(function, state_pointer);
} }
template<typename Function> template<typename Function>
thread<Function, void> start_thread(const Function& function) { thread<Function, void> start_thread(const Function &function) {
return thread<Function, void>(function, nullptr); return thread<Function, void>(function, nullptr);
} }
}
} }
}
} }
#endif //PLS_THREAD_IMPL_H #endif //PLS_THREAD_IMPL_H
...@@ -8,30 +8,28 @@ ...@@ -8,30 +8,28 @@
#include "pls/internal/base/thread.h" #include "pls/internal/base/thread.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
/** /**
* A simple set and test_and_set based spin lock implementation. * A simple set and test_and_set based spin lock implementation.
* *
* PORTABILITY: * PORTABILITY:
* Current implementation is based on C++ 11 atomic_flag. * Current implementation is based on C++ 11 atomic_flag.
*/ */
class ttas_spin_lock { class ttas_spin_lock {
std::atomic<int> flag_; std::atomic<int> flag_;
const unsigned int yield_at_tries_; const unsigned int yield_at_tries_;
public: public:
ttas_spin_lock(): flag_{0}, yield_at_tries_{1024} {}; ttas_spin_lock() : flag_{0}, yield_at_tries_{1024} {};
ttas_spin_lock(const ttas_spin_lock& other): flag_{0}, yield_at_tries_{other.yield_at_tries_} {} ttas_spin_lock(const ttas_spin_lock &other) : flag_{0}, yield_at_tries_{other.yield_at_tries_} {}
void lock(); void lock();
bool try_lock(unsigned int num_tries=1); bool try_lock(unsigned int num_tries = 1);
void unlock(); void unlock();
}; };
} }
} }
} }
#endif //PLS_TTAS_SPIN_LOCK_H #endif //PLS_TTAS_SPIN_LOCK_H
...@@ -9,9 +9,10 @@ ...@@ -9,9 +9,10 @@
#include "pls/internal/base/alignment.h" #include "pls/internal/base/alignment.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace data_structures { namespace data_structures {
/**
/**
* Generic stack-like data structure that allows to allocate arbitrary objects in a given memory region. * Generic stack-like data structure that allows to allocate arbitrary objects in a given memory region.
* The objects will be stored aligned in the stack, making the storage cache friendly and very fast * The objects will be stored aligned in the stack, making the storage cache friendly and very fast
* (as long as one can live with the stack restrictions). * (as long as one can live with the stack restrictions).
...@@ -23,31 +24,33 @@ namespace pls { ...@@ -23,31 +24,33 @@ namespace pls {
* T* pointer = stack.push(some_object); // Copy-Construct the object on top of stack * T* pointer = stack.push(some_object); // Copy-Construct the object on top of stack
* stack.pop<T>(); // Deconstruct the top object of type T * stack.pop<T>(); // Deconstruct the top object of type T
*/ */
class aligned_stack { class aligned_stack {
// Keep bounds of our memory block // Keep bounds of our memory block
char* memory_start_; char *memory_start_;
char* memory_end_; char *memory_end_;
// Current head will always be aligned to cache lines // Current head will always be aligned to cache lines
char* head_; char *head_;
public: public:
typedef char* state; typedef char *state;
aligned_stack(): memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {}; aligned_stack() : memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {};
aligned_stack(char* memory_region, std::size_t size); aligned_stack(char *memory_region, std::size_t size);
template<typename T> template<typename T>
T* push(const T& object); T *push(const T &object);
template<typename T> template<typename T>
void* push(); void *push();
template<typename T> template<typename T>
T pop(); T pop();
state save_state() const { return head_; } state save_state() const { return head_; }
void reset_state(state new_state) { head_ = new_state; } void reset_state(state new_state) { head_ = new_state; }
}; };
}
}
}
}
} }
#include "aligned_stack_impl.h" #include "aligned_stack_impl.h"
......
...@@ -3,17 +3,18 @@ ...@@ -3,17 +3,18 @@
#define PLS_ALIGNED_STACK_IMPL_H #define PLS_ALIGNED_STACK_IMPL_H
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace data_structures { namespace data_structures {
template<typename T>
T* aligned_stack::push(const T& object) { template<typename T>
T *aligned_stack::push(const T &object) {
// Copy-Construct // Copy-Construct
return new ((void*)push<T>())T(object); return new((void *) push < T > ())T(object);
} }
template<typename T> template<typename T>
void* aligned_stack::push() { void *aligned_stack::push() {
void* result = reinterpret_cast<T*>(head_); void *result = reinterpret_cast<T *>(head_);
// Move head to next aligned position after new object // Move head to next aligned position after new object
head_ = base::alignment::next_alignment(head_ + sizeof(T)); head_ = base::alignment::next_alignment(head_ + sizeof(T));
...@@ -22,15 +23,16 @@ namespace pls { ...@@ -22,15 +23,16 @@ namespace pls {
} }
return result; return result;
} }
template<typename T> template<typename T>
T aligned_stack::pop() { T aligned_stack::pop() {
head_ = head_ - base::alignment::next_alignment(sizeof(T)); head_ = head_ - base::alignment::next_alignment(sizeof(T));
return *reinterpret_cast<T*>(head_); return *reinterpret_cast<T *>(head_);
} }
}
} }
}
} }
#endif //PLS_ALIGNED_STACK_IMPL_H #endif //PLS_ALIGNED_STACK_IMPL_H
...@@ -5,56 +5,58 @@ ...@@ -5,56 +5,58 @@
#include "pls/internal/base/spin_lock.h" #include "pls/internal/base/spin_lock.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace data_structures { namespace data_structures {
/**
/**
* Turns any object into deque item when inheriting from this. * Turns any object into deque item when inheriting from this.
*/ */
class deque_item { class deque_item {
friend class deque_internal; friend class deque_internal;
deque_item* prev_; deque_item *prev_;
deque_item* next_; deque_item *next_;
}; };
class deque_internal { class deque_internal {
protected: protected:
deque_item* head_; deque_item *head_;
deque_item* tail_; deque_item *tail_;
base::spin_lock lock_; base::spin_lock lock_;
deque_item* pop_head_internal(); deque_item *pop_head_internal();
deque_item* pop_tail_internal(); deque_item *pop_tail_internal();
void push_tail_internal(deque_item *new_item); void push_tail_internal(deque_item *new_item);
}; };
/** /**
* A double linked list based deque. * A double linked list based deque.
* Storage is therefore only needed for the individual items. * Storage is therefore only needed for the individual items.
* *
* @tparam Item The type of items stored in this deque * @tparam Item The type of items stored in this deque
*/ */
template<typename Item> template<typename Item>
class deque: deque_internal { class deque : deque_internal {
public: public:
explicit deque(): deque_internal{} {} explicit deque() : deque_internal{} {}
inline Item* pop_head() { inline Item *pop_head() {
return static_cast<Item*>(pop_head_internal()); return static_cast<Item *>(pop_head_internal());
} }
inline Item* pop_tail() { inline Item *pop_tail() {
return static_cast<Item*>(pop_tail_internal()); return static_cast<Item *>(pop_tail_internal());
} }
inline void push_tail(Item* new_item) { inline void push_tail(Item *new_item) {
push_tail_internal(new_item); push_tail_internal(new_item);
} }
}; };
}
} }
}
} }
#endif //PLS_DEQUE_H #endif //PLS_DEQUE_H
...@@ -9,11 +9,12 @@ ...@@ -9,11 +9,12 @@
#include <iostream> #include <iostream>
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace helpers { namespace helpers {
// TODO: Clean up (separate into small functions and .cpp file)
template<typename Function> // TODO: Clean up (separate into small functions and .cpp file)
void run_mini_benchmark(const Function& lambda, size_t max_threads, unsigned long max_runtime_ms=1000) { template<typename Function>
void run_mini_benchmark(const Function &lambda, size_t max_threads, unsigned long max_runtime_ms = 1000) {
using namespace std; using namespace std;
using namespace pls::internal::scheduling; using namespace pls::internal::scheduling;
...@@ -37,7 +38,7 @@ namespace pls { ...@@ -37,7 +38,7 @@ namespace pls {
}); });
long time = chrono::duration_cast<chrono::microseconds>(end_time - start_time).count(); long time = chrono::duration_cast<chrono::microseconds>(end_time - start_time).count();
double time_per_iteration = (double)time / iterations; double time_per_iteration = (double) time / iterations;
std::cout << time_per_iteration; std::cout << time_per_iteration;
if (num_threads < max_threads) { if (num_threads < max_threads) {
...@@ -45,9 +46,10 @@ namespace pls { ...@@ -45,9 +46,10 @@ namespace pls {
} }
} }
std::cout << std::endl; std::cout << std::endl;
} }
}
} }
}
} }
#endif //PLS_MINI_BENCHMARK_H #endif //PLS_MINI_BENCHMARK_H
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#ifdef NEW_LINK_ERROR #ifdef NEW_LINK_ERROR
// This will cause a linker error if new is used in the code. // This will cause a linker error if new is used in the code.
// We also exit if it is somehow still called. // We also exit if it is somehow still called.
inline void * operator new (std::size_t) { inline void *operator new(std::size_t) {
extern int bare_new_erroneously_called(); extern int bare_new_erroneously_called();
exit(bare_new_erroneously_called() | 1); exit(bare_new_erroneously_called() | 1);
} }
......
...@@ -7,12 +7,13 @@ ...@@ -7,12 +7,13 @@
#include <stdint.h> #include <stdint.h>
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace helpers { namespace helpers {
struct unique_id {
struct unique_id {
const uint32_t id_; const uint32_t id_;
const std::type_info& type_; const std::type_info &type_;
bool operator==(const unique_id& other) const { return id_ == other.id_ && type_ == other.type_; } bool operator==(const unique_id &other) const { return id_ == other.id_ && type_ == other.type_; }
static constexpr unique_id create(const uint32_t id) { static constexpr unique_id create(const uint32_t id) {
return unique_id(id, typeid(void)); return unique_id(id, typeid(void));
...@@ -22,10 +23,11 @@ namespace pls { ...@@ -22,10 +23,11 @@ namespace pls {
return unique_id(UINT32_MAX, typeid(std::tuple<T...>)); return unique_id(UINT32_MAX, typeid(std::tuple<T...>));
} }
private: private:
explicit constexpr unique_id(const uint32_t id, const std::type_info& type): id_{id}, type_{type} {}; explicit constexpr unique_id(const uint32_t id, const std::type_info &type) : id_{id}, type_{type} {};
}; };
}
} }
}
} }
#endif //PLS_UNIQUE_ID_H #endif //PLS_UNIQUE_ID_H
...@@ -6,38 +6,40 @@ ...@@ -6,38 +6,40 @@
#include "pls/internal/helpers/unique_id.h" #include "pls/internal/helpers/unique_id.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
class abstract_task {
class abstract_task {
public: public:
using id = helpers::unique_id; using id = helpers::unique_id;
private: private:
unsigned int depth_; unsigned int depth_;
abstract_task::id unique_id_; abstract_task::id unique_id_;
abstract_task* child_task_; abstract_task *child_task_;
public: public:
abstract_task(const unsigned int depth, const abstract_task::id& unique_id): abstract_task(const unsigned int depth, const abstract_task::id &unique_id) :
depth_{depth}, depth_{depth},
unique_id_{unique_id}, unique_id_{unique_id},
child_task_{nullptr} {} child_task_{nullptr} {}
virtual void execute() = 0; virtual void execute() = 0;
void set_child(abstract_task* child_task) { child_task_ = child_task; } void set_child(abstract_task *child_task) { child_task_ = child_task; }
abstract_task* child() { return child_task_; } abstract_task *child() { return child_task_; }
void set_depth(unsigned int depth) { depth_ = depth; } void set_depth(unsigned int depth) { depth_ = depth; }
unsigned int depth() const { return depth_; } unsigned int depth() const { return depth_; }
id unique_id() const { return unique_id_; } id unique_id() const { return unique_id_; }
protected: protected:
virtual bool internal_stealing(abstract_task* other_task) = 0; virtual bool internal_stealing(abstract_task *other_task) = 0;
virtual bool split_task(base::spin_lock* lock) = 0; virtual bool split_task(base::spin_lock *lock) = 0;
bool steal_work(); bool steal_work();
}; };
}
} }
}
} }
#endif //PLS_ABSTRACT_TASK_H #endif //PLS_ABSTRACT_TASK_H
...@@ -11,24 +11,25 @@ ...@@ -11,24 +11,25 @@
#include "thread_state.h" #include "thread_state.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
class fork_join_task;
class fork_join_sub_task: public data_structures::deque_item { class fork_join_task;
class fork_join_sub_task : public data_structures::deque_item {
friend class fork_join_task; friend class fork_join_task;
// Coordinate finishing of sub_tasks // Coordinate finishing of sub_tasks
std::atomic_uint32_t ref_count_; std::atomic_uint32_t ref_count_;
fork_join_sub_task* parent_; fork_join_sub_task *parent_;
// Access to TBB scheduling environment // Access to TBB scheduling environment
fork_join_task* tbb_task_; fork_join_task *tbb_task_;
// Stack Management (reset stack pointer after wait_for_all() calls) // Stack Management (reset stack pointer after wait_for_all() calls)
data_structures::aligned_stack::state stack_state_; data_structures::aligned_stack::state stack_state_;
protected: protected:
explicit fork_join_sub_task(); explicit fork_join_sub_task();
fork_join_sub_task(const fork_join_sub_task& other); fork_join_sub_task(const fork_join_sub_task &other);
// Overwritten with behaviour of child tasks // Overwritten with behaviour of child tasks
virtual void execute_internal() = 0; virtual void execute_internal() = 0;
...@@ -36,62 +37,63 @@ namespace pls { ...@@ -36,62 +37,63 @@ namespace pls {
public: public:
// Only use them when actually executing this sub_task (only public for simpler API design) // Only use them when actually executing this sub_task (only public for simpler API design)
template<typename T> template<typename T>
void spawn_child(const T& sub_task); void spawn_child(const T &sub_task);
void wait_for_all(); void wait_for_all();
private: private:
void spawn_child_internal(fork_join_sub_task* sub_task); void spawn_child_internal(fork_join_sub_task *sub_task);
void execute(); void execute();
}; };
template<typename Function> template<typename Function>
class fork_join_lambda: public fork_join_sub_task { class fork_join_lambda : public fork_join_sub_task {
const Function* function_; const Function *function_;
public: public:
explicit fork_join_lambda(const Function* function): function_{function} {}; explicit fork_join_lambda(const Function *function) : function_{function} {};
protected: protected:
void execute_internal() override { void execute_internal() override {
(*function_)(this); (*function_)(this);
} }
}; };
class fork_join_task: public abstract_task { class fork_join_task : public abstract_task {
friend class fork_join_sub_task; friend class fork_join_sub_task;
fork_join_sub_task* root_task_; fork_join_sub_task *root_task_;
fork_join_sub_task* currently_executing_; fork_join_sub_task *currently_executing_;
data_structures::aligned_stack* my_stack_; data_structures::aligned_stack *my_stack_;
// Double-Ended Queue management // Double-Ended Queue management
data_structures::deque<fork_join_sub_task> deque_; data_structures::deque<fork_join_sub_task> deque_;
// Steal Management // Steal Management
fork_join_sub_task* last_stolen_; fork_join_sub_task *last_stolen_;
fork_join_sub_task* get_local_sub_task(); fork_join_sub_task *get_local_sub_task();
fork_join_sub_task* get_stolen_sub_task(); fork_join_sub_task *get_stolen_sub_task();
bool internal_stealing(abstract_task* other_task) override; bool internal_stealing(abstract_task *other_task) override;
bool split_task(base::spin_lock* /*lock*/) override; bool split_task(base::spin_lock * /*lock*/) override;
public: public:
explicit fork_join_task(fork_join_sub_task* root_task, const abstract_task::id& id); explicit fork_join_task(fork_join_sub_task *root_task, const abstract_task::id &id);
void execute() override; void execute() override;
fork_join_sub_task* currently_executing() const; fork_join_sub_task *currently_executing() const;
}; };
template<typename T> template<typename T>
void fork_join_sub_task::spawn_child(const T& task) { void fork_join_sub_task::spawn_child(const T &task) {
PROFILE_FORK_JOIN_STEALING("spawn_child") PROFILE_FORK_JOIN_STEALING("spawn_child")
static_assert(std::is_base_of<fork_join_sub_task, T>::value, "Only pass fork_join_sub_task subclasses!"); static_assert(std::is_base_of<fork_join_sub_task, T>::value, "Only pass fork_join_sub_task subclasses!");
T* new_task = tbb_task_->my_stack_->push(task); T *new_task = tbb_task_->my_stack_->push(task);
spawn_child_internal(new_task); spawn_child_internal(new_task);
} }
}
} }
}
} }
#endif //PLS_TBB_LIKE_TASK_H #endif //PLS_TBB_LIKE_TASK_H
...@@ -10,20 +10,21 @@ ...@@ -10,20 +10,21 @@
#include "abstract_task.h" #include "abstract_task.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
template<typename Function>
class root_task : public abstract_task { template<typename Function>
class root_task : public abstract_task {
Function function_; Function function_;
std::atomic_uint8_t finished_; std::atomic_uint8_t finished_;
public: public:
static constexpr auto create_id = helpers::unique_id::create<root_task<Function>>; static constexpr auto create_id = helpers::unique_id::create<root_task<Function>>;
explicit root_task(Function function): explicit root_task(Function function) :
abstract_task{0, create_id()}, abstract_task{0, create_id()},
function_{function}, function_{function},
finished_{0} {} finished_{0} {}
root_task(const root_task& other): root_task(const root_task &other) :
abstract_task{0, create_id()}, abstract_task{0, create_id()},
function_{other.function_}, function_{other.function_},
finished_{0} {} finished_{0} {}
...@@ -38,23 +39,23 @@ namespace pls { ...@@ -38,23 +39,23 @@ namespace pls {
finished_ = 1; finished_ = 1;
} }
bool internal_stealing(abstract_task* /*other_task*/) override { bool internal_stealing(abstract_task * /*other_task*/) override {
return false; return false;
} }
bool split_task(base::spin_lock* /*lock*/) override { bool split_task(base::spin_lock * /*lock*/) override {
return false; return false;
} }
}; };
template<typename Function> template<typename Function>
class root_worker_task : public abstract_task { class root_worker_task : public abstract_task {
root_task<Function>* master_task_; root_task<Function> *master_task_;
public: public:
static constexpr auto create_id = root_task<Function>::create_id; static constexpr auto create_id = root_task<Function>::create_id;
explicit root_worker_task(root_task<Function>* master_task): explicit root_worker_task(root_task<Function> *master_task) :
abstract_task{0, create_id()}, abstract_task{0, create_id()},
master_task_{master_task} {} master_task_{master_task} {}
...@@ -65,16 +66,17 @@ namespace pls { ...@@ -65,16 +66,17 @@ namespace pls {
} while (!master_task_->finished()); } while (!master_task_->finished());
} }
bool internal_stealing(abstract_task* /*other_task*/) override { bool internal_stealing(abstract_task * /*other_task*/) override {
return false; return false;
} }
bool split_task(base::spin_lock* /*lock*/) override { bool split_task(base::spin_lock * /*lock*/) override {
return false; return false;
} }
}; };
}
} }
}
} }
#endif //PLS_ROOT_MASTER_TASK_H #endif //PLS_ROOT_MASTER_TASK_H
...@@ -12,12 +12,14 @@ ...@@ -12,12 +12,14 @@
#include "scheduler.h" #include "scheduler.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
template<typename Function>
class run_on_n_threads_task : public abstract_task { template<typename Function>
class run_on_n_threads_task : public abstract_task {
template<typename F> template<typename F>
friend class run_on_n_threads_task_worker; friend
class run_on_n_threads_task_worker;
Function function_; Function function_;
...@@ -38,7 +40,7 @@ namespace pls { ...@@ -38,7 +40,7 @@ namespace pls {
public: public:
static constexpr auto create_id = helpers::unique_id::create<run_on_n_threads_task<Function>>; static constexpr auto create_id = helpers::unique_id::create<run_on_n_threads_task<Function>>;
run_on_n_threads_task(Function function, int num_threads): run_on_n_threads_task(Function function, int num_threads) :
abstract_task{0, create_id()}, abstract_task{0, create_id()},
function_{function}, function_{function},
counter{num_threads - 1} {} counter{num_threads - 1} {}
...@@ -55,21 +57,21 @@ namespace pls { ...@@ -55,21 +57,21 @@ namespace pls {
std::cout << "Finished Master!" << std::endl; std::cout << "Finished Master!" << std::endl;
} }
bool internal_stealing(abstract_task* /*other_task*/) override { bool internal_stealing(abstract_task * /*other_task*/) override {
return false; return false;
} }
bool split_task(base::spin_lock* lock) override; bool split_task(base::spin_lock *lock) override;
}; };
template<typename Function> template<typename Function>
class run_on_n_threads_task_worker : public abstract_task { class run_on_n_threads_task_worker : public abstract_task {
Function function_; Function function_;
run_on_n_threads_task<Function>* root_; run_on_n_threads_task<Function> *root_;
public: public:
static constexpr auto create_id = helpers::unique_id::create<run_on_n_threads_task_worker<Function>>; static constexpr auto create_id = helpers::unique_id::create<run_on_n_threads_task_worker<Function>>;
run_on_n_threads_task_worker(Function function, run_on_n_threads_task<Function>* root): run_on_n_threads_task_worker(Function function, run_on_n_threads_task<Function> *root) :
abstract_task{0, create_id()}, abstract_task{0, create_id()},
function_{function}, function_{function},
root_{root} {} root_{root} {}
...@@ -83,17 +85,17 @@ namespace pls { ...@@ -83,17 +85,17 @@ namespace pls {
} }
} }
bool internal_stealing(abstract_task* /*other_task*/) override { bool internal_stealing(abstract_task * /*other_task*/) override {
return false; return false;
} }
bool split_task(base::spin_lock* /*lock*/) override { bool split_task(base::spin_lock * /*lock*/) override {
return false; return false;
} }
}; };
template<typename Function> template<typename Function>
bool run_on_n_threads_task<Function>::split_task(base::spin_lock* lock) { bool run_on_n_threads_task<Function>::split_task(base::spin_lock *lock) {
if (get_counter() <= 0) { if (get_counter() <= 0) {
return false; return false;
} }
...@@ -105,14 +107,15 @@ namespace pls { ...@@ -105,14 +107,15 @@ namespace pls {
auto task = run_on_n_threads_task_worker<Function>{function_, this}; auto task = run_on_n_threads_task_worker<Function>{function_, this};
scheduler->execute_task(task, depth()); scheduler->execute_task(task, depth());
return true; return true;
} }
/**
 * Convenience factory that deduces the callable type.
 *
 * @param function Callable forwarded to the task's constructor.
 * @param num_threads Thread count forwarded to the task's constructor.
 * @return A run_on_n_threads_task built from the two arguments.
 */
template<typename Function>
run_on_n_threads_task<Function> create_run_on_n_threads_task(Function function, int num_threads) {
  using task_type = run_on_n_threads_task<Function>;
  return task_type{function, num_threads};
}
}
} }
}
} }
#endif //PLS_RUN_ON_N_THREADS_TASK_H #endif //PLS_RUN_ON_N_THREADS_TASK_H
...@@ -17,21 +17,22 @@ ...@@ -17,21 +17,22 @@
#include "scheduler_memory.h" #include "scheduler_memory.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
class scheduler { void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
class scheduler {
friend void worker_routine(); friend void worker_routine();
const unsigned int num_threads_; const unsigned int num_threads_;
scheduler_memory* memory_; scheduler_memory *memory_;
base::barrier sync_barrier_; base::barrier sync_barrier_;
bool terminated_; bool terminated_;
public: public:
explicit scheduler(scheduler_memory* memory, unsigned int num_threads); explicit scheduler(scheduler_memory *memory, unsigned int num_threads);
~scheduler(); ~scheduler();
/** /**
...@@ -50,17 +51,18 @@ namespace pls { ...@@ -50,17 +51,18 @@ namespace pls {
* @param depth Optional: depth of the new task, otherwise set implicitly. * @param depth Optional: depth of the new task, otherwise set implicitly.
*/ */
template<typename Task> template<typename Task>
static void execute_task(Task& task, int depth=-1); static void execute_task(Task &task, int depth = -1);
static abstract_task* current_task() { return base::this_thread::state<thread_state>()->current_task_; } static abstract_task *current_task() { return base::this_thread::state<thread_state>()->current_task_; }
void terminate(bool wait_for_workers=true); void terminate(bool wait_for_workers = true);
unsigned int num_threads() const { return num_threads_; } unsigned int num_threads() const { return num_threads_; }
thread_state* thread_state_for(size_t id) { return memory_->thread_state_for(id); } thread_state *thread_state_for(size_t id) { return memory_->thread_state_for(id); }
}; };
}
} }
}
} }
#include "scheduler_impl.h" #include "scheduler_impl.h"
......
...@@ -3,10 +3,11 @@ ...@@ -3,10 +3,11 @@
#define PLS_SCHEDULER_IMPL_H #define PLS_SCHEDULER_IMPL_H
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
template<typename Function>
void scheduler::perform_work(Function work_section) { template<typename Function>
void scheduler::perform_work(Function work_section) {
PROFILE_WORK_BLOCK("scheduler::perform_work") PROFILE_WORK_BLOCK("scheduler::perform_work")
root_task<Function> master{work_section}; root_task<Function> master{work_section};
...@@ -31,15 +32,15 @@ namespace pls { ...@@ -31,15 +32,15 @@ namespace pls {
root_worker_task<Function> worker{new_master}; root_worker_task<Function> worker{new_master};
memory_->task_stack_for(0)->pop<typeof(worker)>(); memory_->task_stack_for(0)->pop<typeof(worker)>();
} }
} }
template<typename Task> template<typename Task>
void scheduler::execute_task(Task& task, int depth) { void scheduler::execute_task(Task &task, int depth) {
static_assert(std::is_base_of<abstract_task, Task>::value, "Only pass abstract_task subclasses!"); static_assert(std::is_base_of<abstract_task, Task>::value, "Only pass abstract_task subclasses!");
auto my_state = base::this_thread::state<thread_state>(); auto my_state = base::this_thread::state<thread_state>();
abstract_task* old_task; abstract_task *old_task;
abstract_task* new_task; abstract_task *new_task;
// Init Task // Init Task
{ {
...@@ -64,9 +65,10 @@ namespace pls { ...@@ -64,9 +65,10 @@ namespace pls {
my_state->task_stack_->pop<Task>(); my_state->task_stack_->pop<Task>();
} }
} }
}
} }
}
} }
#endif //PLS_SCHEDULER_IMPL_H #endif //PLS_SCHEDULER_IMPL_H
...@@ -7,21 +7,22 @@ ...@@ -7,21 +7,22 @@
#define PLS_SCHEDULER_MEMORY_H #define PLS_SCHEDULER_MEMORY_H
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
class scheduler_memory { void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
class scheduler_memory {
public: public:
virtual size_t max_threads() const = 0; virtual size_t max_threads() const = 0;
virtual thread_state* thread_state_for(size_t id) = 0; virtual thread_state *thread_state_for(size_t id) = 0;
virtual scheduler_thread* thread_for(size_t id) = 0; virtual scheduler_thread *thread_for(size_t id) = 0;
virtual data_structures::aligned_stack* task_stack_for(size_t id) = 0; virtual data_structures::aligned_stack *task_stack_for(size_t id) = 0;
}; };
template<size_t MAX_THREADS, size_t TASK_STACK_SIZE> template<size_t MAX_THREADS, size_t TASK_STACK_SIZE>
class static_scheduler_memory: public scheduler_memory { class static_scheduler_memory : public scheduler_memory {
// Everyone of these types has to live on its own cache line, // Everyone of these types has to live on its own cache line,
// as each thread uses one of them independently. // as each thread uses one of them independently.
// Therefore it would be a major performance hit if we shared cache lines on these. // Therefore it would be a major performance hit if we shared cache lines on these.
...@@ -38,17 +39,18 @@ namespace pls { ...@@ -38,17 +39,18 @@ namespace pls {
public: public:
static_scheduler_memory() { static_scheduler_memory() {
for (size_t i = 0; i < MAX_THREADS; i++) { for (size_t i = 0; i < MAX_THREADS; i++) {
new ((void*)task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i].pointer()->data(), TASK_STACK_SIZE); new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i].pointer()->data(),
TASK_STACK_SIZE);
} }
} }
size_t max_threads() const override { return MAX_THREADS; } size_t max_threads() const override { return MAX_THREADS; }
thread_state* thread_state_for(size_t id) override { return thread_states_[id].pointer(); } thread_state *thread_state_for(size_t id) override { return thread_states_[id].pointer(); }
scheduler_thread* thread_for(size_t id) override { return threads_[id].pointer(); } scheduler_thread *thread_for(size_t id) override { return threads_[id].pointer(); }
data_structures::aligned_stack* task_stack_for(size_t id) override { return task_stacks_[id].pointer(); } data_structures::aligned_stack *task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
}; };
class malloc_scheduler_memory: public scheduler_memory { class malloc_scheduler_memory : public scheduler_memory {
// Everyone of these types has to live on its own cache line, // Everyone of these types has to live on its own cache line,
// as each thread uses one of them independently. // as each thread uses one of them independently.
// Therefore it would be a major performance hit if we shared cache lines on these. // Therefore it would be a major performance hit if we shared cache lines on these.
...@@ -58,21 +60,22 @@ namespace pls { ...@@ -58,21 +60,22 @@ namespace pls {
const size_t num_threads_; const size_t num_threads_;
aligned_thread* threads_; aligned_thread *threads_;
aligned_thread_state * thread_states_; aligned_thread_state *thread_states_;
char** task_stacks_memory_; char **task_stacks_memory_;
aligned_aligned_stack * task_stacks_; aligned_aligned_stack *task_stacks_;
public: public:
explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16); explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16);
~malloc_scheduler_memory(); ~malloc_scheduler_memory();
size_t max_threads() const override { return num_threads_; } size_t max_threads() const override { return num_threads_; }
thread_state* thread_state_for(size_t id) override { return thread_states_[id].pointer(); } thread_state *thread_state_for(size_t id) override { return thread_states_[id].pointer(); }
scheduler_thread* thread_for(size_t id) override { return threads_[id].pointer(); } scheduler_thread *thread_for(size_t id) override { return threads_[id].pointer(); }
data_structures::aligned_stack* task_stack_for(size_t id) override { return task_stacks_[id].pointer(); } data_structures::aligned_stack *task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
}; };
}
} }
}
} }
#endif //PLS_SCHEDULER_MEMORY_H #endif //PLS_SCHEDULER_MEMORY_H
...@@ -8,21 +8,22 @@ ...@@ -8,21 +8,22 @@
#include "abstract_task.h" #include "abstract_task.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
// forward declaration
class scheduler; // forward declaration
class scheduler;
struct thread_state {
scheduler* scheduler_; struct thread_state {
abstract_task* root_task_; scheduler *scheduler_;
abstract_task* current_task_; abstract_task *root_task_;
data_structures::aligned_stack* task_stack_; abstract_task *current_task_;
data_structures::aligned_stack *task_stack_;
size_t id_; size_t id_;
base::spin_lock lock_; base::spin_lock lock_;
std::minstd_rand random_; std::minstd_rand random_;
thread_state(): thread_state() :
scheduler_{nullptr}, scheduler_{nullptr},
root_task_{nullptr}, root_task_{nullptr},
current_task_{nullptr}, current_task_{nullptr},
...@@ -30,16 +31,17 @@ namespace pls { ...@@ -30,16 +31,17 @@ namespace pls {
id_{0}, id_{0},
random_{id_} {}; random_{id_} {};
thread_state(scheduler* scheduler, data_structures::aligned_stack* task_stack, unsigned int id): thread_state(scheduler *scheduler, data_structures::aligned_stack *task_stack, unsigned int id) :
scheduler_{scheduler}, scheduler_{scheduler},
root_task_{nullptr}, root_task_{nullptr},
current_task_{nullptr}, current_task_{nullptr},
task_stack_{task_stack}, task_stack_{task_stack},
id_{id}, id_{id},
random_{id_} {} random_{id_} {}
}; };
}
} }
}
} }
#endif //PLS_THREAD_STATE_H #endif //PLS_THREAD_STATE_H
...@@ -8,18 +8,20 @@ ...@@ -8,18 +8,20 @@
#include "pls/internal/helpers/unique_id.h" #include "pls/internal/helpers/unique_id.h"
namespace pls { namespace pls {
// Scheduler and its memory providers
using internal::scheduling::scheduler;
using internal::scheduling::static_scheduler_memory;
using internal::scheduling::malloc_scheduler_memory;

// Identifier types
using task_id = internal::scheduling::abstract_task::id;
using unique_id = internal::helpers::unique_id;

// Fork-join task types
using internal::scheduling::fork_join_sub_task;
using internal::scheduling::fork_join_task;

// Algorithms
using algorithm::invoke_parallel;
} }
#endif #endif
...@@ -2,26 +2,28 @@ ...@@ -2,26 +2,28 @@
#include "pls/internal/base/system_details.h" #include "pls/internal/base/system_details.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
namespace alignment { namespace alignment {
void* allocate_aligned(size_t size) {
void *allocate_aligned(size_t size) {
return aligned_alloc(system_details::CACHE_LINE_SIZE, size); return aligned_alloc(system_details::CACHE_LINE_SIZE, size);
} }
std::uintptr_t next_alignment(std::uintptr_t size) { std::uintptr_t next_alignment(std::uintptr_t size) {
std::uintptr_t miss_alignment = size % base::system_details::CACHE_LINE_SIZE; std::uintptr_t miss_alignment = size % base::system_details::CACHE_LINE_SIZE;
if (miss_alignment == 0) { if (miss_alignment == 0) {
return size; return size;
} else { } else {
return size + (base::system_details::CACHE_LINE_SIZE - miss_alignment); return size + (base::system_details::CACHE_LINE_SIZE - miss_alignment);
} }
} }
char* next_alignment(char* pointer) { char *next_alignment(char *pointer) {
return reinterpret_cast<char*>(next_alignment(reinterpret_cast<std::uintptr_t >(pointer))); return reinterpret_cast<char *>(next_alignment(reinterpret_cast<std::uintptr_t >(pointer)));
} }
}
} }
} }
}
} }
#include "pls/internal/base/barrier.h" #include "pls/internal/base/barrier.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
/**
 * Initialises the wrapped pthread barrier with default attributes.
 *
 * @param count Passed to pthread_barrier_init as the barrier's count.
 *
 * NOTE(review): the return value of pthread_barrier_init is not checked.
 */
barrier::barrier(const unsigned int count) : barrier_{} {
  pthread_barrier_init(&barrier_, nullptr, count);
}
/** Destroys the wrapped pthread barrier; the result of the call is ignored. */
barrier::~barrier() {
  pthread_barrier_destroy(&barrier_);
}
void barrier::wait() { void barrier::wait() {
pthread_barrier_wait(&barrier_); pthread_barrier_wait(&barrier_);
} }
}
} }
}
} }
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
#include "pls/internal/base/tas_spin_lock.h" #include "pls/internal/base/tas_spin_lock.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
void tas_spin_lock::lock() {
void tas_spin_lock::lock() {
PROFILE_LOCK("Acquire Lock") PROFILE_LOCK("Acquire Lock")
int tries = 0; int tries = 0;
while (flag_.test_and_set(std::memory_order_acquire)) { while (flag_.test_and_set(std::memory_order_acquire)) {
...@@ -13,9 +14,9 @@ namespace pls { ...@@ -13,9 +14,9 @@ namespace pls {
this_thread::yield(); this_thread::yield();
} }
} }
} }
bool tas_spin_lock::try_lock(unsigned int num_tries) { bool tas_spin_lock::try_lock(unsigned int num_tries) {
PROFILE_LOCK("Try Acquire Lock") PROFILE_LOCK("Try Acquire Lock")
while (flag_.test_and_set(std::memory_order_acquire)) { while (flag_.test_and_set(std::memory_order_acquire)) {
num_tries--; num_tries--;
...@@ -24,11 +25,12 @@ namespace pls { ...@@ -24,11 +25,12 @@ namespace pls {
} }
} }
return true; return true;
} }
void tas_spin_lock::unlock() { void tas_spin_lock::unlock() {
flag_.clear(std::memory_order_release); flag_.clear(std::memory_order_release);
} }
}
} }
}
} }
#include "pls/internal/base/thread.h" #include "pls/internal/base/thread.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
// Definitions of the static members used by the pthread-key variant.
// The explicit `false` belongs to the boolean flag, not to the key:
// pthread_key_t is an opaque type and is left to static zero-initialisation.
pthread_key_t this_thread::local_storage_key_;
bool this_thread::local_storage_key_initialized_ = false;
#endif

#ifdef PLS_THREAD_SPECIFIC_COMPILER
// Definition of the static member used by the compiler (__thread) variant.
__thread void *this_thread::local_state_;
#endif
// implementation in header (C++ templating)
}
} }
}
} }
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
#include "pls/internal/base/ttas_spin_lock.h" #include "pls/internal/base/ttas_spin_lock.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
void ttas_spin_lock::lock() {
void ttas_spin_lock::lock() {
PROFILE_LOCK("Acquire Lock") PROFILE_LOCK("Acquire Lock")
int tries = 0; int tries = 0;
int expected = 0; int expected = 0;
...@@ -19,9 +20,9 @@ namespace pls { ...@@ -19,9 +20,9 @@ namespace pls {
expected = 0; expected = 0;
} while (!flag_.compare_exchange_weak(expected, 1, std::memory_order_acquire)); } while (!flag_.compare_exchange_weak(expected, 1, std::memory_order_acquire));
} }
bool ttas_spin_lock::try_lock(unsigned int num_tries) { bool ttas_spin_lock::try_lock(unsigned int num_tries) {
PROFILE_LOCK("Try Acquire Lock") PROFILE_LOCK("Try Acquire Lock")
int expected = 0; int expected = 0;
...@@ -37,11 +38,12 @@ namespace pls { ...@@ -37,11 +38,12 @@ namespace pls {
} while (!flag_.compare_exchange_weak(expected, 1, std::memory_order_acquire)); } while (!flag_.compare_exchange_weak(expected, 1, std::memory_order_acquire));
return true; return true;
} }
void ttas_spin_lock::unlock() { void ttas_spin_lock::unlock() {
flag_.store(0, std::memory_order_release); flag_.store(0, std::memory_order_release);
} }
}
} }
}
} }
...@@ -2,12 +2,14 @@ ...@@ -2,12 +2,14 @@
#include "pls/internal/base/system_details.h" #include "pls/internal/base/system_details.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace data_structures { namespace data_structures {
/**
 * Sets up a stack on top of an externally provided memory region.
 *
 * @param memory_region Start of the region.
 * @param size Size of the region in bytes; the end pointer is
 *             memory_region + size.
 *
 * The head starts at the first cache-line aligned address at or after
 * memory_region. It is computed from the constructor argument rather than
 * from memory_start_, so the result does not depend on the order in which
 * the members are declared.
 */
aligned_stack::aligned_stack(char *memory_region, const std::size_t size) :
    memory_start_{memory_region},
    memory_end_{memory_region + size},
    head_{base::alignment::next_alignment(memory_region)} {}
}
} }
}
} }
...@@ -3,16 +3,17 @@ ...@@ -3,16 +3,17 @@
#include "pls/internal/data_structures/deque.h" #include "pls/internal/data_structures/deque.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace data_structures { namespace data_structures {
deque_item* deque_internal::pop_head_internal() {
deque_item *deque_internal::pop_head_internal() {
std::lock_guard<base::spin_lock> lock{lock_}; std::lock_guard<base::spin_lock> lock{lock_};
if (head_ == nullptr) { if (head_ == nullptr) {
return nullptr; return nullptr;
} }
deque_item* result = head_; deque_item *result = head_;
head_ = head_->prev_; head_ = head_->prev_;
if (head_ == nullptr) { if (head_ == nullptr) {
tail_ = nullptr; tail_ = nullptr;
...@@ -21,16 +22,16 @@ namespace pls { ...@@ -21,16 +22,16 @@ namespace pls {
} }
return result; return result;
} }
deque_item* deque_internal::pop_tail_internal() { deque_item *deque_internal::pop_tail_internal() {
std::lock_guard<base::spin_lock> lock{lock_}; std::lock_guard<base::spin_lock> lock{lock_};
if (tail_ == nullptr) { if (tail_ == nullptr) {
return nullptr; return nullptr;
} }
deque_item* result = tail_; deque_item *result = tail_;
tail_ = tail_->next_; tail_ = tail_->next_;
if (tail_ == nullptr) { if (tail_ == nullptr) {
head_ = nullptr; head_ = nullptr;
...@@ -39,9 +40,9 @@ namespace pls { ...@@ -39,9 +40,9 @@ namespace pls {
} }
return result; return result;
} }
void deque_internal::push_tail_internal(deque_item *new_item) { void deque_internal::push_tail_internal(deque_item *new_item) {
std::lock_guard<base::spin_lock> lock{lock_}; std::lock_guard<base::spin_lock> lock{lock_};
if (tail_ != nullptr) { if (tail_ != nullptr) {
...@@ -52,7 +53,8 @@ namespace pls { ...@@ -52,7 +53,8 @@ namespace pls {
new_item->next_ = tail_; new_item->next_ = tail_;
new_item->prev_ = nullptr; new_item->prev_ = nullptr;
tail_ = new_item; tail_ = new_item;
} }
}
} }
}
} }
...@@ -5,9 +5,10 @@ ...@@ -5,9 +5,10 @@
#include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/scheduler.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
bool abstract_task::steal_work() {
bool abstract_task::steal_work() {
PROFILE_STEALING("abstract_task::steal_work") PROFILE_STEALING("abstract_task::steal_work")
const auto my_state = base::this_thread::state<thread_state>(); const auto my_state = base::this_thread::state<thread_state>();
const auto my_scheduler = my_state->scheduler_; const auto my_scheduler = my_state->scheduler_;
...@@ -27,7 +28,7 @@ namespace pls { ...@@ -27,7 +28,7 @@ namespace pls {
// Dig down to our level // Dig down to our level
PROFILE_STEALING("Go to our level") PROFILE_STEALING("Go to our level")
abstract_task* current_task = target_state->root_task_; abstract_task *current_task = target_state->root_task_;
while (current_task != nullptr && current_task->depth() < depth()) { while (current_task != nullptr && current_task->depth() < depth()) {
current_task = current_task->child_task_; current_task = current_task->child_task_;
} }
...@@ -70,7 +71,8 @@ namespace pls { ...@@ -70,7 +71,8 @@ namespace pls {
// internal steal was no success // internal steal was no success
return false; return false;
}; }
}
} }
}
} }
...@@ -4,23 +4,24 @@ ...@@ -4,23 +4,24 @@
#include "pls/internal/scheduling/fork_join_task.h" #include "pls/internal/scheduling/fork_join_task.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
// Default constructor.
// Value-initializes the deque_item base, starts ref_count_ at 0 and leaves
// parent_, tbb_task_ and stack_state_ as nullptr; spawn_child_internal() and
// execute() later read/write these members.
fork_join_sub_task::fork_join_sub_task():
fork_join_sub_task::fork_join_sub_task() :
data_structures::deque_item{}, data_structures::deque_item{},
ref_count_{0}, ref_count_{0},
parent_{nullptr}, parent_{nullptr},
tbb_task_{nullptr}, tbb_task_{nullptr},
stack_state_{nullptr} {} stack_state_{nullptr} {}
// Copy constructor.
// Only the deque_item base is copied from `other`. The scheduling members are
// deliberately NOT taken over: ref_count_ restarts at 0 and parent_, tbb_task_
// and stack_state_ are reset to nullptr, so the copy starts out unattached.
fork_join_sub_task::fork_join_sub_task(const fork_join_sub_task& other): fork_join_sub_task::fork_join_sub_task(const fork_join_sub_task &other) :
data_structures::deque_item(other), data_structures::deque_item(other),
ref_count_{0}, ref_count_{0},
parent_{nullptr}, parent_{nullptr},
tbb_task_{nullptr}, tbb_task_{nullptr},
stack_state_{nullptr} {} stack_state_{nullptr} {}
void fork_join_sub_task::execute() { void fork_join_sub_task::execute() {
PROFILE_WORK_BLOCK("execute sub_task") PROFILE_WORK_BLOCK("execute sub_task")
tbb_task_->currently_executing_ = this; tbb_task_->currently_executing_ = this;
execute_internal(); execute_internal();
...@@ -31,9 +32,9 @@ namespace pls { ...@@ -31,9 +32,9 @@ namespace pls {
if (parent_ != nullptr) { if (parent_ != nullptr) {
parent_->ref_count_--; parent_->ref_count_--;
} }
} }
void fork_join_sub_task::spawn_child_internal(fork_join_sub_task* sub_task) { void fork_join_sub_task::spawn_child_internal(fork_join_sub_task *sub_task) {
// Keep our refcount up to date // Keep our refcount up to date
ref_count_++; ref_count_++;
...@@ -43,12 +44,12 @@ namespace pls { ...@@ -43,12 +44,12 @@ namespace pls {
sub_task->stack_state_ = tbb_task_->my_stack_->save_state(); sub_task->stack_state_ = tbb_task_->my_stack_->save_state();
tbb_task_->deque_.push_tail(sub_task); tbb_task_->deque_.push_tail(sub_task);
} }
void fork_join_sub_task::wait_for_all() { void fork_join_sub_task::wait_for_all() {
while (ref_count_ > 0) { while (ref_count_ > 0) {
PROFILE_STEALING("get local sub task") PROFILE_STEALING("get local sub task")
fork_join_sub_task* local_task = tbb_task_->get_local_sub_task(); fork_join_sub_task *local_task = tbb_task_->get_local_sub_task();
PROFILE_END_BLOCK PROFILE_END_BLOCK
if (local_task != nullptr) { if (local_task != nullptr) {
local_task->execute(); local_task->execute();
...@@ -64,19 +65,19 @@ namespace pls { ...@@ -64,19 +65,19 @@ namespace pls {
} }
} }
tbb_task_->my_stack_->reset_state(stack_state_); tbb_task_->my_stack_->reset_state(stack_state_);
} }
// Returns the result of deque_.pop_tail(), i.e. takes a sub task from the tail
// end of this task's deque. wait_for_all() treats a nullptr result as
// "no local work available".
fork_join_sub_task* fork_join_task::get_local_sub_task() { fork_join_sub_task *fork_join_task::get_local_sub_task() {
return deque_.pop_tail(); return deque_.pop_tail();
} }
// Returns the result of deque_.pop_head(), i.e. takes a sub task from the head
// end of this task's deque. Used by internal_stealing() and split_task(), which
// both treat a nullptr result as "nothing to steal".
fork_join_sub_task* fork_join_task::get_stolen_sub_task() { fork_join_sub_task *fork_join_task::get_stolen_sub_task() {
return deque_.pop_head(); return deque_.pop_head();
} }
bool fork_join_task::internal_stealing(abstract_task* other_task) { bool fork_join_task::internal_stealing(abstract_task *other_task) {
PROFILE_STEALING("fork_join_task::internal_stealin") PROFILE_STEALING("fork_join_task::internal_stealin")
auto cast_other_task = reinterpret_cast<fork_join_task*>(other_task); auto cast_other_task = reinterpret_cast<fork_join_task *>(other_task);
auto stolen_sub_task = cast_other_task->get_stolen_sub_task(); auto stolen_sub_task = cast_other_task->get_stolen_sub_task();
if (stolen_sub_task == nullptr) { if (stolen_sub_task == nullptr) {
...@@ -90,11 +91,11 @@ namespace pls { ...@@ -90,11 +91,11 @@ namespace pls {
return true; return true;
} }
} }
bool fork_join_task::split_task(base::spin_lock* lock) { bool fork_join_task::split_task(base::spin_lock *lock) {
PROFILE_STEALING("fork_join_task::split_task") PROFILE_STEALING("fork_join_task::split_task")
fork_join_sub_task* stolen_sub_task = get_stolen_sub_task(); fork_join_sub_task *stolen_sub_task = get_stolen_sub_task();
if (stolen_sub_task == nullptr) { if (stolen_sub_task == nullptr) {
return false; return false;
} }
...@@ -106,9 +107,9 @@ namespace pls { ...@@ -106,9 +107,9 @@ namespace pls {
scheduler::execute_task(task, depth()); scheduler::execute_task(task, depth());
return true; return true;
} }
void fork_join_task::execute() { void fork_join_task::execute() {
PROFILE_WORK_BLOCK("execute fork_join_task"); PROFILE_WORK_BLOCK("execute fork_join_task");
// Bind this instance to our OS thread // Bind this instance to our OS thread
...@@ -118,17 +119,18 @@ namespace pls { ...@@ -118,17 +119,18 @@ namespace pls {
// Execute it on our OS thread until its finished // Execute it on our OS thread until its finished
root_task_->execute(); root_task_->execute();
} }
// Accessor for currently_executing_, which fork_join_sub_task::execute() sets
// to the sub task it is about to run. nullptr until the first sub task executes.
fork_join_sub_task* fork_join_task::currently_executing() const { return currently_executing_; } fork_join_sub_task *fork_join_task::currently_executing() const { return currently_executing_; }
// Constructs a fork-join task around `root_task`, the sub task that execute()
// later runs via root_task_->execute().
//   root_task - stored as a raw pointer; not copied here.
//   id        - forwarded to the abstract_task base together with the literal 0
//               (NOTE(review): the 0 is presumably the initial depth - confirm
//               against abstract_task's constructor).
// currently_executing_, my_stack_ and last_stolen_ start as nullptr and the
// deque starts empty.
fork_join_task::fork_join_task(fork_join_sub_task* root_task, const abstract_task::id& id): fork_join_task::fork_join_task(fork_join_sub_task *root_task, const abstract_task::id &id) :
abstract_task{0, id}, abstract_task{0, id},
root_task_{root_task}, root_task_{root_task},
currently_executing_{nullptr}, currently_executing_{nullptr},
my_stack_{nullptr}, my_stack_{nullptr},
deque_{}, deque_{},
last_stolen_{nullptr} {}; last_stolen_{nullptr} {}
}
} }
}
} }
#include "pls/internal/scheduling/root_task.h" #include "pls/internal/scheduling/root_task.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
} }
} }
} }
#include "pls/internal/scheduling/run_on_n_threads_task.h" #include "pls/internal/scheduling/run_on_n_threads_task.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
} }
} }
} }
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
#include "pls/internal/base/error_handling.h" #include "pls/internal/base/error_handling.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
scheduler::scheduler(scheduler_memory* memory, const unsigned int num_threads):
scheduler::scheduler(scheduler_memory *memory, const unsigned int num_threads) :
num_threads_{num_threads}, num_threads_{num_threads},
memory_{memory}, memory_{memory},
sync_barrier_{num_threads + 1}, sync_barrier_{num_threads + 1},
...@@ -15,16 +16,17 @@ namespace pls { ...@@ -15,16 +16,17 @@ namespace pls {
for (unsigned int i = 0; i < num_threads_; i++) { for (unsigned int i = 0; i < num_threads_; i++) {
// Placement new is required, as the memory of `memory_` is not required to be initialized. // Placement new is required, as the memory of `memory_` is not required to be initialized.
new((void*)memory_->thread_state_for(i)) thread_state{this, memory_->task_stack_for(i), i}; new((void *) memory_->thread_state_for(i)) thread_state{this, memory_->task_stack_for(i), i};
new ((void*)memory_->thread_for(i))base::thread<void(*)(), thread_state>(&worker_routine, memory_->thread_state_for(i)); new((void *) memory_->thread_for(i))base::thread<void (*)(), thread_state>(&worker_routine,
} memory_->thread_state_for(i));
} }
}
// Destructor: shuts the scheduler down by calling terminate() with its default
// argument (the default value is declared outside this view). terminate()
// returns early when terminated_ is already set, so destroying a scheduler
// that was terminated explicitly is harmless.
scheduler::~scheduler() { scheduler::~scheduler() {
terminate(); terminate();
} }
void worker_routine() { void worker_routine() {
auto my_state = base::this_thread::state<thread_state>(); auto my_state = base::this_thread::state<thread_state>();
while (true) { while (true) {
...@@ -40,9 +42,9 @@ namespace pls { ...@@ -40,9 +42,9 @@ namespace pls {
my_state->scheduler_->sync_barrier_.wait(); my_state->scheduler_->sync_barrier_.wait();
} }
} }
void scheduler::terminate(bool wait_for_workers) { void scheduler::terminate(bool wait_for_workers) {
if (terminated_) { if (terminated_) {
return; return;
} }
...@@ -55,7 +57,8 @@ namespace pls { ...@@ -55,7 +57,8 @@ namespace pls {
memory_->thread_for(i)->join(); memory_->thread_for(i)->join();
} }
} }
} }
}
} }
}
} }
#include "pls/internal/scheduling/scheduler_memory.h" #include "pls/internal/scheduling/scheduler_memory.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
// Allocates all per-thread storage for `num_threads` workers through
// base::alignment::allocate_aligned:
//   threads_            - num_threads * sizeof(aligned_thread)
//   thread_states_      - num_threads * sizeof(aligned_thread_state)
//   task_stacks_        - num_threads * sizeof(aligned_aligned_stack)
//   task_stacks_memory_ - num_threads char* slots, each then pointed at its own
//                         allocation of `memory_per_stack` bytes
// For every thread an aligned_stack is placement-constructed in task_stacks_[i]
// on top of task_stacks_memory_[i]. The threads_ and thread_states_ arrays are
// only allocated here, not constructed.
// NOTE(review): none of the allocate_aligned results are checked for nullptr
// in this constructor - confirm whether allocate_aligned can fail.
malloc_scheduler_memory::malloc_scheduler_memory(const size_t num_threads, const size_t memory_per_stack):
malloc_scheduler_memory::malloc_scheduler_memory(const size_t num_threads, const size_t memory_per_stack) :
num_threads_{num_threads} { num_threads_{num_threads} {
threads_ = reinterpret_cast<aligned_thread *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_thread))); threads_ =
thread_states_ = reinterpret_cast<aligned_thread_state *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_thread_state))); reinterpret_cast<aligned_thread *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_thread)));
thread_states_ = reinterpret_cast<aligned_thread_state *>(base::alignment::allocate_aligned(
num_threads * sizeof(aligned_thread_state)));
task_stacks_ = reinterpret_cast<aligned_aligned_stack *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_aligned_stack))); task_stacks_ = reinterpret_cast<aligned_aligned_stack *>(base::alignment::allocate_aligned(
task_stacks_memory_ = reinterpret_cast<char**>(base::alignment::allocate_aligned(num_threads * sizeof(char*))); num_threads * sizeof(aligned_aligned_stack)));
task_stacks_memory_ = reinterpret_cast<char **>(base::alignment::allocate_aligned(num_threads * sizeof(char *)));
for (size_t i = 0; i < num_threads_; i++) { for (size_t i = 0; i < num_threads_; i++) {
task_stacks_memory_[i] = reinterpret_cast<char*>(base::alignment::allocate_aligned(memory_per_stack)); task_stacks_memory_[i] = reinterpret_cast<char *>(base::alignment::allocate_aligned(memory_per_stack));
new ((void*)task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack); new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack);
}
} }
}
malloc_scheduler_memory::~malloc_scheduler_memory() { malloc_scheduler_memory::~malloc_scheduler_memory() {
free(threads_); free(threads_);
free(thread_states_); free(thread_states_);
...@@ -25,7 +29,8 @@ namespace pls { ...@@ -25,7 +29,8 @@ namespace pls {
} }
free(task_stacks_); free(task_stacks_);
free(task_stacks_memory_); free(task_stacks_memory_);
} }
}
} }
}
} }
#include "pls/internal/scheduling/thread_state.h" #include "pls/internal/scheduling/thread_state.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
} }
} }
} }
...@@ -13,7 +13,7 @@ static bool base_tests_visited; ...@@ -13,7 +13,7 @@ static bool base_tests_visited;
static int base_tests_local_value_one; static int base_tests_local_value_one;
static vector<int> base_tests_local_value_two; static vector<int> base_tests_local_value_two;
TEST_CASE( "thread creation and joining", "[internal/data_structures/thread.h]") { TEST_CASE("thread creation and joining", "[internal/data_structures/thread.h]") {
base_tests_visited = false; base_tests_visited = false;
auto t1 = start_thread([]() { base_tests_visited = true; }); auto t1 = start_thread([]() { base_tests_visited = true; });
t1.join(); t1.join();
...@@ -21,7 +21,7 @@ TEST_CASE( "thread creation and joining", "[internal/data_structures/thread.h]") ...@@ -21,7 +21,7 @@ TEST_CASE( "thread creation and joining", "[internal/data_structures/thread.h]")
REQUIRE(base_tests_visited); REQUIRE(base_tests_visited);
} }
TEST_CASE( "thread state", "[internal/data_structures/thread.h]") { TEST_CASE("thread state", "[internal/data_structures/thread.h]") {
int state_one = 1; int state_one = 1;
vector<int> state_two{1, 2}; vector<int> state_two{1, 2};
...@@ -36,12 +36,12 @@ TEST_CASE( "thread state", "[internal/data_structures/thread.h]") { ...@@ -36,12 +36,12 @@ TEST_CASE( "thread state", "[internal/data_structures/thread.h]") {
int base_tests_shared_counter; int base_tests_shared_counter;
TEST_CASE( "spinlock protects concurrent counter", "[internal/data_structures/spinlock.h]") { TEST_CASE("spinlock protects concurrent counter", "[internal/data_structures/spinlock.h]") {
constexpr int num_iterations = 1000000; constexpr int num_iterations = 1000000;
base_tests_shared_counter = 0; base_tests_shared_counter = 0;
spin_lock lock{}; spin_lock lock{};
SECTION( "lock can be used by itself" ) { SECTION("lock can be used by itself") {
auto t1 = start_thread([&]() { auto t1 = start_thread([&]() {
for (int i = 0; i < num_iterations; i++) { for (int i = 0; i < num_iterations; i++) {
lock.lock(); lock.lock();
...@@ -63,7 +63,7 @@ TEST_CASE( "spinlock protects concurrent counter", "[internal/data_structures/sp ...@@ -63,7 +63,7 @@ TEST_CASE( "spinlock protects concurrent counter", "[internal/data_structures/sp
REQUIRE(base_tests_shared_counter == 0); REQUIRE(base_tests_shared_counter == 0);
} }
SECTION( "lock can be used with std::lock_guard" ) { SECTION("lock can be used with std::lock_guard") {
auto t1 = start_thread([&]() { auto t1 = start_thread([&]() {
for (int i = 0; i < num_iterations; i++) { for (int i = 0; i < num_iterations; i++) {
std::lock_guard<spin_lock> my_lock{lock}; std::lock_guard<spin_lock> my_lock{lock};
......
...@@ -12,13 +12,12 @@ using namespace pls::internal::data_structures; ...@@ -12,13 +12,12 @@ using namespace pls::internal::data_structures;
using namespace pls::internal::base; using namespace pls::internal::base;
using namespace std; using namespace std;
TEST_CASE("aligned stack stores objects correctly", "[internal/data_structures/aligned_stack.h]") {
TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/aligned_stack.h]") {
constexpr long data_size = 1024; constexpr long data_size = 1024;
char data[data_size]; char data[data_size];
aligned_stack stack{data, data_size}; aligned_stack stack{data, data_size};
SECTION( "stack correctly pushes sub linesize objects" ) { SECTION("stack correctly pushes sub linesize objects") {
std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'}; std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
std::array<char, 64> small_data_two{}; std::array<char, 64> small_data_two{};
std::array<char, 1> small_data_three{'A'}; std::array<char, 1> small_data_three{'A'};
...@@ -32,7 +31,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/ ...@@ -32,7 +31,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_three) % system_details::CACHE_LINE_SIZE == 0); REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_three) % system_details::CACHE_LINE_SIZE == 0);
} }
SECTION( "stack correctly pushes above linesize objects" ) { SECTION("stack correctly pushes above linesize objects") {
std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'}; std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
std::array<char, system_details::CACHE_LINE_SIZE + 10> big_data_one{}; std::array<char, system_details::CACHE_LINE_SIZE + 10> big_data_one{};
...@@ -43,7 +42,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/ ...@@ -43,7 +42,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
REQUIRE(reinterpret_cast<std::uintptr_t>(small_pointer_one) % system_details::CACHE_LINE_SIZE == 0); REQUIRE(reinterpret_cast<std::uintptr_t>(small_pointer_one) % system_details::CACHE_LINE_SIZE == 0);
} }
SECTION( "stack correctly stores and retrieves objects" ) { SECTION("stack correctly stores and retrieves objects") {
std::array<char, 5> data_one{'a', 'b', 'c', 'd', 'e'}; std::array<char, 5> data_one{'a', 'b', 'c', 'd', 'e'};
stack.push(data_one); stack.push(data_one);
...@@ -52,7 +51,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/ ...@@ -52,7 +51,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
REQUIRE(retrieved_data == std::array<char, 5>{'a', 'b', 'c', 'd', 'e'}); REQUIRE(retrieved_data == std::array<char, 5>{'a', 'b', 'c', 'd', 'e'});
} }
SECTION( "stack can push and pop multiple times with correct alignment" ) { SECTION("stack can push and pop multiple times with correct alignment") {
std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'}; std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
std::array<char, 64> small_data_two{}; std::array<char, 64> small_data_two{};
std::array<char, 1> small_data_three{'A'}; std::array<char, 1> small_data_three{'A'};
...@@ -76,15 +75,15 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/ ...@@ -76,15 +75,15 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
} }
} }
TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]") { TEST_CASE("deque stores objects correctly", "[internal/data_structures/deque.h]") {
class my_item: public deque_item { class my_item : public deque_item {
}; };
deque<my_item> deque; deque<my_item> deque;
my_item one, two, three; my_item one, two, three;
SECTION( "add and remove items form the tail" ) { SECTION("add and remove items form the tail") {
deque.push_tail(&one); deque.push_tail(&one);
deque.push_tail(&two); deque.push_tail(&two);
deque.push_tail(&three); deque.push_tail(&three);
...@@ -94,7 +93,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h] ...@@ -94,7 +93,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE(deque.pop_tail() == &one); REQUIRE(deque.pop_tail() == &one);
} }
SECTION( "handles getting empty by popping the tail correctly" ) { SECTION("handles getting empty by popping the tail correctly") {
deque.push_tail(&one); deque.push_tail(&one);
REQUIRE(deque.pop_tail() == &one); REQUIRE(deque.pop_tail() == &one);
...@@ -102,7 +101,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h] ...@@ -102,7 +101,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE(deque.pop_tail() == &two); REQUIRE(deque.pop_tail() == &two);
} }
SECTION( "remove items form the head" ) { SECTION("remove items form the head") {
deque.push_tail(&one); deque.push_tail(&one);
deque.push_tail(&two); deque.push_tail(&two);
deque.push_tail(&three); deque.push_tail(&three);
...@@ -112,7 +111,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h] ...@@ -112,7 +111,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE(deque.pop_head() == &three); REQUIRE(deque.pop_head() == &three);
} }
SECTION( "handles getting empty by popping the head correctly" ) { SECTION("handles getting empty by popping the head correctly") {
deque.push_tail(&one); deque.push_tail(&one);
REQUIRE(deque.pop_head() == &one); REQUIRE(deque.pop_head() == &one);
...@@ -120,7 +119,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h] ...@@ -120,7 +119,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE(deque.pop_head() == &two); REQUIRE(deque.pop_head() == &two);
} }
SECTION( "handles getting empty by popping the head and tail correctly" ) { SECTION("handles getting empty by popping the head and tail correctly") {
deque.push_tail(&one); deque.push_tail(&one);
REQUIRE(deque.pop_tail() == &one); REQUIRE(deque.pop_tail() == &one);
......
...@@ -4,11 +4,11 @@ ...@@ -4,11 +4,11 @@
using namespace pls; using namespace pls;
class once_sub_task: public fork_join_sub_task { class once_sub_task : public fork_join_sub_task {
std::atomic<int>* counter_; std::atomic<int> *counter_;
int children_; int children_;
protected: protected:
void execute_internal() override { void execute_internal() override {
(*counter_)++; (*counter_)++;
for (int i = 0; i < children_; i++) { for (int i = 0; i < children_; i++) {
...@@ -16,38 +16,37 @@ protected: ...@@ -16,38 +16,37 @@ protected:
} }
} }
public: public:
explicit once_sub_task(std::atomic<int>* counter, int children): explicit once_sub_task(std::atomic<int> *counter, int children) :
fork_join_sub_task(), fork_join_sub_task(),
counter_{counter}, counter_{counter},
children_{children} {} children_{children} {}
}; };
// Test helper sub task used by the "tasks can be stolen" section.
//
// execute_internal() decrements *overall_counter_; while that shared counter
// is still positive it spawns one child of the same type, handing the child a
// local atomic `counter` (initialised to 1) as the child's parent counter, and
// then busy-waits until the child has decremented that counter to 0. Finally
// it decrements its own *parent_counter_.
//
// NOTE(review): because the spawning task spins instead of running the child
// itself, the child presumably has to be picked up by another worker thread -
// this matches the class name but depends on spawn_child(), which is not
// visible here; confirm.
//
//   parent_counter_  - counter owned by whoever created this task; decremented
//                      once when execute_internal() finishes.
//   overall_counter_ - shared budget limiting how many tasks get chained.
class force_steal_sub_task: public fork_join_sub_task { class force_steal_sub_task : public fork_join_sub_task {
std::atomic<int>* parent_counter_; std::atomic<int> *parent_counter_;
std::atomic<int>* overall_counter_; std::atomic<int> *overall_counter_;
protected: protected:
void execute_internal() override { void execute_internal() override {
(*overall_counter_)--; (*overall_counter_)--;
if (overall_counter_->load() > 0) { if (overall_counter_->load() > 0) {
std::atomic<int> counter{1}; std::atomic<int> counter{1};
spawn_child(force_steal_sub_task(&counter, overall_counter_)); spawn_child(force_steal_sub_task(&counter, overall_counter_));
while (counter.load() > 0) while (counter.load() > 0); // Spin...
; // Spin...
} }
// Signal our creator that this task has finished.
(*parent_counter_)--; (*parent_counter_)--;
} }
public: public:
explicit force_steal_sub_task(std::atomic<int>* parent_counter, std::atomic<int>* overall_counter): explicit force_steal_sub_task(std::atomic<int> *parent_counter, std::atomic<int> *overall_counter) :
fork_join_sub_task(), fork_join_sub_task(),
parent_counter_{parent_counter}, parent_counter_{parent_counter},
overall_counter_{overall_counter} {} overall_counter_{overall_counter} {}
}; };
TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/fork_join_task.h]") { TEST_CASE("tbb task are scheduled correctly", "[internal/scheduling/fork_join_task.h]") {
malloc_scheduler_memory my_scheduler_memory{8, 2 << 12}; malloc_scheduler_memory my_scheduler_memory{8, 2 << 12};
SECTION("tasks are executed exactly once") { SECTION("tasks are executed exactly once") {
...@@ -56,7 +55,7 @@ TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/fork_join_t ...@@ -56,7 +55,7 @@ TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/fork_join_t
int total_tasks = 1 + 4 + 4 * 3 + 4 * 3 * 2 + 4 * 3 * 2 * 1; int total_tasks = 1 + 4 + 4 * 3 + 4 * 3 * 2 + 4 * 3 * 2 * 1;
std::atomic<int> counter{0}; std::atomic<int> counter{0};
my_scheduler.perform_work([&] (){ my_scheduler.perform_work([&]() {
once_sub_task sub_task{&counter, start_counter}; once_sub_task sub_task{&counter, start_counter};
fork_join_task task{&sub_task, unique_id::create(42)}; fork_join_task task{&sub_task, unique_id::create(42)};
scheduler::execute_task(task); scheduler::execute_task(task);
...@@ -68,7 +67,7 @@ TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/fork_join_t ...@@ -68,7 +67,7 @@ TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/fork_join_t
SECTION("tasks can be stolen") { SECTION("tasks can be stolen") {
scheduler my_scheduler{&my_scheduler_memory, 8}; scheduler my_scheduler{&my_scheduler_memory, 8};
my_scheduler.perform_work([&] (){ my_scheduler.perform_work([&]() {
std::atomic<int> dummy_parent{1}, overall_counter{8}; std::atomic<int> dummy_parent{1}, overall_counter{8};
force_steal_sub_task sub_task{&dummy_parent, &overall_counter}; force_steal_sub_task sub_task{&dummy_parent, &overall_counter};
fork_join_task task{&sub_task, unique_id::create(42)}; fork_join_task task{&sub_task, unique_id::create(42)};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment