Commit aa270645 by FritzFlorian

Reformat code to fit the GNU code formatting style.

parent 3ff10baa
Pipeline #1157 passed with stages in 3 minutes 36 seconds
......@@ -73,7 +73,6 @@ complex_vector prepare_input(int input_size) {
return data;
}
int main() {
PROFILE_ENABLE
complex_vector initial_input = prepare_input(INPUT_SIZE);
......
......@@ -10,8 +10,9 @@
#include <pls/internal/scheduling/root_task.h>
#include <pls/internal/helpers/unique_id.h>
int main() {
std::cout << pls::internal::scheduling::root_task<void(*)>::create_id().type_.hash_code() << std::endl;
std::cout << pls::internal::helpers::unique_id::create<pls::internal::scheduling::root_task<void(*)>>().type_.hash_code() << std::endl;
std::cout << pls::internal::scheduling::root_task<void (*)>::create_id().type_.hash_code() << std::endl;
std::cout
<< pls::internal::helpers::unique_id::create<pls::internal::scheduling::root_task<void (*)>>().type_.hash_code()
<< std::endl;
}
......@@ -5,9 +5,8 @@ using namespace pls::internal::base;
int global = 0;
int main() {
// Try to use every feature, to trigger a link error if the prohibited new is hidden somewhere
auto t1 = start_thread([] (){});
auto t1 = start_thread([]() {});
t1.join();
}
......@@ -6,15 +6,17 @@
#include "pls/internal/scheduling/scheduler.h"
namespace pls {
namespace algorithm {
template<typename Function1, typename Function2>
void invoke_parallel(const Function1& function1, const Function2& function2);
namespace algorithm {
template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1& function1, const Function2& function2, const Function3& function3);
template<typename Function1, typename Function2>
void invoke_parallel(const Function1 &function1, const Function2 &function2);
// ...and so on, add more if we decide to keep this design
}
template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3);
// ...and so on, add more if we decide to keep this design
}
}
#include "invoke_parallel_impl.h"
......
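A minimal usage sketch (editorial, not part of this commit): the overloads above are meant to be called from inside a scheduler work section, since run_body() in the implementation header spawns a fork_join_task through the scheduler when needed. The umbrella header path pls/pls.h is assumed from the public header shown later in this diff.

#include "pls/pls.h"  // assumed include path for the public header in this diff

int main() {
  pls::malloc_scheduler_memory memory{8, 2 << 12};  // 8 workers, 8 KiB task stacks
  pls::scheduler my_scheduler{&memory, 8};
  my_scheduler.perform_work([] {
    int a = 0, b = 0;
    pls::invoke_parallel(
        [&] { a = 1; },   // function1, may run as a stolen sub task
        [&] { b = 2; });  // function2
    // assumption: the elided end of internal_body waits for both sub tasks
  });
}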
......@@ -7,34 +7,35 @@
#include "pls/internal/helpers/unique_id.h"
namespace pls {
namespace algorithm {
namespace internal {
using namespace ::pls::internal::scheduling;
namespace algorithm {
namespace internal {
using namespace ::pls::internal::scheduling;
template<typename Body>
inline void run_body(const Body& internal_body, const abstract_task::id& id) {
template<typename Body>
inline void run_body(const Body &internal_body, const abstract_task::id &id) {
// Make sure we are in the context of this invoke_parallel instance,
// if not we will spawn it as a new 'fork-join-style' task.
auto current_task = scheduler::current_task();
if (current_task->unique_id() == id) {
auto current_sub_task = reinterpret_cast<fork_join_task*>(current_task)->currently_executing();
auto current_sub_task = reinterpret_cast<fork_join_task *>(current_task)->currently_executing();
internal_body(current_sub_task);
} else {
fork_join_lambda<Body> root_body(&internal_body);
fork_join_task root_task{&root_body, id};
scheduler::execute_task(root_task);
}
}
}
}
}
template<typename Function1, typename Function2>
void invoke_parallel(const Function1& function1, const Function2& function2) {
template<typename Function1, typename Function2>
void invoke_parallel(const Function1 &function1, const Function2 &function2) {
using namespace ::pls::internal::scheduling;
using namespace ::pls::internal::helpers;
static abstract_task::id id = unique_id::create<Function1, Function2>();
auto internal_body = [&] (fork_join_sub_task* this_task){
auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); };
auto internal_body = [&](fork_join_sub_task *this_task) {
auto sub_task_body_1 = [&](fork_join_sub_task *) { function1(); };
auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
this_task->spawn_child(sub_task_1);
......@@ -43,18 +44,18 @@ namespace pls {
};
internal::run_body(internal_body, id);
}
}
template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1& function1, const Function2& function2, const Function3& function3) {
template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3) {
using namespace ::pls::internal::scheduling;
using namespace ::pls::internal::helpers;
static abstract_task::id id = unique_id::create<Function1, Function2, Function3>();
auto internal_body = [&] (fork_join_sub_task* this_task){
auto sub_task_body_1 = [&] (fork_join_sub_task*){ function1(); };
auto internal_body = [&](fork_join_sub_task *this_task) {
auto sub_task_body_1 = [&](fork_join_sub_task *) { function1(); };
auto sub_task_1 = fork_join_lambda<decltype(sub_task_body_1)>(&sub_task_body_1);
auto sub_task_body_2 = [&] (fork_join_sub_task*){ function2(); };
auto sub_task_body_2 = [&](fork_join_sub_task *) { function2(); };
auto sub_task_2 = fork_join_lambda<decltype(sub_task_body_2)>(&sub_task_body_2);
this_task->spawn_child(sub_task_1);
......@@ -64,8 +65,9 @@ namespace pls {
};
internal::run_body(internal_body, id);
}
}
}
}
}
#endif //PLS_INVOKE_PARALLEL_IMPL_H
......@@ -8,21 +8,23 @@
#include "system_details.h"
namespace pls {
namespace internal {
namespace base {
namespace alignment {
template<typename T>
struct aligned_wrapper {
namespace internal {
namespace base {
namespace alignment {
template<typename T>
struct aligned_wrapper {
alignas(system_details::CACHE_LINE_SIZE) unsigned char data[sizeof(T)];
T* pointer() { return reinterpret_cast<T*>(data); }
};
void* allocate_aligned(size_t size);
std::uintptr_t next_alignment(std::uintptr_t size);
char* next_alignment(char* pointer);
}
}
}
T *pointer() { return reinterpret_cast<T *>(data); }
};
void *allocate_aligned(size_t size);
std::uintptr_t next_alignment(std::uintptr_t size);
char *next_alignment(char *pointer);
}
}
}
}
#endif //PLS_ALIGNMENT_H
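Editorial note: next_alignment() rounds sizes (and pointers) up to the next cache line boundary; the definition appears in alignment.cpp further down. A standalone sketch of the same arithmetic, for illustration only:

#include <cassert>
#include <cstdint>

constexpr std::uintptr_t kCacheLine = 64;  // mirrors system_details::CACHE_LINE_SIZE

std::uintptr_t next_alignment_sketch(std::uintptr_t size) {
  std::uintptr_t miss = size % kCacheLine;  // distance past the last boundary
  return miss == 0 ? size : size + (kCacheLine - miss);
}

int main() {
  assert(next_alignment_sketch(0) == 0);
  assert(next_alignment_sketch(1) == 64);
  assert(next_alignment_sketch(64) == 64);
  assert(next_alignment_sketch(65) == 128);
}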
......@@ -5,9 +5,10 @@
#include <pthread.h>
namespace pls {
namespace internal {
namespace base {
/**
namespace internal {
namespace base {
/**
* Provides standard barrier behaviour.
* `count` threads have to call `wait()` before any of the `wait()` calls returns,
* thus blocking all threads until everyone has reached the barrier.
......@@ -15,7 +16,7 @@ namespace pls {
* PORTABILITY:
* Current implementation is based on pthreads.
*/
class barrier {
class barrier {
pthread_barrier_t barrier_;
public:
......@@ -23,9 +24,10 @@ namespace pls {
~barrier();
void wait();
};
}
}
};
}
}
}
#endif //PLS_BARRIER_H
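A usage sketch (not part of the diff), pairing the barrier with the thread helpers from this commit:

#include "pls/internal/base/barrier.h"
#include "pls/internal/base/thread.h"

using namespace pls::internal::base;

int main() {
  barrier sync_barrier{2};  // both threads must arrive before either continues
  auto t1 = start_thread([&] { sync_barrier.wait(); });
  sync_barrier.wait();  // released only once t1 has also called wait()
  t1.join();
}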
......@@ -6,12 +6,14 @@
#include "ttas_spin_lock.h"
namespace pls {
namespace internal {
namespace base {
// Default Spin-Lock implementation for this project.
using spin_lock = tas_spin_lock;
}
}
namespace internal {
namespace base {
// Default Spin-Lock implementation for this project.
using spin_lock = tas_spin_lock;
}
}
}
#endif //PLS_SPINLOCK_H
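As the base tests later in this diff show, the lock meets the BasicLockable requirements and works with std::lock_guard; a short sketch:

#include <mutex>

#include "pls/internal/base/spin_lock.h"

using namespace pls::internal::base;

int shared_counter = 0;
spin_lock counter_lock{};

void increment() {
  std::lock_guard<spin_lock> guard{counter_lock};  // lock() now, unlock() at scope exit
  shared_counter++;
}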
......@@ -5,29 +5,31 @@
#include <cstdint>
namespace pls {
namespace internal {
namespace base {
/**
namespace internal {
namespace base {
/**
* Collection of system details, e.g. hardware cache line size.
*
* PORTABILITY:
* Currently sane default values for x86.
*/
namespace system_details {
/**
namespace system_details {
/**
* Most processors have 64 byte cache lines
*/
constexpr std::uintptr_t CACHE_LINE_SIZE = 64;
constexpr std::uintptr_t CACHE_LINE_SIZE = 64;
/**
/**
* Choose one of the following ways to store thread specific data.
* Try to choose the fastest available on this processor/system.
*/
// #define PLS_THREAD_SPECIFIC_PTHREAD
#define PLS_THREAD_SPECIFIC_COMPILER
}
}
}
#define PLS_THREAD_SPECIFIC_COMPILER
}
}
}
}
#endif //PLS_SYSTEM_DETAILS_H
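Editorial sketch: CACHE_LINE_SIZE is the constant aligned_wrapper in alignment.h builds on; any type can be padded the same way to rule out false sharing:

#include "pls/internal/base/system_details.h"

using pls::internal::base::system_details::CACHE_LINE_SIZE;

// One counter per cache line: two instances never share a line.
struct alignas(CACHE_LINE_SIZE) padded_counter {
  unsigned long value;
};
static_assert(sizeof(padded_counter) == CACHE_LINE_SIZE, "padded to a full line");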
......@@ -10,30 +10,30 @@
#include "pls/internal/base/thread.h"
namespace pls {
namespace internal {
namespace base {
/**
namespace internal {
namespace base {
/**
* A simple set and test_and_set based spin lock implementation.
*
* PORTABILITY:
* Current implementation is based on C++ 11 atomic_flag.
*/
class tas_spin_lock {
class tas_spin_lock {
std::atomic_flag flag_;
unsigned int yield_at_tries_;
public:
tas_spin_lock(): flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{1024} {};
tas_spin_lock(const tas_spin_lock& other): flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{other.yield_at_tries_} {}
tas_spin_lock() : flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{1024} {};
tas_spin_lock(const tas_spin_lock &other) : flag_{ATOMIC_FLAG_INIT}, yield_at_tries_{other.yield_at_tries_} {}
void lock();
bool try_lock(unsigned int num_tries=1);
bool try_lock(unsigned int num_tries = 1);
void unlock();
};
}
}
}
};
}
}
}
#endif //PLS_TAS_SPIN_LOCK_H
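A sketch of the try_lock contract (an assumption from the parameter name, confirmed by tas_spin_lock.cpp below: it gives up after num_tries failed attempts instead of spinning forever):

#include "pls/internal/base/tas_spin_lock.h"

using pls::internal::base::tas_spin_lock;

void guarded_work(tas_spin_lock &lock) {
  if (lock.try_lock(/*num_tries=*/128)) {
    // ... short critical section ...
    lock.unlock();
  } else {
    // contended: back off or do other work instead of blocking
  }
}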
......@@ -13,11 +13,12 @@
#include "system_details.h"
namespace pls {
namespace internal {
namespace base {
using thread_entrypoint = void();
namespace internal {
namespace base {
/**
using thread_entrypoint = void();
/**
* Static methods that can be performed on the current thread.
*
* usage:
......@@ -27,15 +28,16 @@ namespace pls {
* PORTABILITY:
* Current implementation is based on pthreads.
*/
class this_thread {
class this_thread {
template<typename Function, typename State>
friend class thread;
friend
class thread;
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
static pthread_key_t local_storage_key_;
static bool local_storage_key_initialized_;
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
static __thread void* local_state_;
static __thread void *local_state_;
#endif
public:
static void yield() {
......@@ -49,7 +51,7 @@ namespace pls {
* @return The state pointer held for this thread.
*/
template<typename T>
static T* state();
static T *state();
/**
* Stores a pointer to the thread local state object.
......@@ -60,10 +62,10 @@ namespace pls {
* @param state_pointer A pointer to the thread's state object.
*/
template<typename T>
static void set_state(T* state_pointer);
};
static void set_state(T *state_pointer);
};
/**
/**
* Abstraction for starting a function in a separate thread.
*
* @tparam Function Lambda being started on the new thread.
......@@ -79,43 +81,44 @@ namespace pls {
* PORTABILITY:
* Current implementation is based on pthreads.
*/
template<typename Function, typename State>
class thread {
template<typename Function, typename State>
class thread {
friend class this_thread;
// Keep a copy of the function (lambda) in this object to make sure it is valid when called!
Function function_;
State* state_pointer_;
State *state_pointer_;
// We need to wait for the started function to read
// the function_ and state_pointer_ properties before returning
// from the constructor, as the object might be moved after this.
std::atomic_flag* startup_flag_;
std::atomic_flag *startup_flag_;
// Keep handle to native implementation
pthread_t pthread_thread_;
static void* start_pthread_internal(void* thread_pointer);
static void *start_pthread_internal(void *thread_pointer);
public:
explicit thread(const Function& function, State* state_pointer);
explicit thread(const Function &function, State *state_pointer);
public:
void join();
// make object move only
thread(thread&&) noexcept = default;
thread& operator=(thread&&) noexcept = default;
thread(thread &&) noexcept = default;
thread &operator=(thread &&) noexcept = default;
thread(const thread&) = delete;
thread& operator=(const thread&) = delete;
};
thread(const thread &) = delete;
thread &operator=(const thread &) = delete;
};
template<typename Function, typename State>
thread<Function, State> start_thread(const Function& function, State* state_pointer);
template<typename Function>
thread<Function, void> start_thread(const Function& function);
}
}
template<typename Function, typename State>
thread<Function, State> start_thread(const Function &function, State *state_pointer);
template<typename Function>
thread<Function, void> start_thread(const Function &function);
}
}
}
#include "thread_impl.h"
......
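A usage sketch of the state-carrying thread API (editorial; it assumes, as the "thread state" base test suggests, that state_pointer is installed as the new thread's local state before the lambda runs):

#include "pls/internal/base/thread.h"

using namespace pls::internal::base;

int main() {
  int worker_state = 42;
  auto t1 = start_thread([] {
    // assumed: the startup path called set_state(&worker_state) for this thread
    int *my_state = this_thread::state<int>();
    (void)my_state;
  }, &worker_state);
  t1.join();
}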
......@@ -3,33 +3,34 @@
#define PLS_THREAD_IMPL_H
namespace pls {
namespace internal {
namespace base {
template<typename T>
T* this_thread::state() {
namespace internal {
namespace base {
template<typename T>
T *this_thread::state() {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
return reinterpret_cast<T*>(pthread_getspecific(local_storage_key_));
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
return reinterpret_cast<T*>(local_state_);
return reinterpret_cast<T *>(local_state_);
#endif
}
}
template<typename T>
void this_thread::set_state(T* state_pointer) {
template<typename T>
void this_thread::set_state(T *state_pointer) {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_setspecific(this_thread::local_storage_key_, (void*)state_pointer);
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
local_state_ = state_pointer;
#endif
}
}
template<typename Function, typename State>
void* thread<Function, State>::start_pthread_internal(void* thread_pointer) {
auto my_thread = reinterpret_cast<thread*>(thread_pointer);
template<typename Function, typename State>
void *thread<Function, State>::start_pthread_internal(void *thread_pointer) {
auto my_thread = reinterpret_cast<thread *>(thread_pointer);
Function my_function_copy = my_thread->function_;
State* my_state_pointer_copy = my_thread->state_pointer_;
State *my_state_pointer_copy = my_thread->state_pointer_;
// Now we have copies of everything we need on the stack.
// The original thread object can be moved freely (no more
......@@ -41,10 +42,10 @@ namespace pls {
// Finished executing the user function
pthread_exit(nullptr);
}
}
template<typename Function, typename State>
thread<Function, State>::thread(const Function& function, State* state_pointer):
template<typename Function, typename State>
thread<Function, State>::thread(const Function &function, State *state_pointer):
function_{function},
state_pointer_{state_pointer},
startup_flag_{nullptr},
......@@ -62,27 +63,27 @@ namespace pls {
startup_flag_ = &startup_flag;
startup_flag.test_and_set(); // Set the flag, pthread will clear it when it is safe to return
pthread_create(&pthread_thread_, nullptr, start_pthread_internal, (void *)(this));
while (startup_flag.test_and_set())
; // Busy waiting for the starting flag to clear
}
pthread_create(&pthread_thread_, nullptr, start_pthread_internal, (void *) (this));
while (startup_flag.test_and_set()); // Busy waiting for the starting flag to clear
}
template<typename Function, typename State>
void thread<Function, State>::join() {
template<typename Function, typename State>
void thread<Function, State>::join() {
pthread_join(pthread_thread_, nullptr);
}
}
template<typename Function, typename State>
thread<Function, State> start_thread(const Function& function, State* state_pointer) {
template<typename Function, typename State>
thread<Function, State> start_thread(const Function &function, State *state_pointer) {
return thread<Function, State>(function, state_pointer);
}
}
template<typename Function>
thread<Function, void> start_thread(const Function& function) {
template<typename Function>
thread<Function, void> start_thread(const Function &function) {
return thread<Function, void>(function, nullptr);
}
}
}
}
}
}
}
#endif //PLS_THREAD_IMPL_H
......@@ -8,30 +8,28 @@
#include "pls/internal/base/thread.h"
namespace pls {
namespace internal {
namespace base {
/**
namespace internal {
namespace base {
/**
* A simple test and test_and_set (TTAS) based spin lock implementation.
*
* PORTABILITY:
* Current implementation is based on C++ 11 atomic_flag.
*/
class ttas_spin_lock {
class ttas_spin_lock {
std::atomic<int> flag_;
const unsigned int yield_at_tries_;
public:
ttas_spin_lock(): flag_{0}, yield_at_tries_{1024} {};
ttas_spin_lock(const ttas_spin_lock& other): flag_{0}, yield_at_tries_{other.yield_at_tries_} {}
ttas_spin_lock() : flag_{0}, yield_at_tries_{1024} {};
ttas_spin_lock(const ttas_spin_lock &other) : flag_{0}, yield_at_tries_{other.yield_at_tries_} {}
void lock();
bool try_lock(unsigned int num_tries=1);
bool try_lock(unsigned int num_tries = 1);
void unlock();
};
}
}
};
}
}
}
#endif //PLS_TTAS_SPIN_LOCK_H
......@@ -9,9 +9,10 @@
#include "pls/internal/base/alignment.h"
namespace pls {
namespace internal {
namespace data_structures {
/**
namespace internal {
namespace data_structures {
/**
* Generic stack-like data structure that allows allocating arbitrary objects in a given memory region.
* The objects will be stored aligned in the stack, making the storage cache friendly and very fast
* (as long as one can live with the stack restrictions).
......@@ -23,31 +24,33 @@ namespace pls {
* T* pointer = stack.push(some_object); // Copy-Construct the object on top of stack
* stack.pop<T>(); // Destruct the top object of type T
*/
class aligned_stack {
class aligned_stack {
// Keep bounds of our memory block
char* memory_start_;
char* memory_end_;
char *memory_start_;
char *memory_end_;
// Current head will always be aligned to cache lines
char* head_;
char *head_;
public:
typedef char* state;
typedef char *state;
aligned_stack(): memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {};
aligned_stack(char* memory_region, std::size_t size);
aligned_stack() : memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {};
aligned_stack(char *memory_region, std::size_t size);
template<typename T>
T* push(const T& object);
T *push(const T &object);
template<typename T>
void* push();
void *push();
template<typename T>
T pop();
state save_state() const { return head_; }
void reset_state(state new_state) { head_ = new_state; }
};
}
}
};
}
}
}
#include "aligned_stack_impl.h"
......
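A sketch mirroring the aligned_stack tests further down in this diff:

#include <array>

#include "pls/internal/data_structures/aligned_stack.h"

using namespace pls::internal::data_structures;

int main() {
  char memory[1024];
  aligned_stack stack{memory, sizeof(memory)};

  std::array<char, 5> data{'a', 'b', 'c', 'd', 'e'};
  stack.push(data);                                   // copied in, cache line aligned
  auto retrieved = stack.pop<std::array<char, 5>>();  // LIFO: same bytes back
  (void)retrieved;
}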
......@@ -3,17 +3,18 @@
#define PLS_ALIGNED_STACK_IMPL_H
namespace pls {
namespace internal {
namespace data_structures {
template<typename T>
T* aligned_stack::push(const T& object) {
namespace internal {
namespace data_structures {
template<typename T>
T *aligned_stack::push(const T &object) {
// Copy-Construct
return new ((void*)push<T>())T(object);
}
return new((void *) push<T>()) T(object);
}
template<typename T>
void* aligned_stack::push() {
void* result = reinterpret_cast<T*>(head_);
template<typename T>
void *aligned_stack::push() {
void *result = reinterpret_cast<T *>(head_);
// Move head to next aligned position after new object
head_ = base::alignment::next_alignment(head_ + sizeof(T));
......@@ -22,15 +23,16 @@ namespace pls {
}
return result;
}
}
template<typename T>
T aligned_stack::pop() {
template<typename T>
T aligned_stack::pop() {
head_ = head_ - base::alignment::next_alignment(sizeof(T));
return *reinterpret_cast<T*>(head_);
}
}
}
return *reinterpret_cast<T *>(head_);
}
}
}
}
#endif //PLS_ALIGNED_STACK_IMPL_H
......@@ -5,56 +5,58 @@
#include "pls/internal/base/spin_lock.h"
namespace pls {
namespace internal {
namespace data_structures {
/**
namespace internal {
namespace data_structures {
/**
* Turns any object into a deque item when inheriting from this.
*/
class deque_item {
class deque_item {
friend class deque_internal;
deque_item* prev_;
deque_item* next_;
deque_item *prev_;
deque_item *next_;
};
};
class deque_internal {
class deque_internal {
protected:
deque_item* head_;
deque_item* tail_;
deque_item *head_;
deque_item *tail_;
base::spin_lock lock_;
deque_item* pop_head_internal();
deque_item* pop_tail_internal();
deque_item *pop_head_internal();
deque_item *pop_tail_internal();
void push_tail_internal(deque_item *new_item);
};
};
/**
/**
* A doubly linked list based deque.
* Storage is therefore only needed for the individual items.
*
* @tparam Item The type of items stored in this deque
*/
template<typename Item>
class deque: deque_internal {
template<typename Item>
class deque : deque_internal {
public:
explicit deque(): deque_internal{} {}
explicit deque() : deque_internal{} {}
inline Item* pop_head() {
return static_cast<Item*>(pop_head_internal());
inline Item *pop_head() {
return static_cast<Item *>(pop_head_internal());
}
inline Item* pop_tail() {
return static_cast<Item*>(pop_tail_internal());
inline Item *pop_tail() {
return static_cast<Item *>(pop_tail_internal());
}
inline void push_tail(Item* new_item) {
inline void push_tail(Item *new_item) {
push_tail_internal(new_item);
}
};
}
}
};
}
}
}
#endif //PLS_DEQUE_H
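A sketch following the deque tests below: items inherit from deque_item and are linked in place, so the deque itself needs no extra storage:

#include "pls/internal/data_structures/deque.h"

using namespace pls::internal::data_structures;

struct my_item : deque_item {};  // payload would go here

int main() {
  deque<my_item> my_deque;
  my_item one, two;
  my_deque.push_tail(&one);
  my_deque.push_tail(&two);
  my_item *newest = my_deque.pop_tail();  // &two: the end the owner works on
  my_item *oldest = my_deque.pop_head();  // &one: the end thieves steal from
  (void)newest;
  (void)oldest;
}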
......@@ -9,11 +9,12 @@
#include <iostream>
namespace pls {
namespace internal {
namespace helpers {
// TODO: Clean up (separate into small functions and .cpp file)
template<typename Function>
void run_mini_benchmark(const Function& lambda, size_t max_threads, unsigned long max_runtime_ms=1000) {
namespace internal {
namespace helpers {
// TODO: Clean up (separate into small functions and .cpp file)
template<typename Function>
void run_mini_benchmark(const Function &lambda, size_t max_threads, unsigned long max_runtime_ms = 1000) {
using namespace std;
using namespace pls::internal::scheduling;
......@@ -37,7 +38,7 @@ namespace pls {
});
long time = chrono::duration_cast<chrono::microseconds>(end_time - start_time).count();
double time_per_iteration = (double)time / iterations;
double time_per_iteration = (double) time / iterations;
std::cout << time_per_iteration;
if (num_threads < max_threads) {
......@@ -45,9 +46,10 @@ namespace pls {
}
}
std::cout << std::endl;
}
}
}
}
}
}
}
#endif //PLS_MINI_BENCHMARK_H
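A call sketch (editorial; the elided body suggests the lambda is invoked with no arguments on 1..max_threads workers, printing microseconds per iteration for each thread count):

#include "pls/internal/helpers/mini_benchmark.h"

int main() {
  pls::internal::helpers::run_mini_benchmark([] {
    volatile int sink = 0;
    for (int i = 0; i < 1000; i++) sink = sink + i;  // placeholder workload
  }, /*max_threads=*/4, /*max_runtime_ms=*/500);
}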
......@@ -15,7 +15,7 @@
#ifdef NEW_LINK_ERROR
// This will cause a linker error if new is used in the code.
// We also exit if it is somehow still called.
inline void * operator new (std::size_t) {
inline void *operator new(std::size_t) {
extern int bare_new_erroneously_called();
exit(bare_new_erroneously_called() | 1);
}
......
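Editorial note on the guard above: bare_new_erroneously_called() is declared but never defined, so any translation unit that ends up referencing this operator new leaves an unresolved symbol at link time; the exit() call is only a runtime backstop. A standalone reproduction of the idiom, with a hypothetical symbol name:

#include <cstddef>
#include <cstdlib>

extern int missing_symbol_guard();  // declared, deliberately never defined

inline void *operator new(std::size_t) {
  exit(missing_symbol_guard() | 1);  // referencing it breaks the link
}

// int main() { return *new int{0}; }  // uncommenting fails with an undefined reference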
......@@ -7,12 +7,13 @@
#include <stdint.h>
namespace pls {
namespace internal {
namespace helpers {
struct unique_id {
namespace internal {
namespace helpers {
struct unique_id {
const uint32_t id_;
const std::type_info& type_;
bool operator==(const unique_id& other) const { return id_ == other.id_ && type_ == other.type_; }
const std::type_info &type_;
bool operator==(const unique_id &other) const { return id_ == other.id_ && type_ == other.type_; }
static constexpr unique_id create(const uint32_t id) {
return unique_id(id, typeid(void));
......@@ -22,10 +23,11 @@ namespace pls {
return unique_id(UINT32_MAX, typeid(std::tuple<T...>));
}
private:
explicit constexpr unique_id(const uint32_t id, const std::type_info& type): id_{id}, type_{type} {};
};
}
}
explicit constexpr unique_id(const uint32_t id, const std::type_info &type) : id_{id}, type_{type} {};
};
}
}
}
#endif //PLS_UNIQUE_ID_H
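A sketch of the two factory paths (the variadic create<T...>() is used elsewhere in this diff, e.g. unique_id::create<Function1, Function2>()):

#include <iostream>

#include "pls/internal/helpers/unique_id.h"

using pls::internal::helpers::unique_id;

int main() {
  auto numeric = unique_id::create(42);           // explicit id, type_ is void
  auto typed = unique_id::create<int, double>();  // id derived from the type list
  std::cout << (numeric == typed) << std::endl;   // 0: id and type both differ
}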
......@@ -6,38 +6,40 @@
#include "pls/internal/helpers/unique_id.h"
namespace pls {
namespace internal {
namespace scheduling {
class abstract_task {
namespace internal {
namespace scheduling {
class abstract_task {
public:
using id = helpers::unique_id;
private:
unsigned int depth_;
abstract_task::id unique_id_;
abstract_task* child_task_;
abstract_task *child_task_;
public:
abstract_task(const unsigned int depth, const abstract_task::id& unique_id):
abstract_task(const unsigned int depth, const abstract_task::id &unique_id) :
depth_{depth},
unique_id_{unique_id},
child_task_{nullptr} {}
virtual void execute() = 0;
void set_child(abstract_task* child_task) { child_task_ = child_task; }
abstract_task* child() { return child_task_; }
void set_child(abstract_task *child_task) { child_task_ = child_task; }
abstract_task *child() { return child_task_; }
void set_depth(unsigned int depth) { depth_ = depth; }
unsigned int depth() const { return depth_; }
id unique_id() const { return unique_id_; }
protected:
virtual bool internal_stealing(abstract_task* other_task) = 0;
virtual bool split_task(base::spin_lock* lock) = 0;
virtual bool internal_stealing(abstract_task *other_task) = 0;
virtual bool split_task(base::spin_lock *lock) = 0;
bool steal_work();
};
}
}
};
}
}
}
#endif //PLS_ABSTRACT_TASK_H
......@@ -11,24 +11,25 @@
#include "thread_state.h"
namespace pls {
namespace internal {
namespace scheduling {
class fork_join_task;
class fork_join_sub_task: public data_structures::deque_item {
namespace internal {
namespace scheduling {
class fork_join_task;
class fork_join_sub_task : public data_structures::deque_item {
friend class fork_join_task;
// Coordinate finishing of sub_tasks
std::atomic_uint32_t ref_count_;
fork_join_sub_task* parent_;
fork_join_sub_task *parent_;
// Access to TBB scheduling environment
fork_join_task* tbb_task_;
fork_join_task *tbb_task_;
// Stack Management (reset stack pointer after wait_for_all() calls)
data_structures::aligned_stack::state stack_state_;
protected:
explicit fork_join_sub_task();
fork_join_sub_task(const fork_join_sub_task& other);
fork_join_sub_task(const fork_join_sub_task &other);
// Overridden with the behaviour of child tasks
virtual void execute_internal() = 0;
......@@ -36,62 +37,63 @@ namespace pls {
public:
// Only use them when actually executing this sub_task (only public for simpler API design)
template<typename T>
void spawn_child(const T& sub_task);
void spawn_child(const T &sub_task);
void wait_for_all();
private:
void spawn_child_internal(fork_join_sub_task* sub_task);
void spawn_child_internal(fork_join_sub_task *sub_task);
void execute();
};
};
template<typename Function>
class fork_join_lambda: public fork_join_sub_task {
const Function* function_;
template<typename Function>
class fork_join_lambda : public fork_join_sub_task {
const Function *function_;
public:
explicit fork_join_lambda(const Function* function): function_{function} {};
explicit fork_join_lambda(const Function *function) : function_{function} {};
protected:
void execute_internal() override {
(*function_)(this);
}
};
};
class fork_join_task: public abstract_task {
class fork_join_task : public abstract_task {
friend class fork_join_sub_task;
fork_join_sub_task* root_task_;
fork_join_sub_task* currently_executing_;
data_structures::aligned_stack* my_stack_;
fork_join_sub_task *root_task_;
fork_join_sub_task *currently_executing_;
data_structures::aligned_stack *my_stack_;
// Double-Ended Queue management
data_structures::deque<fork_join_sub_task> deque_;
// Steal Management
fork_join_sub_task* last_stolen_;
fork_join_sub_task *last_stolen_;
fork_join_sub_task* get_local_sub_task();
fork_join_sub_task* get_stolen_sub_task();
fork_join_sub_task *get_local_sub_task();
fork_join_sub_task *get_stolen_sub_task();
bool internal_stealing(abstract_task* other_task) override;
bool split_task(base::spin_lock* /*lock*/) override;
bool internal_stealing(abstract_task *other_task) override;
bool split_task(base::spin_lock * /*lock*/) override;
public:
explicit fork_join_task(fork_join_sub_task* root_task, const abstract_task::id& id);
explicit fork_join_task(fork_join_sub_task *root_task, const abstract_task::id &id);
void execute() override;
fork_join_sub_task* currently_executing() const;
};
fork_join_sub_task *currently_executing() const;
};
template<typename T>
void fork_join_sub_task::spawn_child(const T& task) {
template<typename T>
void fork_join_sub_task::spawn_child(const T &task) {
PROFILE_FORK_JOIN_STEALING("spawn_child")
static_assert(std::is_base_of<fork_join_sub_task, T>::value, "Only pass fork_join_sub_task subclasses!");
T* new_task = tbb_task_->my_stack_->push(task);
T *new_task = tbb_task_->my_stack_->push(task);
spawn_child_internal(new_task);
}
}
}
}
}
}
}
#endif //PLS_TBB_LIKE_TASK_H
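A hypothetical sub task in the style of the once_sub_task test at the end of this diff (editorial sketch, not part of the commit):

#include <atomic>

#include "pls/internal/scheduling/fork_join_task.h"

using namespace pls::internal::scheduling;

class count_sub_task : public fork_join_sub_task {
  std::atomic<int> *counter_;
  int children_;

 protected:
  void execute_internal() override {
    (*counter_)++;
    for (int i = 0; i < children_; i++) {
      // the child is copied onto the task's aligned_stack and queued on the deque
      spawn_child(count_sub_task(counter_, children_ - 1));
    }
    wait_for_all();  // execute local children (or steal) until all have finished
  }

 public:
  count_sub_task(std::atomic<int> *counter, int children)
      : fork_join_sub_task(), counter_{counter}, children_{children} {}
};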
......@@ -10,20 +10,21 @@
#include "abstract_task.h"
namespace pls {
namespace internal {
namespace scheduling {
template<typename Function>
class root_task : public abstract_task {
namespace internal {
namespace scheduling {
template<typename Function>
class root_task : public abstract_task {
Function function_;
std::atomic_uint8_t finished_;
public:
static constexpr auto create_id = helpers::unique_id::create<root_task<Function>>;
explicit root_task(Function function):
explicit root_task(Function function) :
abstract_task{0, create_id()},
function_{function},
finished_{0} {}
root_task(const root_task& other):
root_task(const root_task &other) :
abstract_task{0, create_id()},
function_{other.function_},
finished_{0} {}
......@@ -38,23 +39,23 @@ namespace pls {
finished_ = 1;
}
bool internal_stealing(abstract_task* /*other_task*/) override {
bool internal_stealing(abstract_task * /*other_task*/) override {
return false;
}
bool split_task(base::spin_lock* /*lock*/) override {
bool split_task(base::spin_lock * /*lock*/) override {
return false;
}
};
};
template<typename Function>
class root_worker_task : public abstract_task {
root_task<Function>* master_task_;
template<typename Function>
class root_worker_task : public abstract_task {
root_task<Function> *master_task_;
public:
static constexpr auto create_id = root_task<Function>::create_id;
explicit root_worker_task(root_task<Function>* master_task):
explicit root_worker_task(root_task<Function> *master_task) :
abstract_task{0, create_id()},
master_task_{master_task} {}
......@@ -65,16 +66,17 @@ namespace pls {
} while (!master_task_->finished());
}
bool internal_stealing(abstract_task* /*other_task*/) override {
bool internal_stealing(abstract_task * /*other_task*/) override {
return false;
}
bool split_task(base::spin_lock* /*lock*/) override {
bool split_task(base::spin_lock * /*lock*/) override {
return false;
}
};
}
}
};
}
}
}
#endif //PLS_ROOT_MASTER_TASK_H
......@@ -12,12 +12,14 @@
#include "scheduler.h"
namespace pls {
namespace internal {
namespace scheduling {
template<typename Function>
class run_on_n_threads_task : public abstract_task {
namespace internal {
namespace scheduling {
template<typename Function>
class run_on_n_threads_task : public abstract_task {
template<typename F>
friend class run_on_n_threads_task_worker;
friend
class run_on_n_threads_task_worker;
Function function_;
......@@ -38,7 +40,7 @@ namespace pls {
public:
static constexpr auto create_id = helpers::unique_id::create<run_on_n_threads_task<Function>>;
run_on_n_threads_task(Function function, int num_threads):
run_on_n_threads_task(Function function, int num_threads) :
abstract_task{0, create_id()},
function_{function},
counter{num_threads - 1} {}
......@@ -55,21 +57,21 @@ namespace pls {
std::cout << "Finished Master!" << std::endl;
}
bool internal_stealing(abstract_task* /*other_task*/) override {
bool internal_stealing(abstract_task * /*other_task*/) override {
return false;
}
bool split_task(base::spin_lock* lock) override;
};
bool split_task(base::spin_lock *lock) override;
};
template<typename Function>
class run_on_n_threads_task_worker : public abstract_task {
template<typename Function>
class run_on_n_threads_task_worker : public abstract_task {
Function function_;
run_on_n_threads_task<Function>* root_;
run_on_n_threads_task<Function> *root_;
public:
static constexpr auto create_id = helpers::unique_id::create<run_on_n_threads_task_worker<Function>>;
run_on_n_threads_task_worker(Function function, run_on_n_threads_task<Function>* root):
run_on_n_threads_task_worker(Function function, run_on_n_threads_task<Function> *root) :
abstract_task{0, create_id()},
function_{function},
root_{root} {}
......@@ -83,17 +85,17 @@ namespace pls {
}
}
bool internal_stealing(abstract_task* /*other_task*/) override {
bool internal_stealing(abstract_task * /*other_task*/) override {
return false;
}
bool split_task(base::spin_lock* /*lock*/) override {
bool split_task(base::spin_lock * /*lock*/) override {
return false;
}
};
};
template<typename Function>
bool run_on_n_threads_task<Function>::split_task(base::spin_lock* lock) {
template<typename Function>
bool run_on_n_threads_task<Function>::split_task(base::spin_lock *lock) {
if (get_counter() <= 0) {
return false;
}
......@@ -105,14 +107,15 @@ namespace pls {
auto task = run_on_n_threads_task_worker<Function>{function_, this};
scheduler->execute_task(task, depth());
return true;
}
}
template<typename Function>
run_on_n_threads_task<Function> create_run_on_n_threads_task(Function function, int num_threads) {
template<typename Function>
run_on_n_threads_task<Function> create_run_on_n_threads_task(Function function, int num_threads) {
return run_on_n_threads_task<Function>{function, num_threads};
}
}
}
}
}
}
}
#endif //PLS_RUN_ON_N_THREADS_TASK_H
......@@ -17,21 +17,22 @@
#include "scheduler_memory.h"
namespace pls {
namespace internal {
namespace scheduling {
void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
namespace internal {
namespace scheduling {
class scheduler {
void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
class scheduler {
friend void worker_routine();
const unsigned int num_threads_;
scheduler_memory* memory_;
scheduler_memory *memory_;
base::barrier sync_barrier_;
bool terminated_;
public:
explicit scheduler(scheduler_memory* memory, unsigned int num_threads);
explicit scheduler(scheduler_memory *memory, unsigned int num_threads);
~scheduler();
/**
......@@ -50,17 +51,18 @@ namespace pls {
* @param depth Optional: depth of the new task, otherwise set implicitly.
*/
template<typename Task>
static void execute_task(Task& task, int depth=-1);
static void execute_task(Task &task, int depth = -1);
static abstract_task* current_task() { return base::this_thread::state<thread_state>()->current_task_; }
static abstract_task *current_task() { return base::this_thread::state<thread_state>()->current_task_; }
void terminate(bool wait_for_workers=true);
void terminate(bool wait_for_workers = true);
unsigned int num_threads() const { return num_threads_; }
thread_state* thread_state_for(size_t id) { return memory_->thread_state_for(id); }
};
}
}
thread_state *thread_state_for(size_t id) { return memory_->thread_state_for(id); }
};
}
}
}
#include "scheduler_impl.h"
......
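The full lifecycle, following the fork_join_task tests below:

#include "pls/internal/scheduling/scheduler.h"

using namespace pls::internal::scheduling;

int main() {
  malloc_scheduler_memory memory{8, 2 << 12};  // 8 workers, 8 KiB task stacks
  scheduler my_scheduler{&memory, 8};
  my_scheduler.perform_work([] {
    // runs as the root work section; spawn tasks here, e.g. via scheduler::execute_task(...)
  });
  // ~scheduler() calls terminate() and joins the worker threads
}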
......@@ -3,10 +3,11 @@
#define PLS_SCHEDULER_IMPL_H
namespace pls {
namespace internal {
namespace scheduling {
template<typename Function>
void scheduler::perform_work(Function work_section) {
namespace internal {
namespace scheduling {
template<typename Function>
void scheduler::perform_work(Function work_section) {
PROFILE_WORK_BLOCK("scheduler::perform_work")
root_task<Function> master{work_section};
......@@ -31,15 +32,15 @@ namespace pls {
root_worker_task<Function> worker{new_master};
memory_->task_stack_for(0)->pop<typeof(worker)>();
}
}
}
template<typename Task>
void scheduler::execute_task(Task& task, int depth) {
template<typename Task>
void scheduler::execute_task(Task &task, int depth) {
static_assert(std::is_base_of<abstract_task, Task>::value, "Only pass abstract_task subclasses!");
auto my_state = base::this_thread::state<thread_state>();
abstract_task* old_task;
abstract_task* new_task;
abstract_task *old_task;
abstract_task *new_task;
// Init Task
{
......@@ -64,9 +65,10 @@ namespace pls {
my_state->task_stack_->pop<Task>();
}
}
}
}
}
}
}
}
#endif //PLS_SCHEDULER_IMPL_H
......@@ -7,21 +7,22 @@
#define PLS_SCHEDULER_MEMORY_H
namespace pls {
namespace internal {
namespace scheduling {
void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
namespace internal {
namespace scheduling {
class scheduler_memory {
void worker_routine();
using scheduler_thread = base::thread<decltype(&worker_routine), thread_state>;
class scheduler_memory {
public:
virtual size_t max_threads() const = 0;
virtual thread_state* thread_state_for(size_t id) = 0;
virtual scheduler_thread* thread_for(size_t id) = 0;
virtual data_structures::aligned_stack* task_stack_for(size_t id) = 0;
};
virtual thread_state *thread_state_for(size_t id) = 0;
virtual scheduler_thread *thread_for(size_t id) = 0;
virtual data_structures::aligned_stack *task_stack_for(size_t id) = 0;
};
template<size_t MAX_THREADS, size_t TASK_STACK_SIZE>
class static_scheduler_memory: public scheduler_memory {
template<size_t MAX_THREADS, size_t TASK_STACK_SIZE>
class static_scheduler_memory : public scheduler_memory {
// Every one of these types has to live on its own cache line,
// as each thread uses one of them independently.
// Therefore it would be a major performance hit if we shared cache lines on these.
......@@ -38,17 +39,18 @@ namespace pls {
public:
static_scheduler_memory() {
for (size_t i = 0; i < MAX_THREADS; i++) {
new ((void*)task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i].pointer()->data(), TASK_STACK_SIZE);
new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i].pointer()->data(),
TASK_STACK_SIZE);
}
}
size_t max_threads() const override { return MAX_THREADS; }
thread_state* thread_state_for(size_t id) override { return thread_states_[id].pointer(); }
scheduler_thread* thread_for(size_t id) override { return threads_[id].pointer(); }
data_structures::aligned_stack* task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
};
thread_state *thread_state_for(size_t id) override { return thread_states_[id].pointer(); }
scheduler_thread *thread_for(size_t id) override { return threads_[id].pointer(); }
data_structures::aligned_stack *task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
};
class malloc_scheduler_memory: public scheduler_memory {
class malloc_scheduler_memory : public scheduler_memory {
// Every one of these types has to live on its own cache line,
// as each thread uses one of them independently.
// Therefore it would be a major performance hit if we shared cache lines on these.
......@@ -58,21 +60,22 @@ namespace pls {
const size_t num_threads_;
aligned_thread* threads_;
aligned_thread_state * thread_states_;
char** task_stacks_memory_;
aligned_aligned_stack * task_stacks_;
aligned_thread *threads_;
aligned_thread_state *thread_states_;
char **task_stacks_memory_;
aligned_aligned_stack *task_stacks_;
public:
explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16);
~malloc_scheduler_memory();
size_t max_threads() const override { return num_threads_; }
thread_state* thread_state_for(size_t id) override { return thread_states_[id].pointer(); }
scheduler_thread* thread_for(size_t id) override { return threads_[id].pointer(); }
data_structures::aligned_stack* task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
};
}
}
thread_state *thread_state_for(size_t id) override { return thread_states_[id].pointer(); }
scheduler_thread *thread_for(size_t id) override { return threads_[id].pointer(); }
data_structures::aligned_stack *task_stack_for(size_t id) override { return task_stacks_[id].pointer(); }
};
}
}
}
#endif //PLS_SCHEDULER_MEMORY_H
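A sketch of the static variant (sizes illustrative): all memory lives in static storage, so startup performs no allocation:

#include "pls/internal/scheduling/scheduler.h"

using namespace pls::internal::scheduling;

static static_scheduler_memory<8, 4096> global_memory;  // 8 worker slots, 4 KiB stacks

int main() {
  scheduler my_scheduler{&global_memory, 8};
  my_scheduler.perform_work([] { /* ... */ });
}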
......@@ -8,21 +8,22 @@
#include "abstract_task.h"
namespace pls {
namespace internal {
namespace scheduling {
// forward declaration
class scheduler;
struct thread_state {
scheduler* scheduler_;
abstract_task* root_task_;
abstract_task* current_task_;
data_structures::aligned_stack* task_stack_;
namespace internal {
namespace scheduling {
// forward declaration
class scheduler;
struct thread_state {
scheduler *scheduler_;
abstract_task *root_task_;
abstract_task *current_task_;
data_structures::aligned_stack *task_stack_;
size_t id_;
base::spin_lock lock_;
std::minstd_rand random_;
thread_state():
thread_state() :
scheduler_{nullptr},
root_task_{nullptr},
current_task_{nullptr},
......@@ -30,16 +31,17 @@ namespace pls {
id_{0},
random_{id_} {};
thread_state(scheduler* scheduler, data_structures::aligned_stack* task_stack, unsigned int id):
thread_state(scheduler *scheduler, data_structures::aligned_stack *task_stack, unsigned int id) :
scheduler_{scheduler},
root_task_{nullptr},
current_task_{nullptr},
task_stack_{task_stack},
id_{id},
random_{id_} {}
};
}
}
};
}
}
}
#endif //PLS_THREAD_STATE_H
......@@ -8,18 +8,20 @@
#include "pls/internal/helpers/unique_id.h"
namespace pls {
using internal::scheduling::static_scheduler_memory;
using internal::scheduling::malloc_scheduler_memory;
using internal::scheduling::scheduler;
using task_id = internal::scheduling::abstract_task::id;
using internal::scheduling::static_scheduler_memory;
using internal::scheduling::malloc_scheduler_memory;
using unique_id = internal::helpers::unique_id;
using internal::scheduling::scheduler;
using task_id = internal::scheduling::abstract_task::id;
using internal::scheduling::fork_join_sub_task;
using internal::scheduling::fork_join_task;
using unique_id = internal::helpers::unique_id;
using internal::scheduling::fork_join_sub_task;
using internal::scheduling::fork_join_task;
using algorithm::invoke_parallel;
using algorithm::invoke_parallel;
}
#endif
......@@ -2,26 +2,28 @@
#include "pls/internal/base/system_details.h"
namespace pls {
namespace internal {
namespace base {
namespace alignment {
void* allocate_aligned(size_t size) {
namespace internal {
namespace base {
namespace alignment {
void *allocate_aligned(size_t size) {
return aligned_alloc(system_details::CACHE_LINE_SIZE, size);
}
}
std::uintptr_t next_alignment(std::uintptr_t size) {
std::uintptr_t next_alignment(std::uintptr_t size) {
std::uintptr_t miss_alignment = size % base::system_details::CACHE_LINE_SIZE;
if (miss_alignment == 0) {
return size;
} else {
return size + (base::system_details::CACHE_LINE_SIZE - miss_alignment);
}
}
}
char* next_alignment(char* pointer) {
return reinterpret_cast<char*>(next_alignment(reinterpret_cast<std::uintptr_t >(pointer)));
}
}
}
}
char *next_alignment(char *pointer) {
return reinterpret_cast<char *>(next_alignment(reinterpret_cast<std::uintptr_t >(pointer)));
}
}
}
}
}
#include "pls/internal/base/barrier.h"
namespace pls {
namespace internal {
namespace base {
barrier::barrier(const unsigned int count): barrier_{} {
namespace internal {
namespace base {
barrier::barrier(const unsigned int count) : barrier_{} {
pthread_barrier_init(&barrier_, nullptr, count);
}
}
barrier::~barrier() {
barrier::~barrier() {
pthread_barrier_destroy(&barrier_);
}
}
void barrier::wait() {
void barrier::wait() {
pthread_barrier_wait(&barrier_);
}
}
}
}
}
}
}
......@@ -2,9 +2,10 @@
#include "pls/internal/base/tas_spin_lock.h"
namespace pls {
namespace internal {
namespace base {
void tas_spin_lock::lock() {
namespace internal {
namespace base {
void tas_spin_lock::lock() {
PROFILE_LOCK("Acquire Lock")
int tries = 0;
while (flag_.test_and_set(std::memory_order_acquire)) {
......@@ -13,9 +14,9 @@ namespace pls {
this_thread::yield();
}
}
}
}
bool tas_spin_lock::try_lock(unsigned int num_tries) {
bool tas_spin_lock::try_lock(unsigned int num_tries) {
PROFILE_LOCK("Try Acquire Lock")
while (flag_.test_and_set(std::memory_order_acquire)) {
num_tries--;
......@@ -24,11 +25,12 @@ namespace pls {
}
}
return true;
}
}
void tas_spin_lock::unlock() {
void tas_spin_lock::unlock() {
flag_.clear(std::memory_order_release);
}
}
}
}
}
}
}
#include "pls/internal/base/thread.h"
namespace pls {
namespace internal {
namespace base {
namespace internal {
namespace base {
#ifdef PLS_THREAD_SPECIFIC_PTHREAD
pthread_key_t this_thread::local_storage_key_ = false;
bool this_thread::local_storage_key_initialized_;
pthread_key_t this_thread::local_storage_key_ = false;
bool this_thread::local_storage_key_initialized_;
#endif
#ifdef PLS_THREAD_SPECIFIC_COMPILER
__thread void* this_thread::local_state_;
__thread void *this_thread::local_state_;
#endif
// implementation in header (C++ templating)
}
}
// implementation in header (C++ templating)
}
}
}
......@@ -2,9 +2,10 @@
#include "pls/internal/base/ttas_spin_lock.h"
namespace pls {
namespace internal {
namespace base {
void ttas_spin_lock::lock() {
namespace internal {
namespace base {
void ttas_spin_lock::lock() {
PROFILE_LOCK("Acquire Lock")
int tries = 0;
int expected = 0;
......@@ -19,9 +20,9 @@ namespace pls {
expected = 0;
} while (!flag_.compare_exchange_weak(expected, 1, std::memory_order_acquire));
}
}
bool ttas_spin_lock::try_lock(unsigned int num_tries) {
bool ttas_spin_lock::try_lock(unsigned int num_tries) {
PROFILE_LOCK("Try Acquire Lock")
int expected = 0;
......@@ -37,11 +38,12 @@ namespace pls {
} while (!flag_.compare_exchange_weak(expected, 1, std::memory_order_acquire));
return true;
}
}
void ttas_spin_lock::unlock() {
void ttas_spin_lock::unlock() {
flag_.store(0, std::memory_order_release);
}
}
}
}
}
}
}
......@@ -2,12 +2,14 @@
#include "pls/internal/base/system_details.h"
namespace pls {
namespace internal {
namespace data_structures {
aligned_stack::aligned_stack(char* memory_region, const std::size_t size):
namespace internal {
namespace data_structures {
aligned_stack::aligned_stack(char *memory_region, const std::size_t size) :
memory_start_{memory_region},
memory_end_{memory_region + size},
head_{base::alignment::next_alignment(memory_start_)} {}
}
}
}
}
}
......@@ -3,16 +3,17 @@
#include "pls/internal/data_structures/deque.h"
namespace pls {
namespace internal {
namespace data_structures {
deque_item* deque_internal::pop_head_internal() {
namespace internal {
namespace data_structures {
deque_item *deque_internal::pop_head_internal() {
std::lock_guard<base::spin_lock> lock{lock_};
if (head_ == nullptr) {
return nullptr;
}
deque_item* result = head_;
deque_item *result = head_;
head_ = head_->prev_;
if (head_ == nullptr) {
tail_ = nullptr;
......@@ -21,16 +22,16 @@ namespace pls {
}
return result;
}
}
deque_item* deque_internal::pop_tail_internal() {
deque_item *deque_internal::pop_tail_internal() {
std::lock_guard<base::spin_lock> lock{lock_};
if (tail_ == nullptr) {
return nullptr;
}
deque_item* result = tail_;
deque_item *result = tail_;
tail_ = tail_->next_;
if (tail_ == nullptr) {
head_ = nullptr;
......@@ -39,9 +40,9 @@ namespace pls {
}
return result;
}
}
void deque_internal::push_tail_internal(deque_item *new_item) {
void deque_internal::push_tail_internal(deque_item *new_item) {
std::lock_guard<base::spin_lock> lock{lock_};
if (tail_ != nullptr) {
......@@ -52,7 +53,8 @@ namespace pls {
new_item->next_ = tail_;
new_item->prev_ = nullptr;
tail_ = new_item;
}
}
}
}
}
}
}
......@@ -5,9 +5,10 @@
#include "pls/internal/scheduling/scheduler.h"
namespace pls {
namespace internal {
namespace scheduling {
bool abstract_task::steal_work() {
namespace internal {
namespace scheduling {
bool abstract_task::steal_work() {
PROFILE_STEALING("abstract_task::steal_work")
const auto my_state = base::this_thread::state<thread_state>();
const auto my_scheduler = my_state->scheduler_;
......@@ -27,7 +28,7 @@ namespace pls {
// Dig down to our level
PROFILE_STEALING("Go to our level")
abstract_task* current_task = target_state->root_task_;
abstract_task *current_task = target_state->root_task_;
while (current_task != nullptr && current_task->depth() < depth()) {
current_task = current_task->child_task_;
}
......@@ -70,7 +71,8 @@ namespace pls {
// internal steal was no success
return false;
};
}
}
}
}
}
}
......@@ -4,23 +4,24 @@
#include "pls/internal/scheduling/fork_join_task.h"
namespace pls {
namespace internal {
namespace scheduling {
fork_join_sub_task::fork_join_sub_task():
namespace internal {
namespace scheduling {
fork_join_sub_task::fork_join_sub_task() :
data_structures::deque_item{},
ref_count_{0},
parent_{nullptr},
tbb_task_{nullptr},
stack_state_{nullptr} {}
fork_join_sub_task::fork_join_sub_task(const fork_join_sub_task& other):
fork_join_sub_task::fork_join_sub_task(const fork_join_sub_task &other) :
data_structures::deque_item(other),
ref_count_{0},
parent_{nullptr},
tbb_task_{nullptr},
stack_state_{nullptr} {}
void fork_join_sub_task::execute() {
void fork_join_sub_task::execute() {
PROFILE_WORK_BLOCK("execute sub_task")
tbb_task_->currently_executing_ = this;
execute_internal();
......@@ -31,9 +32,9 @@ namespace pls {
if (parent_ != nullptr) {
parent_->ref_count_--;
}
}
}
void fork_join_sub_task::spawn_child_internal(fork_join_sub_task* sub_task) {
void fork_join_sub_task::spawn_child_internal(fork_join_sub_task *sub_task) {
// Keep our refcount up to date
ref_count_++;
......@@ -43,12 +44,12 @@ namespace pls {
sub_task->stack_state_ = tbb_task_->my_stack_->save_state();
tbb_task_->deque_.push_tail(sub_task);
}
}
void fork_join_sub_task::wait_for_all() {
void fork_join_sub_task::wait_for_all() {
while (ref_count_ > 0) {
PROFILE_STEALING("get local sub task")
fork_join_sub_task* local_task = tbb_task_->get_local_sub_task();
fork_join_sub_task *local_task = tbb_task_->get_local_sub_task();
PROFILE_END_BLOCK
if (local_task != nullptr) {
local_task->execute();
......@@ -64,19 +65,19 @@ namespace pls {
}
}
tbb_task_->my_stack_->reset_state(stack_state_);
}
}
fork_join_sub_task* fork_join_task::get_local_sub_task() {
fork_join_sub_task *fork_join_task::get_local_sub_task() {
return deque_.pop_tail();
}
}
fork_join_sub_task* fork_join_task::get_stolen_sub_task() {
fork_join_sub_task *fork_join_task::get_stolen_sub_task() {
return deque_.pop_head();
}
}
bool fork_join_task::internal_stealing(abstract_task* other_task) {
bool fork_join_task::internal_stealing(abstract_task *other_task) {
PROFILE_STEALING("fork_join_task::internal_stealing")
auto cast_other_task = reinterpret_cast<fork_join_task*>(other_task);
auto cast_other_task = reinterpret_cast<fork_join_task *>(other_task);
auto stolen_sub_task = cast_other_task->get_stolen_sub_task();
if (stolen_sub_task == nullptr) {
......@@ -90,11 +91,11 @@ namespace pls {
return true;
}
}
}
bool fork_join_task::split_task(base::spin_lock* lock) {
bool fork_join_task::split_task(base::spin_lock *lock) {
PROFILE_STEALING("fork_join_task::split_task")
fork_join_sub_task* stolen_sub_task = get_stolen_sub_task();
fork_join_sub_task *stolen_sub_task = get_stolen_sub_task();
if (stolen_sub_task == nullptr) {
return false;
}
......@@ -106,9 +107,9 @@ namespace pls {
scheduler::execute_task(task, depth());
return true;
}
}
void fork_join_task::execute() {
void fork_join_task::execute() {
PROFILE_WORK_BLOCK("execute fork_join_task");
// Bind this instance to our OS thread
......@@ -118,17 +119,18 @@ namespace pls {
// Execute it on our OS thread until its finished
root_task_->execute();
}
}
fork_join_sub_task* fork_join_task::currently_executing() const { return currently_executing_; }
fork_join_sub_task *fork_join_task::currently_executing() const { return currently_executing_; }
fork_join_task::fork_join_task(fork_join_sub_task* root_task, const abstract_task::id& id):
fork_join_task::fork_join_task(fork_join_sub_task *root_task, const abstract_task::id &id) :
abstract_task{0, id},
root_task_{root_task},
currently_executing_{nullptr},
my_stack_{nullptr},
deque_{},
last_stolen_{nullptr} {};
}
}
last_stolen_{nullptr} {}
}
}
}
#include "pls/internal/scheduling/root_task.h"
namespace pls {
namespace internal {
namespace scheduling {
namespace internal {
namespace scheduling {
}
}
}
}
}
#include "pls/internal/scheduling/run_on_n_threads_task.h"
namespace pls {
namespace internal {
namespace scheduling {
namespace internal {
namespace scheduling {
}
}
}
}
}
......@@ -2,9 +2,10 @@
#include "pls/internal/base/error_handling.h"
namespace pls {
namespace internal {
namespace scheduling {
scheduler::scheduler(scheduler_memory* memory, const unsigned int num_threads):
namespace internal {
namespace scheduling {
scheduler::scheduler(scheduler_memory *memory, const unsigned int num_threads) :
num_threads_{num_threads},
memory_{memory},
sync_barrier_{num_threads + 1},
......@@ -15,16 +16,17 @@ namespace pls {
for (unsigned int i = 0; i < num_threads_; i++) {
// Placement new is required, as the memory of `memory_` is not required to be initialized.
new((void*)memory_->thread_state_for(i)) thread_state{this, memory_->task_stack_for(i), i};
new ((void*)memory_->thread_for(i))base::thread<void(*)(), thread_state>(&worker_routine, memory_->thread_state_for(i));
}
new((void *) memory_->thread_state_for(i)) thread_state{this, memory_->task_stack_for(i), i};
new((void *) memory_->thread_for(i))base::thread<void (*)(), thread_state>(&worker_routine,
memory_->thread_state_for(i));
}
}
scheduler::~scheduler() {
scheduler::~scheduler() {
terminate();
}
}
void worker_routine() {
void worker_routine() {
auto my_state = base::this_thread::state<thread_state>();
while (true) {
......@@ -40,9 +42,9 @@ namespace pls {
my_state->scheduler_->sync_barrier_.wait();
}
}
}
void scheduler::terminate(bool wait_for_workers) {
void scheduler::terminate(bool wait_for_workers) {
if (terminated_) {
return;
}
......@@ -55,7 +57,8 @@ namespace pls {
memory_->thread_for(i)->join();
}
}
}
}
}
}
}
}
}
#include "pls/internal/scheduling/scheduler_memory.h"
namespace pls {
namespace internal {
namespace scheduling {
malloc_scheduler_memory::malloc_scheduler_memory(const size_t num_threads, const size_t memory_per_stack):
namespace internal {
namespace scheduling {
malloc_scheduler_memory::malloc_scheduler_memory(const size_t num_threads, const size_t memory_per_stack) :
num_threads_{num_threads} {
threads_ = reinterpret_cast<aligned_thread *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_thread)));
thread_states_ = reinterpret_cast<aligned_thread_state *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_thread_state)));
threads_ =
reinterpret_cast<aligned_thread *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_thread)));
thread_states_ = reinterpret_cast<aligned_thread_state *>(base::alignment::allocate_aligned(
num_threads * sizeof(aligned_thread_state)));
task_stacks_ = reinterpret_cast<aligned_aligned_stack *>(base::alignment::allocate_aligned(num_threads * sizeof(aligned_aligned_stack)));
task_stacks_memory_ = reinterpret_cast<char**>(base::alignment::allocate_aligned(num_threads * sizeof(char*)));
task_stacks_ = reinterpret_cast<aligned_aligned_stack *>(base::alignment::allocate_aligned(
num_threads * sizeof(aligned_aligned_stack)));
task_stacks_memory_ = reinterpret_cast<char **>(base::alignment::allocate_aligned(num_threads * sizeof(char *)));
for (size_t i = 0; i < num_threads_; i++) {
task_stacks_memory_[i] = reinterpret_cast<char*>(base::alignment::allocate_aligned(memory_per_stack));
new ((void*)task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack);
}
task_stacks_memory_[i] = reinterpret_cast<char *>(base::alignment::allocate_aligned(memory_per_stack));
new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack);
}
}
malloc_scheduler_memory::~malloc_scheduler_memory() {
malloc_scheduler_memory::~malloc_scheduler_memory() {
free(threads_);
free(thread_states_);
......@@ -25,7 +29,8 @@ namespace pls {
}
free(task_stacks_);
free(task_stacks_memory_);
}
}
}
}
}
}
}
#include "pls/internal/scheduling/thread_state.h"
namespace pls {
namespace internal {
namespace scheduling {
namespace internal {
namespace scheduling {
}
}
}
}
}
......@@ -13,7 +13,7 @@ static bool base_tests_visited;
static int base_tests_local_value_one;
static vector<int> base_tests_local_value_two;
TEST_CASE( "thread creation and joining", "[internal/data_structures/thread.h]") {
TEST_CASE("thread creation and joining", "[internal/data_structures/thread.h]") {
base_tests_visited = false;
auto t1 = start_thread([]() { base_tests_visited = true; });
t1.join();
......@@ -21,7 +21,7 @@ TEST_CASE( "thread creation and joining", "[internal/data_structures/thread.h]")
REQUIRE(base_tests_visited);
}
TEST_CASE( "thread state", "[internal/data_structures/thread.h]") {
TEST_CASE("thread state", "[internal/data_structures/thread.h]") {
int state_one = 1;
vector<int> state_two{1, 2};
......@@ -36,12 +36,12 @@ TEST_CASE( "thread state", "[internal/data_structures/thread.h]") {
int base_tests_shared_counter;
TEST_CASE( "spinlock protects concurrent counter", "[internal/data_structures/spinlock.h]") {
TEST_CASE("spinlock protects concurrent counter", "[internal/data_structures/spinlock.h]") {
constexpr int num_iterations = 1000000;
base_tests_shared_counter = 0;
spin_lock lock{};
SECTION( "lock can be used by itself" ) {
SECTION("lock can be used by itself") {
auto t1 = start_thread([&]() {
for (int i = 0; i < num_iterations; i++) {
lock.lock();
......@@ -63,7 +63,7 @@ TEST_CASE( "spinlock protects concurrent counter", "[internal/data_structures/sp
REQUIRE(base_tests_shared_counter == 0);
}
SECTION( "lock can be used with std::lock_guard" ) {
SECTION("lock can be used with std::lock_guard") {
auto t1 = start_thread([&]() {
for (int i = 0; i < num_iterations; i++) {
std::lock_guard<spin_lock> my_lock{lock};
......
......@@ -12,13 +12,12 @@ using namespace pls::internal::data_structures;
using namespace pls::internal::base;
using namespace std;
TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/aligned_stack.h]") {
TEST_CASE("aligned stack stores objects correctly", "[internal/data_structures/aligned_stack.h]") {
constexpr long data_size = 1024;
char data[data_size];
aligned_stack stack{data, data_size};
SECTION( "stack correctly pushes sub linesize objects" ) {
SECTION("stack correctly pushes sub linesize objects") {
std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
std::array<char, 64> small_data_two{};
std::array<char, 1> small_data_three{'A'};
......@@ -32,7 +31,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_three) % system_details::CACHE_LINE_SIZE == 0);
}
SECTION( "stack correctly pushes above linesize objects" ) {
SECTION("stack correctly pushes above linesize objects") {
std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
std::array<char, system_details::CACHE_LINE_SIZE + 10> big_data_one{};
......@@ -43,7 +42,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
REQUIRE(reinterpret_cast<std::uintptr_t>(small_pointer_one) % system_details::CACHE_LINE_SIZE == 0);
}
SECTION( "stack correctly stores and retrieves objects" ) {
SECTION("stack correctly stores and retrieves objects") {
std::array<char, 5> data_one{'a', 'b', 'c', 'd', 'e'};
stack.push(data_one);
......@@ -52,7 +51,7 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
REQUIRE(retrieved_data == std::array<char, 5>{'a', 'b', 'c', 'd', 'e'});
}
SECTION( "stack can push and pop multiple times with correct alignment" ) {
SECTION("stack can push and pop multiple times with correct alignment") {
std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
std::array<char, 64> small_data_two{};
std::array<char, 1> small_data_three{'A'};
......@@ -76,15 +75,15 @@ TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/
}
}
TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]") {
class my_item: public deque_item {
TEST_CASE("deque stores objects correctly", "[internal/data_structures/deque.h]") {
class my_item : public deque_item {
};
deque<my_item> deque;
my_item one, two, three;
SECTION( "add and remove items from the tail" ) {
SECTION("add and remove items from the tail") {
deque.push_tail(&one);
deque.push_tail(&two);
deque.push_tail(&three);
......@@ -94,7 +93,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE(deque.pop_tail() == &one);
}
SECTION( "handles getting empty by popping the tail correctly" ) {
SECTION("handles getting empty by popping the tail correctly") {
deque.push_tail(&one);
REQUIRE(deque.pop_tail() == &one);
......@@ -102,7 +101,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE(deque.pop_tail() == &two);
}
SECTION( "remove items from the head" ) {
SECTION("remove items from the head") {
deque.push_tail(&one);
deque.push_tail(&two);
deque.push_tail(&three);
......@@ -112,7 +111,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE(deque.pop_head() == &three);
}
SECTION( "handles getting empty by popping the head correctly" ) {
SECTION("handles getting empty by popping the head correctly") {
deque.push_tail(&one);
REQUIRE(deque.pop_head() == &one);
......@@ -120,7 +119,7 @@ TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]
REQUIRE(deque.pop_head() == &two);
}
SECTION( "handles getting empty by popping the head and tail correctly" ) {
SECTION("handles getting empty by popping the head and tail correctly") {
deque.push_tail(&one);
REQUIRE(deque.pop_tail() == &one);
......
......@@ -4,11 +4,11 @@
using namespace pls;
class once_sub_task: public fork_join_sub_task {
std::atomic<int>* counter_;
class once_sub_task : public fork_join_sub_task {
std::atomic<int> *counter_;
int children_;
protected:
protected:
void execute_internal() override {
(*counter_)++;
for (int i = 0; i < children_; i++) {
......@@ -16,38 +16,37 @@ protected:
}
}
public:
explicit once_sub_task(std::atomic<int>* counter, int children):
public:
explicit once_sub_task(std::atomic<int> *counter, int children) :
fork_join_sub_task(),
counter_{counter},
children_{children} {}
};
class force_steal_sub_task: public fork_join_sub_task {
std::atomic<int>* parent_counter_;
std::atomic<int>* overall_counter_;
class force_steal_sub_task : public fork_join_sub_task {
std::atomic<int> *parent_counter_;
std::atomic<int> *overall_counter_;
protected:
protected:
void execute_internal() override {
(*overall_counter_)--;
if (overall_counter_->load() > 0) {
std::atomic<int> counter{1};
spawn_child(force_steal_sub_task(&counter, overall_counter_));
while (counter.load() > 0)
; // Spin...
while (counter.load() > 0); // Spin...
}
(*parent_counter_)--;
}
public:
explicit force_steal_sub_task(std::atomic<int>* parent_counter, std::atomic<int>* overall_counter):
public:
explicit force_steal_sub_task(std::atomic<int> *parent_counter, std::atomic<int> *overall_counter) :
fork_join_sub_task(),
parent_counter_{parent_counter},
overall_counter_{overall_counter} {}
};
TEST_CASE( "tbb tasks are scheduled correctly", "[internal/scheduling/fork_join_task.h]") {
TEST_CASE("tbb tasks are scheduled correctly", "[internal/scheduling/fork_join_task.h]") {
malloc_scheduler_memory my_scheduler_memory{8, 2 << 12};
SECTION("tasks are executed exactly once") {
......@@ -56,7 +55,7 @@ TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/fork_join_t
int total_tasks = 1 + 4 + 4 * 3 + 4 * 3 * 2 + 4 * 3 * 2 * 1;
std::atomic<int> counter{0};
my_scheduler.perform_work([&] (){
my_scheduler.perform_work([&]() {
once_sub_task sub_task{&counter, start_counter};
fork_join_task task{&sub_task, unique_id::create(42)};
scheduler::execute_task(task);
......@@ -68,7 +67,7 @@ TEST_CASE( "tbb task are scheduled correctly", "[internal/scheduling/fork_join_t
SECTION("tasks can be stolen") {
scheduler my_scheduler{&my_scheduler_memory, 8};
my_scheduler.perform_work([&] (){
my_scheduler.perform_work([&]() {
std::atomic<int> dummy_parent{1}, overall_counter{8};
force_steal_sub_task sub_task{&dummy_parent, &overall_counter};
fork_join_task task{&sub_task, unique_id::create(42)};
......