Commit e2e34b02 by FritzFlorian

Restructure and comment on base abstractions.

We do this to properly separate the cache alignment logic in the next step, allowing us to port all cache aligned objects without worrying about portability.
parent 310c33d2
Pipeline #1143 failed with stages
in 39 seconds
...@@ -30,7 +30,7 @@ void combine(complex_vector::iterator data, int n) { ...@@ -30,7 +30,7 @@ void combine(complex_vector::iterator data, int n) {
std::complex<double> odd = data[i + n / 2]; std::complex<double> odd = data[i + n / 2];
// w is the "twiddle-factor". // w is the "twiddle-factor".
// this could be cached, but we run the same 'base' algorithm parallel/serial, // this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
// so it won't impact the performance comparison. // so it won't impact the performance comparison.
std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n)); std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
......
...@@ -3,69 +3,19 @@ ...@@ -3,69 +3,19 @@
#include <functional> #include <functional>
#include <array> #include <array>
#include <atomic> #include <atomic>
#include <memory>
#include <pls/pls.h> #include <pls/pls.h>
#include <pls/internal/helpers/prohibit_new.h> #include <pls/internal/helpers/prohibit_new.h>
#include <pls/internal/scheduling/thread_state.h>
using namespace pls; using namespace pls;
// Example for static memory allocation (no malloc or free required)
static static_scheduler_memory<8, 2 << 12> my_scheduler_memory;
/**
 * Fork-join sub task that computes the num-th Fibonacci number and writes it to *result.
 * Inputs up to CUTOFF are computed serially; larger inputs spawn two child tasks
 * (for num - 1 and num - 2) and add their results once wait_for_all() has returned.
 */
class fib: public fork_join_sub_task {
    // Largest input that is still computed without spawning child tasks
    static constexpr int CUTOFF = 20;

    int num_;
    int* result_;

public:
    fib(int num, int* result): num_{num}, result_{result} {}

private:
    // Plain recursive Fibonacci, no task spawning involved
    static int fib_serial(int num) {
        if (num == 0 || num == 1) {
            return num;
        }
        return fib_serial(num - 1) + fib_serial(num - 2);
    }

protected:
    void execute_internal() override {
        if (num_ > CUTOFF) {
            // The child tasks receive pointers to these locals; they are only read after wait_for_all()
            int first_part;
            int second_part;

            spawn_child(fib{num_ - 1, &first_part});
            spawn_child(fib{num_ - 2, &second_part});
            wait_for_all();

            *result_ = first_part + second_part;
        } else {
            *result_ = fib_serial(num_);
        }
    }
};
int main() { int main() {
scheduler my_scheduler{&my_scheduler_memory, 4}; using aligned_state = std::aligned_storage<sizeof(internal::scheduling::thread_state), 64>::type;
aligned_state data;
auto start = std::chrono::high_resolution_clock::now();
my_scheduler.perform_work([] (){
int result;
fib fib_sub_task{45, &result};
fork_join_task tbb_task{&fib_sub_task, task_id{1}};
scheduler::execute_task(tbb_task);
std::cout << "Result: " << result << std::endl; std::cout << sizeof(aligned_state) << std::endl;
}); malloc_scheduler_memory sched_memory{8};
auto end = std::chrono::high_resolution_clock::now(); std::cout << (std::uintptr_t)sched_memory.thread_for(0) % 64 << ", " << (std::uintptr_t)sched_memory.thread_for(1) % 64 << ", " << (std::uintptr_t)sched_memory.thread_for(2) % 64 << ", " << std::endl;
long time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
std::cout << "Startup time in us: " << time << std::endl;
} }
#include <pls/internal/base/thread.h> #include <pls/internal/base/thread.h>
#include <pls/internal/helpers/prohibit_new.h> #include <pls/internal/helpers/prohibit_new.h>
using namespace pls::internal::base; using namespace pls::internal::data_structures;
int global = 0; int global = 0;
......
# List all required files here (cmake best practice to NOT automate this step!) # List all required files here (cmake best practice to NOT automate this step!)
add_library(pls STATIC add_library(pls STATIC
src/pls.cpp include/pls/pls.h include/pls/pls.h src/pls.cpp
src/internal/base/spin_lock.cpp include/pls/internal/base/spin_lock.h
src/internal/base/thread.cpp include/pls/internal/base/thread.h include/pls/algorithms/invoke_parallel.h src/algorithms/invoke_parallel.cpp
include/pls/internal/helpers/prohibit_new.h
src/internal/scheduling/abstract_task.cpp include/pls/internal/scheduling/abstract_task.h include/pls/internal/base/spin_lock.h src/internal/base/spin_lock.cpp
src/internal/scheduling/scheduler.cpp include/pls/internal/scheduling/scheduler.h include/pls/internal/base/thread.h src/internal/base/thread.cpp
src/internal/scheduling/thread_state.cpp include/pls/internal/scheduling/thread_state.h include/pls/internal/base/barrier.h src/internal/base/barrier.cpp
src/internal/base/barrier.cpp include/pls/internal/base/barrier.h
src/internal/scheduling/root_task.cpp include/pls/internal/scheduling/root_task.h
src/internal/base/aligned_stack.cpp include/pls/internal/base/aligned_stack.h
include/pls/internal/base/system_details.h include/pls/internal/base/system_details.h
src/internal/scheduling/run_on_n_threads_task.cpp include/pls/internal/scheduling/run_on_n_threads_task.h
src/internal/scheduling/fork_join_task.cpp include/pls/internal/scheduling/fork_join_task.h
src/internal/base/deque.cpp include/pls/internal/base/deque.h
src/algorithms/invoke_parallel.cpp include/pls/algorithms/invoke_parallel.h
include/pls/internal/base/error_handling.h include/pls/internal/base/error_handling.h
include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp
include/pls/internal/data_structures/aligned_stack.h src/internal/data_structures/aligned_stack.cpp
include/pls/internal/data_structures/deque.h src/internal/data_structures/deque.cpp
include/pls/internal/helpers/prohibit_new.h
include/pls/internal/helpers/profiler.h include/pls/internal/helpers/profiler.h
include/pls/internal/helpers/mini_benchmark.h) include/pls/internal/helpers/mini_benchmark.h
include/pls/internal/scheduling/root_task.h src/internal/scheduling/root_task.cpp
include/pls/internal/scheduling/thread_state.h src/internal/scheduling/thread_state.cpp
include/pls/internal/scheduling/abstract_task.h src/internal/scheduling/abstract_task.cpp
include/pls/internal/scheduling/scheduler.h src/internal/scheduling/scheduler.cpp
include/pls/internal/scheduling/run_on_n_threads_task.h src/internal/scheduling/run_on_n_threads_task.cpp
include/pls/internal/scheduling/fork_join_task.h src/internal/scheduling/fork_join_task.cpp
include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp
)
# Add everything in `./include` to be in the include path of this project # Add everything in `./include` to be in the include path of this project
target_include_directories(pls target_include_directories(pls
......
...@@ -7,21 +7,22 @@ ...@@ -7,21 +7,22 @@
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
/**
* Provides standard barrier behaviour.
* `count` threads have to call `wait()` before any of the `wait()` calls returns,
* thus blocking all threads until everyone reached the barrier.
*
* PORTABILITY:
* Current implementation is based on pthreads.
*/
class barrier { class barrier {
pthread_barrier_t barrier_; pthread_barrier_t barrier_;
public: public:
explicit barrier(const unsigned int count): barrier_{} { explicit barrier(unsigned int count);
pthread_barrier_init(&barrier_, nullptr, count); ~barrier();
}
~barrier() {
pthread_barrier_destroy(&barrier_);
}
void wait() { void wait();
pthread_barrier_wait(&barrier_);
}
}; };
} }
} }
......
...@@ -4,7 +4,12 @@ ...@@ -4,7 +4,12 @@
#include <iostream> #include <iostream>
// TODO: Figure out proper exception handling /**
* Called when there is a non-recoverable error or a violated invariant in the scheduler.
* This SHOULD NOT HAPPEN AT ANY POINT in production, any instance of this is a bug!
* The implementation can be changed if for example no iostream is available on a system
* (or its inclusion adds too much overhead).
*/
#define PLS_ERROR(msg) std::cout << msg << std::endl; exit(1); #define PLS_ERROR(msg) std::cout << msg << std::endl; exit(1);
#endif //PLS_ERROR_HANDLING_H #endif //PLS_ERROR_HANDLING_H
...@@ -10,6 +10,12 @@ ...@@ -10,6 +10,12 @@
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
/**
* A simple set and test_and_set based spin lock implementation.
*
* PORTABILITY:
* Current implementation is based on C++ 11 atomic_flag.
*/
class spin_lock { class spin_lock {
std::atomic_flag flag_; std::atomic_flag flag_;
int yield_at_tries_; int yield_at_tries_;
......
...@@ -7,6 +7,12 @@ ...@@ -7,6 +7,12 @@
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
/**
* Collection of system details, e.g. hardware cache line size.
*
* PORTABILITY:
* Currently sane default values for x86.
*/
constexpr std::uintptr_t CACHE_LINE_SIZE = 64; constexpr std::uintptr_t CACHE_LINE_SIZE = 64;
} }
} }
......
...@@ -15,6 +15,16 @@ namespace pls { ...@@ -15,6 +15,16 @@ namespace pls {
namespace base { namespace base {
using thread_entrypoint = void(); using thread_entrypoint = void();
/**
* Static methods that can be performed on the current thread.
*
* usage:
* this_thread::yield();
* T* state = this_thread::state<T>();
*
* PORTABILITY:
* Current implementation is based on pthreads.
*/
class this_thread { class this_thread {
template<typename Function, typename State> template<typename Function, typename State>
friend class thread; friend class thread;
...@@ -51,6 +61,22 @@ namespace pls { ...@@ -51,6 +61,22 @@ namespace pls {
} }
}; };
/**
* Abstraction for starting a function in a separate thread.
*
* @tparam Function Lambda being started on the new thread.
* @tparam State State type held for this thread.
*
* usage:
* T* state;
* auto thread = start_thread([] {
* // Run on new thread
* }, state);
* thread.join(); // Wait for it to finish
*
* PORTABILITY:
* Current implementation is based on pthreads.
*/
template<typename Function, typename State> template<typename Function, typename State>
class thread { class thread {
friend class this_thread; friend class this_thread;
......
...@@ -9,7 +9,19 @@ ...@@ -9,7 +9,19 @@
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace data_structures {
/**
* Generic stack-like data structure that allows to allocate arbitrary objects in a given memory region.
* The objects will be stored aligned in the stack, making the storage cache friendly and very fast
* (as long as one can live with the stack restrictions).
*
* IMPORTANT: Does not call destructors on stored objects! Do not allocate resources in the objects!
*
* Usage:
* aligned_stack stack{pointer_to_memory, size_of_memory};
* T* pointer = stack.push(some_object); // Copy-Construct the object on top of stack
* stack.pop<T>(); // Remove the top object of type T and return a copy (no destructor is called)
*/
class aligned_stack { class aligned_stack {
// Keep bounds of our memory block // Keep bounds of our memory block
char* memory_start_; char* memory_start_;
...@@ -24,15 +36,11 @@ namespace pls { ...@@ -24,15 +36,11 @@ namespace pls {
typedef char* state; typedef char* state;
aligned_stack(): memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {}; aligned_stack(): memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {};
aligned_stack(char* memory_region, const std::size_t size);
aligned_stack(char* memory_region, const std::size_t size):
memory_start_{memory_region},
memory_end_{memory_region + size},
head_{next_alignment(memory_start_)} {}
template<typename T> template<typename T>
T* push(const T& object) { T* push(const T& object) {
// Placement new into desired memory location // Copy-Construct
return new ((void*)push<T>())T(object); return new ((void*)push<T>())T(object);
} }
...@@ -52,7 +60,6 @@ namespace pls { ...@@ -52,7 +60,6 @@ namespace pls {
template<typename T> template<typename T>
T pop() { T pop() {
head_ = head_ - next_alignment(sizeof(T)); head_ = head_ - next_alignment(sizeof(T));
return *reinterpret_cast<T*>(head_); return *reinterpret_cast<T*>(head_);
} }
......
...@@ -2,11 +2,14 @@ ...@@ -2,11 +2,14 @@
#ifndef PLS_DEQUE_H #ifndef PLS_DEQUE_H
#define PLS_DEQUE_H #define PLS_DEQUE_H
#include "spin_lock.h" #include "pls/internal/base/spin_lock.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace data_structures {
/**
* Turns any object into deque item when inheriting from this.
*/
class deque_item { class deque_item {
friend class deque_internal; friend class deque_internal;
...@@ -20,13 +23,19 @@ namespace pls { ...@@ -20,13 +23,19 @@ namespace pls {
deque_item* head_; deque_item* head_;
deque_item* tail_; deque_item* tail_;
spin_lock lock_; base::spin_lock lock_;
deque_item* pop_head_internal(); deque_item* pop_head_internal();
deque_item* pop_tail_internal(); deque_item* pop_tail_internal();
void push_tail_internal(deque_item *new_item); void push_tail_internal(deque_item *new_item);
}; };
/**
* A double linked list based deque.
* Storage is therefore only needed for the individual items.
*
* @tparam Item The type of items stored in this deque
*/
template<typename Item> template<typename Item>
class deque: deque_internal { class deque: deque_internal {
public: public:
......
...@@ -4,8 +4,8 @@ ...@@ -4,8 +4,8 @@
#include "pls/internal/helpers/profiler.h" #include "pls/internal/helpers/profiler.h"
#include "pls/internal/base/aligned_stack.h" #include "pls/internal/data_structures/aligned_stack.h"
#include "pls/internal/base/deque.h" #include "pls/internal/data_structures/deque.h"
#include "abstract_task.h" #include "abstract_task.h"
#include "thread_state.h" #include "thread_state.h"
...@@ -14,7 +14,7 @@ namespace pls { ...@@ -14,7 +14,7 @@ namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
class fork_join_task; class fork_join_task;
class fork_join_sub_task: public base::deque_item { class fork_join_sub_task: public data_structures::deque_item {
friend class fork_join_task; friend class fork_join_task;
// Coordinate finishing of sub_tasks // Coordinate finishing of sub_tasks
...@@ -25,7 +25,7 @@ namespace pls { ...@@ -25,7 +25,7 @@ namespace pls {
fork_join_task* tbb_task_; fork_join_task* tbb_task_;
// Stack Management (reset stack pointer after wait_for_all() calls) // Stack Management (reset stack pointer after wait_for_all() calls)
base::aligned_stack::state stack_state_; data_structures::aligned_stack::state stack_state_;
protected: protected:
explicit fork_join_sub_task(); explicit fork_join_sub_task();
fork_join_sub_task(const fork_join_sub_task& other); fork_join_sub_task(const fork_join_sub_task& other);
...@@ -62,10 +62,10 @@ namespace pls { ...@@ -62,10 +62,10 @@ namespace pls {
fork_join_sub_task* root_task_; fork_join_sub_task* root_task_;
fork_join_sub_task* currently_executing_; fork_join_sub_task* currently_executing_;
base::aligned_stack* my_stack_; data_structures::aligned_stack* my_stack_;
// Double-Ended Queue management // Double-Ended Queue management
base::deque<fork_join_sub_task> deque_; data_structures::deque<fork_join_sub_task> deque_;
// Steal Management // Steal Management
fork_join_sub_task* last_stolen_; fork_join_sub_task* last_stolen_;
......
...@@ -7,7 +7,8 @@ ...@@ -7,7 +7,8 @@
#include "pls/internal/helpers/profiler.h" #include "pls/internal/helpers/profiler.h"
#include "pls/internal/base/aligned_stack.h" #include "pls/internal/data_structures/aligned_stack.h"
#include "pls/internal/base/thread.h" #include "pls/internal/base/thread.h"
#include "pls/internal/base/barrier.h" #include "pls/internal/base/barrier.h"
......
#include "pls/internal/base/aligned_stack.h" #include "pls/internal/data_structures/aligned_stack.h"
#include "pls/internal/base/thread.h" #include "pls/internal/base/thread.h"
#include "thread_state.h" #include "thread_state.h"
...@@ -17,7 +17,7 @@ namespace pls { ...@@ -17,7 +17,7 @@ namespace pls {
virtual size_t max_threads() = 0; virtual size_t max_threads() = 0;
virtual thread_state* thread_state_for(size_t id) = 0; virtual thread_state* thread_state_for(size_t id) = 0;
virtual scheduler_thread* thread_for(size_t id) = 0; virtual scheduler_thread* thread_for(size_t id) = 0;
virtual base::aligned_stack* task_stack_for(size_t id) = 0; virtual data_structures::aligned_stack* task_stack_for(size_t id) = 0;
}; };
template<size_t MAX_THREADS, size_t TASK_STACK_SIZE> template<size_t MAX_THREADS, size_t TASK_STACK_SIZE>
...@@ -25,28 +25,28 @@ namespace pls { ...@@ -25,28 +25,28 @@ namespace pls {
std::array<scheduler_thread, MAX_THREADS> threads_; std::array<scheduler_thread, MAX_THREADS> threads_;
std::array<thread_state, MAX_THREADS> thread_states_; std::array<thread_state, MAX_THREADS> thread_states_;
std::array<std::array<char, TASK_STACK_SIZE>, MAX_THREADS> task_stacks_memory_; std::array<std::array<char, TASK_STACK_SIZE>, MAX_THREADS> task_stacks_memory_;
std::array<base::aligned_stack, MAX_THREADS> task_stacks_; std::array<data_structures::aligned_stack, MAX_THREADS> task_stacks_;
public: public:
static_scheduler_memory() { static_scheduler_memory() {
for (size_t i = 0; i < MAX_THREADS; i++) { for (size_t i = 0; i < MAX_THREADS; i++) {
task_stacks_[i] = base::aligned_stack(task_stacks_memory_[i].data(), TASK_STACK_SIZE); task_stacks_[i] = data_structures::aligned_stack(task_stacks_memory_[i].data(), TASK_STACK_SIZE);
} }
} }
size_t max_threads() override { return MAX_THREADS; } size_t max_threads() override { return MAX_THREADS; }
thread_state* thread_state_for(size_t id) override { return &thread_states_[id]; } thread_state* thread_state_for(size_t id) override { return &thread_states_[id]; }
scheduler_thread* thread_for(size_t id) override { return &threads_[id]; } scheduler_thread* thread_for(size_t id) override { return &threads_[id]; }
base::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } data_structures::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; }
}; };
class malloc_scheduler_memory: public scheduler_memory { class malloc_scheduler_memory: public scheduler_memory {
size_t num_threads_; size_t num_threads_;
scheduler_thread* threads_; alignas(64) scheduler_thread* threads_;
thread_state* thread_states_; thread_state* thread_states_;
char** task_stacks_memory_; char** task_stacks_memory_;
base::aligned_stack* task_stacks_; data_structures::aligned_stack* task_stacks_;
public: public:
explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16); explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16);
~malloc_scheduler_memory(); ~malloc_scheduler_memory();
...@@ -54,7 +54,7 @@ namespace pls { ...@@ -54,7 +54,7 @@ namespace pls {
size_t max_threads() override { return num_threads_; } size_t max_threads() override { return num_threads_; }
thread_state* thread_state_for(size_t id) override { return &thread_states_[id]; } thread_state* thread_state_for(size_t id) override { return &thread_states_[id]; }
scheduler_thread* thread_for(size_t id) override { return &threads_[id]; } scheduler_thread* thread_for(size_t id) override { return &threads_[id]; }
base::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } data_structures::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; }
}; };
} }
} }
......
...@@ -4,10 +4,9 @@ ...@@ -4,10 +4,9 @@
#include <random> #include <random>
#include "pls/internal/data_structures/aligned_stack.h"
#include "abstract_task.h" #include "abstract_task.h"
#include "pls/internal/base/aligned_stack.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
...@@ -18,7 +17,7 @@ namespace pls { ...@@ -18,7 +17,7 @@ namespace pls {
scheduler* scheduler_; scheduler* scheduler_;
abstract_task* root_task_; abstract_task* root_task_;
abstract_task* current_task_; abstract_task* current_task_;
base::aligned_stack* task_stack_; data_structures::aligned_stack* task_stack_;
size_t id_; size_t id_;
base::spin_lock lock_; base::spin_lock lock_;
std::minstd_rand random_; std::minstd_rand random_;
...@@ -31,7 +30,7 @@ namespace pls { ...@@ -31,7 +30,7 @@ namespace pls {
id_{0}, id_{0},
random_{id_} {}; random_{id_} {};
thread_state(scheduler* scheduler, base::aligned_stack* task_stack, unsigned int id): thread_state(scheduler* scheduler, data_structures::aligned_stack* task_stack, unsigned int id):
scheduler_{scheduler}, scheduler_{scheduler},
root_task_{nullptr}, root_task_{nullptr},
current_task_{nullptr}, current_task_{nullptr},
......
...@@ -3,7 +3,17 @@ ...@@ -3,7 +3,17 @@
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace base {
// Initialises the wrapped pthread barrier for `count` threads using default attributes (nullptr).
// NOTE(review): the status code returned by pthread_barrier_init is discarded.
barrier::barrier(const unsigned int count): barrier_{} {
    static_cast<void>(pthread_barrier_init(&barrier_, nullptr, count));
}

// Destroys the wrapped pthread barrier; the status code is discarded.
barrier::~barrier() {
    static_cast<void>(pthread_barrier_destroy(&barrier_));
}

// Forwards to pthread_barrier_wait on the wrapped barrier; the return value is discarded.
void barrier::wait() {
    static_cast<void>(pthread_barrier_wait(&barrier_));
}
} }
} }
} }
#include "pls/internal/base/aligned_stack.h" #include "pls/internal/data_structures/aligned_stack.h"
#include "pls/internal/base/system_details.h" #include "pls/internal/base/system_details.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace data_structures {
// Creates a stack over the `size` bytes beginning at `memory_region`.
// The head does not necessarily start at the first byte of the region:
// it is set to next_alignment(memory_start_).
aligned_stack::aligned_stack(char* memory_region, const std::size_t size):
    memory_start_(memory_region),
    memory_end_(memory_region + size),
    head_(next_alignment(memory_start_)) {}
std::uintptr_t aligned_stack::next_alignment(std::uintptr_t size) { std::uintptr_t aligned_stack::next_alignment(std::uintptr_t size) {
std::uintptr_t miss_alignment = size % CACHE_LINE_SIZE; std::uintptr_t miss_alignment = size % base::CACHE_LINE_SIZE;
if (miss_alignment == 0) { if (miss_alignment == 0) {
return size; return size;
} else { } else {
return size + (CACHE_LINE_SIZE - miss_alignment); return size + (base::CACHE_LINE_SIZE - miss_alignment);
} }
} }
......
#include <mutex> #include <mutex>
#include "pls/internal/base/deque.h" #include "pls/internal/data_structures/deque.h"
namespace pls { namespace pls {
namespace internal { namespace internal {
namespace base { namespace data_structures {
deque_item* deque_internal::pop_head_internal() { deque_item* deque_internal::pop_head_internal() {
std::lock_guard<spin_lock> lock{lock_}; std::lock_guard<base::spin_lock> lock{lock_};
if (head_ == nullptr) { if (head_ == nullptr) {
return nullptr; return nullptr;
...@@ -24,7 +24,7 @@ namespace pls { ...@@ -24,7 +24,7 @@ namespace pls {
} }
deque_item* deque_internal::pop_tail_internal() { deque_item* deque_internal::pop_tail_internal() {
std::lock_guard<spin_lock> lock{lock_}; std::lock_guard<base::spin_lock> lock{lock_};
if (tail_ == nullptr) { if (tail_ == nullptr) {
return nullptr; return nullptr;
...@@ -42,7 +42,7 @@ namespace pls { ...@@ -42,7 +42,7 @@ namespace pls {
} }
void deque_internal::push_tail_internal(deque_item *new_item) { void deque_internal::push_tail_internal(deque_item *new_item) {
std::lock_guard<spin_lock> lock{lock_}; std::lock_guard<base::spin_lock> lock{lock_};
if (tail_ != nullptr) { if (tail_ != nullptr) {
tail_->prev_ = new_item; tail_->prev_ = new_item;
......
...@@ -7,14 +7,14 @@ namespace pls { ...@@ -7,14 +7,14 @@ namespace pls {
namespace internal { namespace internal {
namespace scheduling { namespace scheduling {
fork_join_sub_task::fork_join_sub_task(): fork_join_sub_task::fork_join_sub_task():
base::deque_item{}, data_structures::deque_item{},
ref_count_{0}, ref_count_{0},
parent_{nullptr}, parent_{nullptr},
tbb_task_{nullptr}, tbb_task_{nullptr},
stack_state_{nullptr} {} stack_state_{nullptr} {}
fork_join_sub_task::fork_join_sub_task(const fork_join_sub_task& other): fork_join_sub_task::fork_join_sub_task(const fork_join_sub_task& other):
base::deque_item(other), data_structures::deque_item(other),
ref_count_{0}, ref_count_{0},
parent_{nullptr}, parent_{nullptr},
tbb_task_{nullptr}, tbb_task_{nullptr},
......
...@@ -8,11 +8,11 @@ namespace pls { ...@@ -8,11 +8,11 @@ namespace pls {
threads_ = reinterpret_cast<scheduler_thread*>(malloc(num_threads * sizeof(scheduler_thread))); threads_ = reinterpret_cast<scheduler_thread*>(malloc(num_threads * sizeof(scheduler_thread)));
thread_states_ = reinterpret_cast<thread_state*>(malloc(num_threads * sizeof(thread_state))); thread_states_ = reinterpret_cast<thread_state*>(malloc(num_threads * sizeof(thread_state)));
task_stacks_ = reinterpret_cast<base::aligned_stack*>(malloc(num_threads * sizeof(base::aligned_stack))); task_stacks_ = reinterpret_cast<data_structures::aligned_stack*>(malloc(num_threads * sizeof(data_structures::aligned_stack)));
task_stacks_memory_ = reinterpret_cast<char**>(malloc(num_threads * sizeof(char*))); task_stacks_memory_ = reinterpret_cast<char**>(malloc(num_threads * sizeof(char*)));
for (size_t i = 0; i < num_threads_; i++) { for (size_t i = 0; i < num_threads_; i++) {
task_stacks_memory_[i] = reinterpret_cast<char*>(malloc(memory_per_stack)); task_stacks_memory_[i] = reinterpret_cast<char*>(malloc(memory_per_stack));
task_stacks_[i] = base::aligned_stack(task_stacks_memory_[i], memory_per_stack); task_stacks_[i] = data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack);
} }
} }
......
add_executable(tests add_executable(tests
main.cpp main.cpp
base_tests.cpp scheduling_tests.cpp) base_tests.cpp scheduling_tests.cpp data_structures_test.cpp)
target_link_libraries(tests catch2 pls) target_link_libraries(tests catch2 pls)
#include <catch.hpp> #include <catch.hpp>
#include <pls/internal/base/thread.h> #include <pls/internal/base/thread.h>
#include <pls/internal/base/spin_lock.h> #include <pls/internal/base/spin_lock.h>
#include <pls/internal/base/aligned_stack.h>
#include <pls/internal/base/system_details.h> #include <pls/internal/base/system_details.h>
#include <vector> #include <vector>
#include <mutex> #include <mutex>
#include <pls/internal/base/deque.h>
using namespace pls::internal::base; using namespace pls::internal::base;
using namespace std; using namespace std;
...@@ -15,7 +13,7 @@ static bool base_tests_visited; ...@@ -15,7 +13,7 @@ static bool base_tests_visited;
static int base_tests_local_value_one; static int base_tests_local_value_one;
static vector<int> base_tests_local_value_two; static vector<int> base_tests_local_value_two;
TEST_CASE( "thread creation and joining", "[internal/base/thread.h]") { TEST_CASE( "thread creation and joining", "[internal/data_structures/thread.h]") {
base_tests_visited = false; base_tests_visited = false;
auto t1 = start_thread([]() { base_tests_visited = true; }); auto t1 = start_thread([]() { base_tests_visited = true; });
t1.join(); t1.join();
...@@ -23,7 +21,7 @@ TEST_CASE( "thread creation and joining", "[internal/base/thread.h]") { ...@@ -23,7 +21,7 @@ TEST_CASE( "thread creation and joining", "[internal/base/thread.h]") {
REQUIRE(base_tests_visited); REQUIRE(base_tests_visited);
} }
TEST_CASE( "thread state", "[internal/base/thread.h]") { TEST_CASE( "thread state", "[internal/data_structures/thread.h]") {
int state_one = 1; int state_one = 1;
vector<int> state_two{1, 2}; vector<int> state_two{1, 2};
...@@ -38,7 +36,7 @@ TEST_CASE( "thread state", "[internal/base/thread.h]") { ...@@ -38,7 +36,7 @@ TEST_CASE( "thread state", "[internal/base/thread.h]") {
int base_tests_shared_counter; int base_tests_shared_counter;
TEST_CASE( "spinlock protects concurrent counter", "[internal/base/spinlock.h]") { TEST_CASE( "spinlock protects concurrent counter", "[internal/data_structures/spinlock.h]") {
constexpr int num_iterations = 1000000; constexpr int num_iterations = 1000000;
base_tests_shared_counter = 0; base_tests_shared_counter = 0;
spin_lock lock{}; spin_lock lock{};
...@@ -85,122 +83,3 @@ TEST_CASE( "spinlock protects concurrent counter", "[internal/base/spinlock.h]") ...@@ -85,122 +83,3 @@ TEST_CASE( "spinlock protects concurrent counter", "[internal/base/spinlock.h]")
REQUIRE(base_tests_shared_counter == 0); REQUIRE(base_tests_shared_counter == 0);
} }
} }
// Verifies that aligned_stack returns cache-line aligned pointers for pushed objects
// and that a popped object equals the one that was pushed.
TEST_CASE( "aligned stack stores objects correctly", "[internal/base/aligned_stack.h]") {
    constexpr long data_size = 1024;
    char data[data_size];
    aligned_stack stack{data, data_size};

    SECTION( "stack correctly pushes sub linesize objects" ) {
        std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
        std::array<char, 64> small_data_two{};
        std::array<char, 1> small_data_three{'A'};

        auto pointer_one = stack.push(small_data_one);
        auto pointer_two = stack.push(small_data_two);
        auto pointer_three = stack.push(small_data_three);

        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_one) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_two) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_three) % CACHE_LINE_SIZE == 0);
    }

    SECTION( "stack correctly pushes above linesize objects" ) {
        std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
        std::array<char, CACHE_LINE_SIZE + 10> big_data_one{};

        auto big_pointer_one = stack.push(big_data_one);
        auto small_pointer_one = stack.push(small_data_one);

        REQUIRE(reinterpret_cast<std::uintptr_t>(big_pointer_one) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(small_pointer_one) % CACHE_LINE_SIZE == 0);
    }

    SECTION( "stack correctly stores and retrieves objects" ) {
        std::array<char, 5> data_one{'a', 'b', 'c', 'd', 'e'};

        stack.push(data_one);
        auto retrieved_data = stack.pop<std::array<char, 5>>();

        REQUIRE(retrieved_data == std::array<char, 5>{'a', 'b', 'c', 'd', 'e'});
    }

    SECTION( "stack can push and pop multiple times with correct alignment" ) {
        std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
        std::array<char, 64> small_data_two{};
        std::array<char, 1> small_data_three{'A'};

        auto pointer_one = stack.push(small_data_one);
        auto pointer_two = stack.push(small_data_two);
        auto pointer_three = stack.push(small_data_three);

        // decltype is standard C++; the previously used `typeof` is a GNU extension keyword
        stack.pop<decltype(small_data_three)>();
        stack.pop<decltype(small_data_two)>();

        auto pointer_four = stack.push(small_data_two);
        auto pointer_five = stack.push(small_data_three);

        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_one) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_two) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_three) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_four) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_five) % CACHE_LINE_SIZE == 0);

        // Re-pushing after the pops must reuse the slots that were freed
        REQUIRE(pointer_four == pointer_two);
        REQUIRE(pointer_five == pointer_three);
    }
}
// Exercises push_tail / pop_tail / pop_head ordering of the deque, including runs that empty it.
TEST_CASE( "deque stores objects correctly", "[internal/base/deque.h]") {
    // Empty item type; it only needs to inherit from deque_item
    class my_item: public deque_item {};

    deque<my_item> items;
    my_item first, second, third;

    SECTION( "add and remove items form the tail" ) {
        items.push_tail(&first);
        items.push_tail(&second);
        items.push_tail(&third);

        // Tail pops come back in reverse insertion order
        REQUIRE(items.pop_tail() == &third);
        REQUIRE(items.pop_tail() == &second);
        REQUIRE(items.pop_tail() == &first);
    }

    SECTION( "handles getting empty by popping the tail correctly" ) {
        items.push_tail(&first);
        REQUIRE(items.pop_tail() == &first);

        items.push_tail(&second);
        REQUIRE(items.pop_tail() == &second);
    }

    SECTION( "remove items form the head" ) {
        items.push_tail(&first);
        items.push_tail(&second);
        items.push_tail(&third);

        // Head pops come back in insertion order
        REQUIRE(items.pop_head() == &first);
        REQUIRE(items.pop_head() == &second);
        REQUIRE(items.pop_head() == &third);
    }

    SECTION( "handles getting empty by popping the head correctly" ) {
        items.push_tail(&first);
        REQUIRE(items.pop_head() == &first);

        items.push_tail(&second);
        REQUIRE(items.pop_head() == &second);
    }

    SECTION( "handles getting empty by popping the head and tail correctly" ) {
        items.push_tail(&first);
        REQUIRE(items.pop_tail() == &first);

        items.push_tail(&second);
        REQUIRE(items.pop_head() == &second);

        items.push_tail(&third);
        REQUIRE(items.pop_tail() == &third);
    }
}
#include <catch.hpp>
#include <pls/internal/base/system_details.h>
#include <pls/internal/data_structures/aligned_stack.h>
#include <pls/internal/data_structures/deque.h>
#include <vector>
#include <mutex>
using namespace pls::internal::data_structures;
using namespace pls::internal::base;
using namespace std;
// Verifies that aligned_stack returns cache-line aligned pointers for pushed objects
// and that a popped object equals the one that was pushed.
TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/aligned_stack.h]") {
    constexpr long data_size = 1024;
    char data[data_size];
    aligned_stack stack{data, data_size};

    SECTION( "stack correctly pushes sub linesize objects" ) {
        std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
        std::array<char, 64> small_data_two{};
        std::array<char, 1> small_data_three{'A'};

        auto pointer_one = stack.push(small_data_one);
        auto pointer_two = stack.push(small_data_two);
        auto pointer_three = stack.push(small_data_three);

        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_one) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_two) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_three) % CACHE_LINE_SIZE == 0);
    }

    SECTION( "stack correctly pushes above linesize objects" ) {
        std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
        std::array<char, CACHE_LINE_SIZE + 10> big_data_one{};

        auto big_pointer_one = stack.push(big_data_one);
        auto small_pointer_one = stack.push(small_data_one);

        REQUIRE(reinterpret_cast<std::uintptr_t>(big_pointer_one) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(small_pointer_one) % CACHE_LINE_SIZE == 0);
    }

    SECTION( "stack correctly stores and retrieves objects" ) {
        std::array<char, 5> data_one{'a', 'b', 'c', 'd', 'e'};

        stack.push(data_one);
        auto retrieved_data = stack.pop<std::array<char, 5>>();

        REQUIRE(retrieved_data == std::array<char, 5>{'a', 'b', 'c', 'd', 'e'});
    }

    SECTION( "stack can push and pop multiple times with correct alignment" ) {
        std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
        std::array<char, 64> small_data_two{};
        std::array<char, 1> small_data_three{'A'};

        auto pointer_one = stack.push(small_data_one);
        auto pointer_two = stack.push(small_data_two);
        auto pointer_three = stack.push(small_data_three);

        // decltype is standard C++; the previously used `typeof` is a GNU extension keyword
        stack.pop<decltype(small_data_three)>();
        stack.pop<decltype(small_data_two)>();

        auto pointer_four = stack.push(small_data_two);
        auto pointer_five = stack.push(small_data_three);

        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_one) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_two) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_three) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_four) % CACHE_LINE_SIZE == 0);
        REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_five) % CACHE_LINE_SIZE == 0);

        // Re-pushing after the pops must reuse the slots that were freed
        REQUIRE(pointer_four == pointer_two);
        REQUIRE(pointer_five == pointer_three);
    }
}
// Exercises push_tail / pop_tail / pop_head ordering of the deque, including runs that empty it.
TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]") {
    // Empty item type; it only needs to inherit from deque_item
    class my_item: public deque_item {};

    deque<my_item> items;
    my_item first, second, third;

    SECTION( "add and remove items form the tail" ) {
        items.push_tail(&first);
        items.push_tail(&second);
        items.push_tail(&third);

        // Tail pops come back in reverse insertion order
        REQUIRE(items.pop_tail() == &third);
        REQUIRE(items.pop_tail() == &second);
        REQUIRE(items.pop_tail() == &first);
    }

    SECTION( "handles getting empty by popping the tail correctly" ) {
        items.push_tail(&first);
        REQUIRE(items.pop_tail() == &first);

        items.push_tail(&second);
        REQUIRE(items.pop_tail() == &second);
    }

    SECTION( "remove items form the head" ) {
        items.push_tail(&first);
        items.push_tail(&second);
        items.push_tail(&third);

        // Head pops come back in insertion order
        REQUIRE(items.pop_head() == &first);
        REQUIRE(items.pop_head() == &second);
        REQUIRE(items.pop_head() == &third);
    }

    SECTION( "handles getting empty by popping the head correctly" ) {
        items.push_tail(&first);
        REQUIRE(items.pop_head() == &first);

        items.push_tail(&second);
        REQUIRE(items.pop_head() == &second);
    }

    SECTION( "handles getting empty by popping the head and tail correctly" ) {
        items.push_tail(&first);
        REQUIRE(items.pop_tail() == &first);

        items.push_tail(&second);
        REQUIRE(items.pop_head() == &second);

        items.push_tail(&third);
        REQUIRE(items.pop_tail() == &third);
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment