From e2e34b02e8b0409aacf4078888ff17567689b317 Mon Sep 17 00:00:00 2001 From: FritzFlorian Date: Tue, 9 Apr 2019 12:39:57 +0200 Subject: [PATCH] Restructure and comment on base abstractions. We do this to properly separate the cache alignment logic in the next step, allowing us to port all cache aligned objects without worrying about portability. --- app/benchmark_fft/main.cpp | 2 +- app/playground/main.cpp | 64 +++++++--------------------------------------------------------- app/test_for_new/main.cpp | 2 +- lib/pls/CMakeLists.txt | 44 +++++++++++++++++++++++++------------------- lib/pls/include/pls/internal/base/aligned_stack.h | 71 ----------------------------------------------------------------------- lib/pls/include/pls/internal/base/barrier.h | 21 +++++++++++---------- lib/pls/include/pls/internal/base/deque.h | 51 --------------------------------------------------- lib/pls/include/pls/internal/base/error_handling.h | 7 ++++++- lib/pls/include/pls/internal/base/spin_lock.h | 6 ++++++ lib/pls/include/pls/internal/base/system_details.h | 6 ++++++ lib/pls/include/pls/internal/base/thread.h | 26 ++++++++++++++++++++++++++ lib/pls/include/pls/internal/data_structures/aligned_stack.h | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/pls/include/pls/internal/data_structures/deque.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/pls/include/pls/internal/scheduling/fork_join_task.h | 12 ++++++------ lib/pls/include/pls/internal/scheduling/scheduler.h | 3 ++- lib/pls/include/pls/internal/scheduling/scheduler_memory.h | 16 ++++++++-------- lib/pls/include/pls/internal/scheduling/thread_state.h | 7 +++---- lib/pls/src/internal/base/aligned_stack.cpp | 21 --------------------- lib/pls/src/internal/base/barrier.cpp | 10 ++++++++++ lib/pls/src/internal/base/deque.cpp | 58 ---------------------------------------------------------- lib/pls/src/internal/data_structures/aligned_stack.cpp | 26 ++++++++++++++++++++++++++ 
lib/pls/src/internal/data_structures/deque.cpp | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/pls/src/internal/scheduling/fork_join_task.cpp | 4 ++-- lib/pls/src/internal/scheduling/scheduler_memory.cpp | 4 ++-- test/CMakeLists.txt | 2 +- test/base_tests.cpp | 127 +++---------------------------------------------------------------------------------------------------------------------------- test/data_structures_test.cpp | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 27 files changed, 481 insertions(+), 438 deletions(-) delete mode 100644 lib/pls/include/pls/internal/base/aligned_stack.h delete mode 100644 lib/pls/include/pls/internal/base/deque.h create mode 100644 lib/pls/include/pls/internal/data_structures/aligned_stack.h create mode 100644 lib/pls/include/pls/internal/data_structures/deque.h delete mode 100644 lib/pls/src/internal/base/aligned_stack.cpp delete mode 100644 lib/pls/src/internal/base/deque.cpp create mode 100644 lib/pls/src/internal/data_structures/aligned_stack.cpp create mode 100644 lib/pls/src/internal/data_structures/deque.cpp create mode 100644 test/data_structures_test.cpp diff --git a/app/benchmark_fft/main.cpp b/app/benchmark_fft/main.cpp index e5f3453..0b0f505 100644 --- a/app/benchmark_fft/main.cpp +++ b/app/benchmark_fft/main.cpp @@ -30,7 +30,7 @@ void combine(complex_vector::iterator data, int n) { std::complex odd = data[i + n / 2]; // w is the "twiddle-factor". - // this could be cached, but we run the same 'base' algorithm parallel/serial, + // this could be cached, but we run the same 'data_structures' algorithm parallel/serial, // so it won't impact the performance comparison. std::complex w = exp(std::complex(0, -2. 
* M_PI * i / n)); diff --git a/app/playground/main.cpp b/app/playground/main.cpp index 56c0c8e..5851538 100644 --- a/app/playground/main.cpp +++ b/app/playground/main.cpp @@ -3,69 +3,19 @@ #include #include #include +#include #include #include +#include using namespace pls; -// Example for static memory allocation (no malloc or free required) -static static_scheduler_memory<8, 2 << 12> my_scheduler_memory; - -class fib: public fork_join_sub_task { - static constexpr int CUTOFF = 20; - - int num_; - int* result_; - -public: - fib(int num, int* result): num_{num}, result_{result} {} - -private: - static int fib_serial(int num) { - if (num == 0) { - return 0; - } - if (num == 1) { - return 1; - } - - return fib_serial(num - 1) + fib_serial(num - 2); - } - -protected: - void execute_internal() override { - if (num_ <= CUTOFF) { - *result_ = fib_serial(num_); - return; - } - - int left_result; - int right_result; - - spawn_child(fib{num_ - 1, &left_result}); - spawn_child(fib{num_ - 2, &right_result}); - - wait_for_all(); - *result_ = left_result + right_result; - } -}; - - int main() { - scheduler my_scheduler{&my_scheduler_memory, 4}; - - auto start = std::chrono::high_resolution_clock::now(); - my_scheduler.perform_work([] (){ - int result; - - fib fib_sub_task{45, &result}; - fork_join_task tbb_task{&fib_sub_task, task_id{1}}; - scheduler::execute_task(tbb_task); + using aligned_state = std::aligned_storage::type; + aligned_state data; - std::cout << "Result: " << result << std::endl; - }); - auto end = std::chrono::high_resolution_clock::now(); - long time = std::chrono::duration_cast(end - start).count(); - std::cout << "Startup time in us: " << time << std::endl; + std::cout << sizeof(aligned_state) << std::endl; + malloc_scheduler_memory sched_memory{8}; + std::cout << (std::uintptr_t)sched_memory.thread_for(0) % 64 << ", " << (std::uintptr_t)sched_memory.thread_for(1) % 64 << ", " << (std::uintptr_t)sched_memory.thread_for(2) % 64 << ", " << std::endl; } diff 
--git a/app/test_for_new/main.cpp b/app/test_for_new/main.cpp index 2e74529..fc48d64 100644 --- a/app/test_for_new/main.cpp +++ b/app/test_for_new/main.cpp @@ -1,7 +1,7 @@ #include #include -using namespace pls::internal::base; +using namespace pls::internal::data_structures; int global = 0; diff --git a/lib/pls/CMakeLists.txt b/lib/pls/CMakeLists.txt index 6cee207..28218d5 100644 --- a/lib/pls/CMakeLists.txt +++ b/lib/pls/CMakeLists.txt @@ -1,24 +1,30 @@ # List all required files here (cmake best practice to NOT automate this step!) add_library(pls STATIC - src/pls.cpp include/pls/pls.h - src/internal/base/spin_lock.cpp include/pls/internal/base/spin_lock.h - src/internal/base/thread.cpp include/pls/internal/base/thread.h - include/pls/internal/helpers/prohibit_new.h - src/internal/scheduling/abstract_task.cpp include/pls/internal/scheduling/abstract_task.h - src/internal/scheduling/scheduler.cpp include/pls/internal/scheduling/scheduler.h - src/internal/scheduling/thread_state.cpp include/pls/internal/scheduling/thread_state.h - src/internal/base/barrier.cpp include/pls/internal/base/barrier.h - src/internal/scheduling/root_task.cpp include/pls/internal/scheduling/root_task.h - src/internal/base/aligned_stack.cpp include/pls/internal/base/aligned_stack.h - include/pls/internal/base/system_details.h - src/internal/scheduling/run_on_n_threads_task.cpp include/pls/internal/scheduling/run_on_n_threads_task.h - src/internal/scheduling/fork_join_task.cpp include/pls/internal/scheduling/fork_join_task.h - src/internal/base/deque.cpp include/pls/internal/base/deque.h - src/algorithms/invoke_parallel.cpp include/pls/algorithms/invoke_parallel.h - include/pls/internal/base/error_handling.h - include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp - include/pls/internal/helpers/profiler.h - include/pls/internal/helpers/mini_benchmark.h) + include/pls/pls.h src/pls.cpp + + include/pls/algorithms/invoke_parallel.h 
src/algorithms/invoke_parallel.cpp + + include/pls/internal/base/spin_lock.h src/internal/base/spin_lock.cpp + include/pls/internal/base/thread.h src/internal/base/thread.cpp + include/pls/internal/base/barrier.h src/internal/base/barrier.cpp + include/pls/internal/base/system_details.h + include/pls/internal/base/error_handling.h + + include/pls/internal/data_structures/aligned_stack.h src/internal/data_structures/aligned_stack.cpp + include/pls/internal/data_structures/deque.h src/internal/data_structures/deque.cpp + + include/pls/internal/helpers/prohibit_new.h + include/pls/internal/helpers/profiler.h + include/pls/internal/helpers/mini_benchmark.h + + include/pls/internal/scheduling/root_task.h src/internal/scheduling/root_task.cpp + include/pls/internal/scheduling/thread_state.h src/internal/scheduling/thread_state.cpp + include/pls/internal/scheduling/abstract_task.h src/internal/scheduling/abstract_task.cpp + include/pls/internal/scheduling/scheduler.h src/internal/scheduling/scheduler.cpp + include/pls/internal/scheduling/run_on_n_threads_task.h src/internal/scheduling/run_on_n_threads_task.cpp + include/pls/internal/scheduling/fork_join_task.h src/internal/scheduling/fork_join_task.cpp + include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp +) # Add everything in `./include` to be in the include path of this project target_include_directories(pls diff --git a/lib/pls/include/pls/internal/base/aligned_stack.h b/lib/pls/include/pls/internal/base/aligned_stack.h deleted file mode 100644 index 7c16fec..0000000 --- a/lib/pls/include/pls/internal/base/aligned_stack.h +++ /dev/null @@ -1,71 +0,0 @@ - -#ifndef PLS_ALIGNED_STACK_H -#define PLS_ALIGNED_STACK_H - -#include -#include - -#include "pls/internal/base/error_handling.h" - -namespace pls { - namespace internal { - namespace base { - class aligned_stack { - // Keep bounds of our memory block - char* memory_start_; - char* memory_end_; - - // Current head will always 
be aligned to cache lines - char* head_; - - static std::uintptr_t next_alignment(std::uintptr_t size); - static char* next_alignment(char* pointer); - public: - typedef char* state; - - aligned_stack(): memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {}; - - aligned_stack(char* memory_region, const std::size_t size): - memory_start_{memory_region}, - memory_end_{memory_region + size}, - head_{next_alignment(memory_start_)} {} - - template - T* push(const T& object) { - // Placement new into desired memory location - return new ((void*)push())T(object); - } - - template - void* push() { - void* result = reinterpret_cast(head_); - - // Move head to next aligned position after new object - head_ = next_alignment(head_ + sizeof(T)); - if (head_ >= memory_end_) { - PLS_ERROR("Tried to allocate object on alligned_stack without sufficient memory!"); - } - - return result; - } - - template - T pop() { - head_ = head_ - next_alignment(sizeof(T)); - - return *reinterpret_cast(head_); - } - - state save_state() { - return head_; - } - - void reset_state(state new_state) { - head_ = new_state; - } - }; - } - } -} - -#endif //PLS_ALIGNED_STACK_H diff --git a/lib/pls/include/pls/internal/base/barrier.h b/lib/pls/include/pls/internal/base/barrier.h index f5ea58b..996f0e0 100644 --- a/lib/pls/include/pls/internal/base/barrier.h +++ b/lib/pls/include/pls/internal/base/barrier.h @@ -7,21 +7,22 @@ namespace pls { namespace internal { namespace base { + /** + * Provides standard barrier behaviour. + * `count` threads have to call `wait()` before any of the `wait()` calls returns, + * thus blocking all threads until everyone reached the barrier. + * + * PORTABILITY: + * Current implementation is based on pthreads. 
+ */ class barrier { pthread_barrier_t barrier_; public: - explicit barrier(const unsigned int count): barrier_{} { - pthread_barrier_init(&barrier_, nullptr, count); - } + explicit barrier(unsigned int count); + ~barrier(); - ~barrier() { - pthread_barrier_destroy(&barrier_); - } - - void wait() { - pthread_barrier_wait(&barrier_); - } + void wait(); }; } } diff --git a/lib/pls/include/pls/internal/base/deque.h b/lib/pls/include/pls/internal/base/deque.h deleted file mode 100644 index 5fbe0bb..0000000 --- a/lib/pls/include/pls/internal/base/deque.h +++ /dev/null @@ -1,51 +0,0 @@ - -#ifndef PLS_DEQUE_H -#define PLS_DEQUE_H - -#include "spin_lock.h" - -namespace pls { - namespace internal { - namespace base { - class deque_item { - friend class deque_internal; - - deque_item* prev_; - deque_item* next_; - - }; - - class deque_internal { - protected: - deque_item* head_; - deque_item* tail_; - - spin_lock lock_; - - deque_item* pop_head_internal(); - deque_item* pop_tail_internal(); - void push_tail_internal(deque_item *new_item); - }; - - template - class deque: deque_internal { - public: - explicit deque(): deque_internal{} {} - - inline Item* pop_head() { - return static_cast(pop_head_internal()); - } - - inline Item* pop_tail() { - return static_cast(pop_tail_internal()); - } - - inline void push_tail(Item* new_item) { - push_tail_internal(new_item); - } - }; - } - } -} - -#endif //PLS_DEQUE_H diff --git a/lib/pls/include/pls/internal/base/error_handling.h b/lib/pls/include/pls/internal/base/error_handling.h index d405dde..235964e 100644 --- a/lib/pls/include/pls/internal/base/error_handling.h +++ b/lib/pls/include/pls/internal/base/error_handling.h @@ -4,7 +4,12 @@ #include -// TODO: Figure out proper exception handling +/** + * Called when there is a non-recoverable error or invariant violation in the scheduler. + * This SHOULD NOT HAPPEN AT ANY POINT in production, any instance of this is a bug! 
+ * The implementation can be changed if for example no iostream is available on a system + * (or its inclusion adds too much overhead). + */ #define PLS_ERROR(msg) std::cout << msg << std::endl; exit(1); #endif //PLS_ERROR_HANDLING_H diff --git a/lib/pls/include/pls/internal/base/spin_lock.h b/lib/pls/include/pls/internal/base/spin_lock.h index c2b98c8..5acaf0a 100644 --- a/lib/pls/include/pls/internal/base/spin_lock.h +++ b/lib/pls/include/pls/internal/base/spin_lock.h @@ -10,6 +10,12 @@ namespace pls { namespace internal { namespace base { + /** + * A simple set and test_and_set based spin lock implementation. + * + * PORTABILITY: + * Current implementation is based on C++ 11 atomic_flag. + */ class spin_lock { std::atomic_flag flag_; int yield_at_tries_; diff --git a/lib/pls/include/pls/internal/base/system_details.h b/lib/pls/include/pls/internal/base/system_details.h index a8dfb72..9df0707 100644 --- a/lib/pls/include/pls/internal/base/system_details.h +++ b/lib/pls/include/pls/internal/base/system_details.h @@ -7,6 +7,12 @@ namespace pls { namespace internal { namespace base { + /** + * Collection of system details, e.g. hardware cache line size. + * + * PORTABILITY: + * Currently sane default values for x86. + */ constexpr std::uintptr_t CACHE_LINE_SIZE = 64; } } diff --git a/lib/pls/include/pls/internal/base/thread.h b/lib/pls/include/pls/internal/base/thread.h index f03be21..6b94043 100644 --- a/lib/pls/include/pls/internal/base/thread.h +++ b/lib/pls/include/pls/internal/base/thread.h @@ -15,6 +15,16 @@ namespace pls { namespace base { using thread_entrypoint = void(); + /** + * Static methods that can be performed on the current thread. + * + * usage: + * this_thread::yield(); + * T* state = this_thread::state(); + * + * PORTABILITY: + * Current implementation is based on pthreads. + */ class this_thread { template friend class thread; @@ -51,6 +61,22 @@ namespace pls { } }; + /** + * Abstraction for starting a function in a separate thread. 
+ * + * @tparam Function Lambda being started on the new thread. + * @tparam State State type held for this thread. + * + * usage: + * T* state; + * auto thread = start_thread([] { + * // Run on new thread + * }, state); + * thread.join(); // Wait for it to finish + * + * PORTABILITY: + * Current implementation is based on pthreads. + */ template class thread { friend class this_thread; diff --git a/lib/pls/include/pls/internal/data_structures/aligned_stack.h b/lib/pls/include/pls/internal/data_structures/aligned_stack.h new file mode 100644 index 0000000..ae5c2b6 --- /dev/null +++ b/lib/pls/include/pls/internal/data_structures/aligned_stack.h @@ -0,0 +1,78 @@ + +#ifndef PLS_ALIGNED_STACK_H +#define PLS_ALIGNED_STACK_H + +#include +#include + +#include "pls/internal/base/error_handling.h" + +namespace pls { + namespace internal { + namespace data_structures { + /** + * Generic stack-like data structure that allows to allocate arbitrary objects in a given memory region. + * The objects will be stored aligned in the stack, making the storage cache friendly and very fast + * (as long as one can live with the stack restrictions). + * + * IMPORTANT: Does not call destructors on stored objects! Do not allocate resources in the objects! 
+ * + * Usage: + * aligned_stack stack{pointer_to_memory, size_of_memory}; + * T* pointer = stack.push(some_object); // Copy-Construct the object on top of stack + * stack.pop(); // Pop the top object of type T (returned by copy, no destructor is called) + */ + class aligned_stack { + // Keep bounds of our memory block + char* memory_start_; + char* memory_end_; + + // Current head will always be aligned to cache lines + char* head_; + + static std::uintptr_t next_alignment(std::uintptr_t size); + static char* next_alignment(char* pointer); + public: + typedef char* state; + + aligned_stack(): memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {}; + aligned_stack(char* memory_region, const std::size_t size); + + template + T* push(const T& object) { + // Copy-Construct + return new ((void*)push())T(object); + } + + template + void* push() { + void* result = reinterpret_cast(head_); + + // Move head to next aligned position after new object + head_ = next_alignment(head_ + sizeof(T)); + if (head_ >= memory_end_) { + PLS_ERROR("Tried to allocate object on alligned_stack without sufficient memory!"); + } + + return result; + } + + template + T pop() { + head_ = head_ - next_alignment(sizeof(T)); + return *reinterpret_cast(head_); + } + + state save_state() { + return head_; + } + + void reset_state(state new_state) { + head_ = new_state; + } + }; + } + } +} + +#endif //PLS_ALIGNED_STACK_H diff --git a/lib/pls/include/pls/internal/data_structures/deque.h b/lib/pls/include/pls/internal/data_structures/deque.h new file mode 100644 index 0000000..8652cc3 --- /dev/null +++ b/lib/pls/include/pls/internal/data_structures/deque.h @@ -0,0 +1,60 @@ + +#ifndef PLS_DEQUE_H +#define PLS_DEQUE_H + +#include "pls/internal/base/spin_lock.h" + +namespace pls { + namespace internal { + namespace data_structures { + /** + * Turns any object into a deque item when inheriting from this. 
+ */ + class deque_item { + friend class deque_internal; + + deque_item* prev_; + deque_item* next_; + + }; + + class deque_internal { + protected: + deque_item* head_; + deque_item* tail_; + + base::spin_lock lock_; + + deque_item* pop_head_internal(); + deque_item* pop_tail_internal(); + void push_tail_internal(deque_item *new_item); + }; + + /** + * A double linked list based deque. + * Storage is therefore only needed for the individual items. + * + * @tparam Item The type of items stored in this deque + */ + template + class deque: deque_internal { + public: + explicit deque(): deque_internal{} {} + + inline Item* pop_head() { + return static_cast(pop_head_internal()); + } + + inline Item* pop_tail() { + return static_cast(pop_tail_internal()); + } + + inline void push_tail(Item* new_item) { + push_tail_internal(new_item); + } + }; + } + } +} + +#endif //PLS_DEQUE_H diff --git a/lib/pls/include/pls/internal/scheduling/fork_join_task.h b/lib/pls/include/pls/internal/scheduling/fork_join_task.h index 830772f..efcd395 100644 --- a/lib/pls/include/pls/internal/scheduling/fork_join_task.h +++ b/lib/pls/include/pls/internal/scheduling/fork_join_task.h @@ -4,8 +4,8 @@ #include "pls/internal/helpers/profiler.h" -#include "pls/internal/base/aligned_stack.h" -#include "pls/internal/base/deque.h" +#include "pls/internal/data_structures/aligned_stack.h" +#include "pls/internal/data_structures/deque.h" #include "abstract_task.h" #include "thread_state.h" @@ -14,7 +14,7 @@ namespace pls { namespace internal { namespace scheduling { class fork_join_task; - class fork_join_sub_task: public base::deque_item { + class fork_join_sub_task: public data_structures::deque_item { friend class fork_join_task; // Coordinate finishing of sub_tasks @@ -25,7 +25,7 @@ namespace pls { fork_join_task* tbb_task_; // Stack Management (reset stack pointer after wait_for_all() calls) - base::aligned_stack::state stack_state_; + data_structures::aligned_stack::state stack_state_; protected: 
explicit fork_join_sub_task(); fork_join_sub_task(const fork_join_sub_task& other); @@ -62,10 +62,10 @@ namespace pls { fork_join_sub_task* root_task_; fork_join_sub_task* currently_executing_; - base::aligned_stack* my_stack_; + data_structures::aligned_stack* my_stack_; // Double-Ended Queue management - base::deque deque_; + data_structures::deque deque_; // Steal Management fork_join_sub_task* last_stolen_; diff --git a/lib/pls/include/pls/internal/scheduling/scheduler.h b/lib/pls/include/pls/internal/scheduling/scheduler.h index 55e72b5..a9e2da5 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler.h @@ -7,7 +7,8 @@ #include "pls/internal/helpers/profiler.h" -#include "pls/internal/base/aligned_stack.h" +#include "pls/internal/data_structures/aligned_stack.h" + #include "pls/internal/base/thread.h" #include "pls/internal/base/barrier.h" diff --git a/lib/pls/include/pls/internal/scheduling/scheduler_memory.h b/lib/pls/include/pls/internal/scheduling/scheduler_memory.h index c9d233d..dd233e9 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler_memory.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler_memory.h @@ -1,4 +1,4 @@ -#include "pls/internal/base/aligned_stack.h" +#include "pls/internal/data_structures/aligned_stack.h" #include "pls/internal/base/thread.h" #include "thread_state.h" @@ -17,7 +17,7 @@ namespace pls { virtual size_t max_threads() = 0; virtual thread_state* thread_state_for(size_t id) = 0; virtual scheduler_thread* thread_for(size_t id) = 0; - virtual base::aligned_stack* task_stack_for(size_t id) = 0; + virtual data_structures::aligned_stack* task_stack_for(size_t id) = 0; }; template @@ -25,28 +25,28 @@ namespace pls { std::array threads_; std::array thread_states_; std::array, MAX_THREADS> task_stacks_memory_; - std::array task_stacks_; + std::array task_stacks_; public: static_scheduler_memory() { for (size_t i = 0; i < MAX_THREADS; i++) { - 
task_stacks_[i] = base::aligned_stack(task_stacks_memory_[i].data(), TASK_STACK_SIZE); + task_stacks_[i] = data_structures::aligned_stack(task_stacks_memory_[i].data(), TASK_STACK_SIZE); } } size_t max_threads() override { return MAX_THREADS; } thread_state* thread_state_for(size_t id) override { return &thread_states_[id]; } scheduler_thread* thread_for(size_t id) override { return &threads_[id]; } - base::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } + data_structures::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } }; class malloc_scheduler_memory: public scheduler_memory { size_t num_threads_; - scheduler_thread* threads_; + alignas(64) scheduler_thread* threads_; thread_state* thread_states_; char** task_stacks_memory_; - base::aligned_stack* task_stacks_; + data_structures::aligned_stack* task_stacks_; public: explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16); ~malloc_scheduler_memory(); @@ -54,7 +54,7 @@ namespace pls { size_t max_threads() override { return num_threads_; } thread_state* thread_state_for(size_t id) override { return &thread_states_[id]; } scheduler_thread* thread_for(size_t id) override { return &threads_[id]; } - base::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } + data_structures::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } }; } } diff --git a/lib/pls/include/pls/internal/scheduling/thread_state.h b/lib/pls/include/pls/internal/scheduling/thread_state.h index 2f9cda9..ee864db 100644 --- a/lib/pls/include/pls/internal/scheduling/thread_state.h +++ b/lib/pls/include/pls/internal/scheduling/thread_state.h @@ -4,10 +4,9 @@ #include +#include "pls/internal/data_structures/aligned_stack.h" #include "abstract_task.h" -#include "pls/internal/base/aligned_stack.h" - namespace pls { namespace internal { namespace scheduling { @@ -18,7 +17,7 @@ namespace pls { scheduler* 
scheduler_; abstract_task* root_task_; abstract_task* current_task_; - base::aligned_stack* task_stack_; + data_structures::aligned_stack* task_stack_; size_t id_; base::spin_lock lock_; std::minstd_rand random_; @@ -31,7 +30,7 @@ namespace pls { id_{0}, random_{id_} {}; - thread_state(scheduler* scheduler, base::aligned_stack* task_stack, unsigned int id): + thread_state(scheduler* scheduler, data_structures::aligned_stack* task_stack, unsigned int id): scheduler_{scheduler}, root_task_{nullptr}, current_task_{nullptr}, diff --git a/lib/pls/src/internal/base/aligned_stack.cpp b/lib/pls/src/internal/base/aligned_stack.cpp deleted file mode 100644 index 4efe681..0000000 --- a/lib/pls/src/internal/base/aligned_stack.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include "pls/internal/base/aligned_stack.h" -#include "pls/internal/base/system_details.h" - -namespace pls { - namespace internal { - namespace base { - std::uintptr_t aligned_stack::next_alignment(std::uintptr_t size) { - std::uintptr_t miss_alignment = size % CACHE_LINE_SIZE; - if (miss_alignment == 0) { - return size; - } else { - return size + (CACHE_LINE_SIZE - miss_alignment); - } - } - - char* aligned_stack::next_alignment(char* pointer) { - return reinterpret_cast(next_alignment(reinterpret_cast(pointer))); - } - } - } -} diff --git a/lib/pls/src/internal/base/barrier.cpp b/lib/pls/src/internal/base/barrier.cpp index 038e030..a2893be 100644 --- a/lib/pls/src/internal/base/barrier.cpp +++ b/lib/pls/src/internal/base/barrier.cpp @@ -3,7 +3,17 @@ namespace pls { namespace internal { namespace base { + barrier::barrier(const unsigned int count): barrier_{} { + pthread_barrier_init(&barrier_, nullptr, count); + } + barrier::~barrier() { + pthread_barrier_destroy(&barrier_); + } + + void barrier::wait() { + pthread_barrier_wait(&barrier_); + } } } } diff --git a/lib/pls/src/internal/base/deque.cpp b/lib/pls/src/internal/base/deque.cpp deleted file mode 100644 index 5370a91..0000000 --- 
a/lib/pls/src/internal/base/deque.cpp +++ /dev/null @@ -1,58 +0,0 @@ -#include - -#include "pls/internal/base/deque.h" - -namespace pls { - namespace internal { - namespace base { - deque_item* deque_internal::pop_head_internal() { - std::lock_guard lock{lock_}; - - if (head_ == nullptr) { - return nullptr; - } - - deque_item* result = head_; - head_ = head_->prev_; - if (head_ == nullptr) { - tail_ = nullptr; - } else { - head_->next_ = nullptr; - } - - return result; - } - - deque_item* deque_internal::pop_tail_internal() { - std::lock_guard lock{lock_}; - - if (tail_ == nullptr) { - return nullptr; - } - - deque_item* result = tail_; - tail_ = tail_->next_; - if (tail_ == nullptr) { - head_ = nullptr; - } else { - tail_->prev_ = nullptr; - } - - return result; - } - - void deque_internal::push_tail_internal(deque_item *new_item) { - std::lock_guard lock{lock_}; - - if (tail_ != nullptr) { - tail_->prev_ = new_item; - } else { - head_ = new_item; - } - new_item->next_ = tail_; - new_item->prev_ = nullptr; - tail_ = new_item; - } - } - } -} diff --git a/lib/pls/src/internal/data_structures/aligned_stack.cpp b/lib/pls/src/internal/data_structures/aligned_stack.cpp new file mode 100644 index 0000000..ff54dd3 --- /dev/null +++ b/lib/pls/src/internal/data_structures/aligned_stack.cpp @@ -0,0 +1,26 @@ +#include "pls/internal/data_structures/aligned_stack.h" +#include "pls/internal/base/system_details.h" + +namespace pls { + namespace internal { + namespace data_structures { + aligned_stack::aligned_stack(char* memory_region, const std::size_t size): + memory_start_{memory_region}, + memory_end_{memory_region + size}, + head_{next_alignment(memory_start_)} {} + + std::uintptr_t aligned_stack::next_alignment(std::uintptr_t size) { + std::uintptr_t miss_alignment = size % base::CACHE_LINE_SIZE; + if (miss_alignment == 0) { + return size; + } else { + return size + (base::CACHE_LINE_SIZE - miss_alignment); + } + } + + char* aligned_stack::next_alignment(char* pointer) { + 
return reinterpret_cast(next_alignment(reinterpret_cast(pointer))); + } + } + } +} diff --git a/lib/pls/src/internal/data_structures/deque.cpp b/lib/pls/src/internal/data_structures/deque.cpp new file mode 100644 index 0000000..786e04b --- /dev/null +++ b/lib/pls/src/internal/data_structures/deque.cpp @@ -0,0 +1,58 @@ +#include + +#include "pls/internal/data_structures/deque.h" + +namespace pls { + namespace internal { + namespace data_structures { + deque_item* deque_internal::pop_head_internal() { + std::lock_guard lock{lock_}; + + if (head_ == nullptr) { + return nullptr; + } + + deque_item* result = head_; + head_ = head_->prev_; + if (head_ == nullptr) { + tail_ = nullptr; + } else { + head_->next_ = nullptr; + } + + return result; + } + + deque_item* deque_internal::pop_tail_internal() { + std::lock_guard lock{lock_}; + + if (tail_ == nullptr) { + return nullptr; + } + + deque_item* result = tail_; + tail_ = tail_->next_; + if (tail_ == nullptr) { + head_ = nullptr; + } else { + tail_->prev_ = nullptr; + } + + return result; + } + + void deque_internal::push_tail_internal(deque_item *new_item) { + std::lock_guard lock{lock_}; + + if (tail_ != nullptr) { + tail_->prev_ = new_item; + } else { + head_ = new_item; + } + new_item->next_ = tail_; + new_item->prev_ = nullptr; + tail_ = new_item; + } + } + } +} diff --git a/lib/pls/src/internal/scheduling/fork_join_task.cpp b/lib/pls/src/internal/scheduling/fork_join_task.cpp index 1f1360c..164f804 100644 --- a/lib/pls/src/internal/scheduling/fork_join_task.cpp +++ b/lib/pls/src/internal/scheduling/fork_join_task.cpp @@ -7,14 +7,14 @@ namespace pls { namespace internal { namespace scheduling { fork_join_sub_task::fork_join_sub_task(): - base::deque_item{}, + data_structures::deque_item{}, ref_count_{0}, parent_{nullptr}, tbb_task_{nullptr}, stack_state_{nullptr} {} fork_join_sub_task::fork_join_sub_task(const fork_join_sub_task& other): - base::deque_item(other), + data_structures::deque_item(other), ref_count_{0}, 
parent_{nullptr}, tbb_task_{nullptr}, diff --git a/lib/pls/src/internal/scheduling/scheduler_memory.cpp b/lib/pls/src/internal/scheduling/scheduler_memory.cpp index 9018be9..7201242 100644 --- a/lib/pls/src/internal/scheduling/scheduler_memory.cpp +++ b/lib/pls/src/internal/scheduling/scheduler_memory.cpp @@ -8,11 +8,11 @@ namespace pls { threads_ = reinterpret_cast(malloc(num_threads * sizeof(scheduler_thread))); thread_states_ = reinterpret_cast(malloc(num_threads * sizeof(thread_state))); - task_stacks_ = reinterpret_cast(malloc(num_threads * sizeof(base::aligned_stack))); + task_stacks_ = reinterpret_cast(malloc(num_threads * sizeof(data_structures::aligned_stack))); task_stacks_memory_ = reinterpret_cast(malloc(num_threads * sizeof(char*))); for (size_t i = 0; i < num_threads_; i++) { task_stacks_memory_[i] = reinterpret_cast(malloc(memory_per_stack)); - task_stacks_[i] = base::aligned_stack(task_stacks_memory_[i], memory_per_stack); + task_stacks_[i] = data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack); } } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index dbe5d58..8e7850d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ add_executable(tests main.cpp - base_tests.cpp scheduling_tests.cpp) + base_tests.cpp scheduling_tests.cpp data_structures_test.cpp) target_link_libraries(tests catch2 pls) diff --git a/test/base_tests.cpp b/test/base_tests.cpp index f1764d6..b22cfd4 100644 --- a/test/base_tests.cpp +++ b/test/base_tests.cpp @@ -1,12 +1,10 @@ #include #include #include -#include #include #include #include -#include using namespace pls::internal::base; using namespace std; @@ -15,7 +13,7 @@ static bool base_tests_visited; static int base_tests_local_value_one; static vector base_tests_local_value_two; -TEST_CASE( "thread creation and joining", "[internal/base/thread.h]") { +TEST_CASE( "thread creation and joining", "[internal/data_structures/thread.h]") { base_tests_visited = false; auto t1 = 
start_thread([]() { base_tests_visited = true; }); t1.join(); @@ -23,7 +21,7 @@ TEST_CASE( "thread creation and joining", "[internal/base/thread.h]") { REQUIRE(base_tests_visited); } -TEST_CASE( "thread state", "[internal/base/thread.h]") { +TEST_CASE( "thread state", "[internal/data_structures/thread.h]") { int state_one = 1; vector state_two{1, 2}; @@ -38,7 +36,7 @@ TEST_CASE( "thread state", "[internal/base/thread.h]") { int base_tests_shared_counter; -TEST_CASE( "spinlock protects concurrent counter", "[internal/base/spinlock.h]") { +TEST_CASE( "spinlock protects concurrent counter", "[internal/data_structures/spinlock.h]") { constexpr int num_iterations = 1000000; base_tests_shared_counter = 0; spin_lock lock{}; @@ -85,122 +83,3 @@ TEST_CASE( "spinlock protects concurrent counter", "[internal/base/spinlock.h]") REQUIRE(base_tests_shared_counter == 0); } } - -TEST_CASE( "aligned stack stores objects correctly", "[internal/base/aligned_stack.h]") { - constexpr long data_size = 1024; - char data[data_size]; - aligned_stack stack{data, data_size}; - - SECTION( "stack correctly pushes sub linesize objects" ) { - std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; - std::array small_data_two{}; - std::array small_data_three{'A'}; - - auto pointer_one = stack.push(small_data_one); - auto pointer_two = stack.push(small_data_two); - auto pointer_three = stack.push(small_data_three); - - REQUIRE(reinterpret_cast(pointer_one) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_two) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_three) % CACHE_LINE_SIZE == 0); - } - - SECTION( "stack correctly pushes above linesize objects" ) { - std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; - std::array big_data_one{}; - - auto big_pointer_one = stack.push(big_data_one); - auto small_pointer_one = stack.push(small_data_one); - - REQUIRE(reinterpret_cast(big_pointer_one) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(small_pointer_one) % CACHE_LINE_SIZE == 
0); - } - - SECTION( "stack correctly stores and retrieves objects" ) { - std::array data_one{'a', 'b', 'c', 'd', 'e'}; - - stack.push(data_one); - auto retrieved_data = stack.pop>(); - - REQUIRE(retrieved_data == std::array{'a', 'b', 'c', 'd', 'e'}); - } - - SECTION( "stack can push and pop multiple times with correct alignment" ) { - std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; - std::array small_data_two{}; - std::array small_data_three{'A'}; - - auto pointer_one = stack.push(small_data_one); - auto pointer_two = stack.push(small_data_two); - auto pointer_three = stack.push(small_data_three); - stack.pop(); - stack.pop(); - auto pointer_four = stack.push(small_data_two); - auto pointer_five = stack.push(small_data_three); - - REQUIRE(reinterpret_cast(pointer_one) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_two) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_three) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_four) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_five) % CACHE_LINE_SIZE == 0); - - REQUIRE(pointer_four == pointer_two); - REQUIRE(pointer_five == pointer_three); - } -} - -TEST_CASE( "deque stores objects correctly", "[internal/base/deque.h]") { - class my_item: public deque_item { - - }; - - deque deque; - my_item one, two, three; - - SECTION( "add and remove items form the tail" ) { - deque.push_tail(&one); - deque.push_tail(&two); - deque.push_tail(&three); - - REQUIRE(deque.pop_tail() == &three); - REQUIRE(deque.pop_tail() == &two); - REQUIRE(deque.pop_tail() == &one); - } - - SECTION( "handles getting empty by popping the tail correctly" ) { - deque.push_tail(&one); - REQUIRE(deque.pop_tail() == &one); - - deque.push_tail(&two); - REQUIRE(deque.pop_tail() == &two); - } - - SECTION( "remove items form the head" ) { - deque.push_tail(&one); - deque.push_tail(&two); - deque.push_tail(&three); - - REQUIRE(deque.pop_head() == &one); - REQUIRE(deque.pop_head() == &two); - 
REQUIRE(deque.pop_head() == &three); - } - - SECTION( "handles getting empty by popping the head correctly" ) { - deque.push_tail(&one); - REQUIRE(deque.pop_head() == &one); - - deque.push_tail(&two); - REQUIRE(deque.pop_head() == &two); - } - - SECTION( "handles getting empty by popping the head and tail correctly" ) { - deque.push_tail(&one); - REQUIRE(deque.pop_tail() == &one); - - deque.push_tail(&two); - REQUIRE(deque.pop_head() == &two); - - deque.push_tail(&three); - REQUIRE(deque.pop_tail() == &three); - } -} diff --git a/test/data_structures_test.cpp b/test/data_structures_test.cpp new file mode 100644 index 0000000..616b85f --- /dev/null +++ b/test/data_structures_test.cpp @@ -0,0 +1,133 @@ +#include + +#include + +#include +#include + +#include +#include + +using namespace pls::internal::data_structures; +using namespace pls::internal::base; +using namespace std; + + +TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/aligned_stack.h]") { + constexpr long data_size = 1024; + char data[data_size]; + aligned_stack stack{data, data_size}; + + SECTION( "stack correctly pushes sub linesize objects" ) { + std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; + std::array small_data_two{}; + std::array small_data_three{'A'}; + + auto pointer_one = stack.push(small_data_one); + auto pointer_two = stack.push(small_data_two); + auto pointer_three = stack.push(small_data_three); + + REQUIRE(reinterpret_cast(pointer_one) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_two) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_three) % CACHE_LINE_SIZE == 0); + } + + SECTION( "stack correctly pushes above linesize objects" ) { + std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; + std::array big_data_one{}; + + auto big_pointer_one = stack.push(big_data_one); + auto small_pointer_one = stack.push(small_data_one); + + REQUIRE(reinterpret_cast(big_pointer_one) % CACHE_LINE_SIZE == 0); + 
REQUIRE(reinterpret_cast(small_pointer_one) % CACHE_LINE_SIZE == 0); + } + + SECTION( "stack correctly stores and retrieves objects" ) { + std::array data_one{'a', 'b', 'c', 'd', 'e'}; + + stack.push(data_one); + auto retrieved_data = stack.pop>(); + + REQUIRE(retrieved_data == std::array{'a', 'b', 'c', 'd', 'e'}); + } + + SECTION( "stack can push and pop multiple times with correct alignment" ) { + std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; + std::array small_data_two{}; + std::array small_data_three{'A'}; + + auto pointer_one = stack.push(small_data_one); + auto pointer_two = stack.push(small_data_two); + auto pointer_three = stack.push(small_data_three); + stack.pop(); + stack.pop(); + auto pointer_four = stack.push(small_data_two); + auto pointer_five = stack.push(small_data_three); + + REQUIRE(reinterpret_cast(pointer_one) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_two) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_three) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_four) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_five) % CACHE_LINE_SIZE == 0); + + REQUIRE(pointer_four == pointer_two); + REQUIRE(pointer_five == pointer_three); + } +} + +TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]") { + class my_item: public deque_item { + + }; + + deque deque; + my_item one, two, three; + + SECTION( "add and remove items form the tail" ) { + deque.push_tail(&one); + deque.push_tail(&two); + deque.push_tail(&three); + + REQUIRE(deque.pop_tail() == &three); + REQUIRE(deque.pop_tail() == &two); + REQUIRE(deque.pop_tail() == &one); + } + + SECTION( "handles getting empty by popping the tail correctly" ) { + deque.push_tail(&one); + REQUIRE(deque.pop_tail() == &one); + + deque.push_tail(&two); + REQUIRE(deque.pop_tail() == &two); + } + + SECTION( "remove items form the head" ) { + deque.push_tail(&one); + deque.push_tail(&two); + deque.push_tail(&three); + + 
REQUIRE(deque.pop_head() == &one); + REQUIRE(deque.pop_head() == &two); + REQUIRE(deque.pop_head() == &three); + } + + SECTION( "handles getting empty by popping the head correctly" ) { + deque.push_tail(&one); + REQUIRE(deque.pop_head() == &one); + + deque.push_tail(&two); + REQUIRE(deque.pop_head() == &two); + } + + SECTION( "handles getting empty by popping the head and tail correctly" ) { + deque.push_tail(&one); + REQUIRE(deque.pop_tail() == &one); + + deque.push_tail(&two); + REQUIRE(deque.pop_head() == &two); + + deque.push_tail(&three); + REQUIRE(deque.pop_tail() == &three); + } +} -- libgit2 0.26.0