diff --git a/app/benchmark_fft/main.cpp b/app/benchmark_fft/main.cpp index e5f3453..0b0f505 100644 --- a/app/benchmark_fft/main.cpp +++ b/app/benchmark_fft/main.cpp @@ -30,7 +30,7 @@ void combine(complex_vector::iterator data, int n) { std::complex odd = data[i + n / 2]; // w is the "twiddle-factor". - // this could be cached, but we run the same 'base' algorithm parallel/serial, + // this could be cached, but we run the same 'data_structures' algorithm parallel/serial, // so it won't impact the performance comparison. std::complex w = exp(std::complex(0, -2. * M_PI * i / n)); diff --git a/app/playground/main.cpp b/app/playground/main.cpp index 56c0c8e..5851538 100644 --- a/app/playground/main.cpp +++ b/app/playground/main.cpp @@ -3,69 +3,19 @@ #include #include #include +#include #include #include +#include using namespace pls; -// Example for static memory allocation (no malloc or free required) -static static_scheduler_memory<8, 2 << 12> my_scheduler_memory; - -class fib: public fork_join_sub_task { - static constexpr int CUTOFF = 20; - - int num_; - int* result_; - -public: - fib(int num, int* result): num_{num}, result_{result} {} - -private: - static int fib_serial(int num) { - if (num == 0) { - return 0; - } - if (num == 1) { - return 1; - } - - return fib_serial(num - 1) + fib_serial(num - 2); - } - -protected: - void execute_internal() override { - if (num_ <= CUTOFF) { - *result_ = fib_serial(num_); - return; - } - - int left_result; - int right_result; - - spawn_child(fib{num_ - 1, &left_result}); - spawn_child(fib{num_ - 2, &right_result}); - - wait_for_all(); - *result_ = left_result + right_result; - } -}; - - int main() { - scheduler my_scheduler{&my_scheduler_memory, 4}; - - auto start = std::chrono::high_resolution_clock::now(); - my_scheduler.perform_work([] (){ - int result; - - fib fib_sub_task{45, &result}; - fork_join_task tbb_task{&fib_sub_task, task_id{1}}; - scheduler::execute_task(tbb_task); + using aligned_state = 
std::aligned_storage::type; + aligned_state data; - std::cout << "Result: " << result << std::endl; - }); - auto end = std::chrono::high_resolution_clock::now(); - long time = std::chrono::duration_cast(end - start).count(); - std::cout << "Startup time in us: " << time << std::endl; + std::cout << sizeof(aligned_state) << std::endl; + malloc_scheduler_memory sched_memory{8}; + std::cout << (std::uintptr_t)sched_memory.thread_for(0) % 64 << ", " << (std::uintptr_t)sched_memory.thread_for(1) % 64 << ", " << (std::uintptr_t)sched_memory.thread_for(2) % 64 << ", " << std::endl; } diff --git a/app/test_for_new/main.cpp b/app/test_for_new/main.cpp index 2e74529..fc48d64 100644 --- a/app/test_for_new/main.cpp +++ b/app/test_for_new/main.cpp @@ -1,7 +1,7 @@ #include #include -using namespace pls::internal::base; +using namespace pls::internal::data_structures; int global = 0; diff --git a/lib/pls/CMakeLists.txt b/lib/pls/CMakeLists.txt index 6cee207..28218d5 100644 --- a/lib/pls/CMakeLists.txt +++ b/lib/pls/CMakeLists.txt @@ -1,24 +1,30 @@ # List all required files here (cmake best practice to NOT automate this step!) 
add_library(pls STATIC - src/pls.cpp include/pls/pls.h - src/internal/base/spin_lock.cpp include/pls/internal/base/spin_lock.h - src/internal/base/thread.cpp include/pls/internal/base/thread.h - include/pls/internal/helpers/prohibit_new.h - src/internal/scheduling/abstract_task.cpp include/pls/internal/scheduling/abstract_task.h - src/internal/scheduling/scheduler.cpp include/pls/internal/scheduling/scheduler.h - src/internal/scheduling/thread_state.cpp include/pls/internal/scheduling/thread_state.h - src/internal/base/barrier.cpp include/pls/internal/base/barrier.h - src/internal/scheduling/root_task.cpp include/pls/internal/scheduling/root_task.h - src/internal/base/aligned_stack.cpp include/pls/internal/base/aligned_stack.h - include/pls/internal/base/system_details.h - src/internal/scheduling/run_on_n_threads_task.cpp include/pls/internal/scheduling/run_on_n_threads_task.h - src/internal/scheduling/fork_join_task.cpp include/pls/internal/scheduling/fork_join_task.h - src/internal/base/deque.cpp include/pls/internal/base/deque.h - src/algorithms/invoke_parallel.cpp include/pls/algorithms/invoke_parallel.h - include/pls/internal/base/error_handling.h - include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp - include/pls/internal/helpers/profiler.h - include/pls/internal/helpers/mini_benchmark.h) + include/pls/pls.h src/pls.cpp + + include/pls/algorithms/invoke_parallel.h src/algorithms/invoke_parallel.cpp + + include/pls/internal/base/spin_lock.h src/internal/base/spin_lock.cpp + include/pls/internal/base/thread.h src/internal/base/thread.cpp + include/pls/internal/base/barrier.h src/internal/base/barrier.cpp + include/pls/internal/base/system_details.h + include/pls/internal/base/error_handling.h + + include/pls/internal/data_structures/aligned_stack.h src/internal/data_structures/aligned_stack.cpp + include/pls/internal/data_structures/deque.h src/internal/data_structures/deque.cpp + + 
include/pls/internal/helpers/prohibit_new.h + include/pls/internal/helpers/profiler.h + include/pls/internal/helpers/mini_benchmark.h + + include/pls/internal/scheduling/root_task.h src/internal/scheduling/root_task.cpp + include/pls/internal/scheduling/thread_state.h src/internal/scheduling/thread_state.cpp + include/pls/internal/scheduling/abstract_task.h src/internal/scheduling/abstract_task.cpp + include/pls/internal/scheduling/scheduler.h src/internal/scheduling/scheduler.cpp + include/pls/internal/scheduling/run_on_n_threads_task.h src/internal/scheduling/run_on_n_threads_task.cpp + include/pls/internal/scheduling/fork_join_task.h src/internal/scheduling/fork_join_task.cpp + include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp +) # Add everything in `./include` to be in the include path of this project target_include_directories(pls diff --git a/lib/pls/include/pls/internal/base/barrier.h b/lib/pls/include/pls/internal/base/barrier.h index f5ea58b..996f0e0 100644 --- a/lib/pls/include/pls/internal/base/barrier.h +++ b/lib/pls/include/pls/internal/base/barrier.h @@ -7,21 +7,22 @@ namespace pls { namespace internal { namespace base { + /** + * Provides standard barrier behaviour. + * `count` threads have to call `wait()` before any of the `wait()` calls returns, + * thus blocking all threads until everyone reached the barrier. + * + * PORTABILITY: + * Current implementation is based on pthreads. 
+ */ class barrier { pthread_barrier_t barrier_; public: - explicit barrier(const unsigned int count): barrier_{} { - pthread_barrier_init(&barrier_, nullptr, count); - } + explicit barrier(unsigned int count); + ~barrier(); - ~barrier() { - pthread_barrier_destroy(&barrier_); - } - - void wait() { - pthread_barrier_wait(&barrier_); - } + void wait(); }; } } diff --git a/lib/pls/include/pls/internal/base/error_handling.h b/lib/pls/include/pls/internal/base/error_handling.h index d405dde..235964e 100644 --- a/lib/pls/include/pls/internal/base/error_handling.h +++ b/lib/pls/include/pls/internal/base/error_handling.h @@ -4,7 +4,12 @@ #include -// TODO: Figure out proper exception handling +/** + * Called when there is a non-recoverable error or a violated invariant in the scheduler. + * This SHOULD NOT HAPPEN AT ANY POINT in production; any instance of this is a bug! + * The implementation can be changed if for example no iostream is available on a system + * (or its inclusion adds too much overhead). + */ #define PLS_ERROR(msg) std::cout << msg << std::endl; exit(1); #endif //PLS_ERROR_HANDLING_H diff --git a/lib/pls/include/pls/internal/base/spin_lock.h b/lib/pls/include/pls/internal/base/spin_lock.h index c2b98c8..5acaf0a 100644 --- a/lib/pls/include/pls/internal/base/spin_lock.h +++ b/lib/pls/include/pls/internal/base/spin_lock.h @@ -10,6 +10,12 @@ namespace pls { namespace internal { namespace base { + /** + * A simple set and test_and_set based spin lock implementation. + * + * PORTABILITY: + * Current implementation is based on C++ 11 atomic_flag. 
+ */ class spin_lock { std::atomic_flag flag_; int yield_at_tries_; diff --git a/lib/pls/include/pls/internal/base/system_details.h b/lib/pls/include/pls/internal/base/system_details.h index a8dfb72..9df0707 100644 --- a/lib/pls/include/pls/internal/base/system_details.h +++ b/lib/pls/include/pls/internal/base/system_details.h @@ -7,6 +7,12 @@ namespace pls { namespace internal { namespace base { + /** + * Collection of system details, e.g. hardware cache line size. + * + * PORTABILITY: + * Currently uses sane default values for x86. + */ constexpr std::uintptr_t CACHE_LINE_SIZE = 64; } } diff --git a/lib/pls/include/pls/internal/base/thread.h b/lib/pls/include/pls/internal/base/thread.h index f03be21..6b94043 100644 --- a/lib/pls/include/pls/internal/base/thread.h +++ b/lib/pls/include/pls/internal/base/thread.h @@ -15,6 +15,16 @@ namespace pls { namespace base { using thread_entrypoint = void(); + /** + * Static methods that can be performed on the current thread. + * + * usage: + * this_thread::yield(); + * T* state = this_thread::state(); + * + * PORTABILITY: + * Current implementation is based on pthreads. + */ class this_thread { template friend class thread; @@ -51,6 +61,22 @@ namespace pls { } }; + /** + * Abstraction for starting a function in a separate thread. + * + * @tparam Function Lambda being started on the new thread. + * @tparam State State type held for this thread. + * + * usage: + * T* state; + * auto thread = start_thread([] { + * // Run on new thread + * }, state); + * thread.join(); // Wait for it to finish + * + * PORTABILITY: + * Current implementation is based on pthreads. 
+ */ template class thread { friend class this_thread; diff --git a/lib/pls/include/pls/internal/base/aligned_stack.h b/lib/pls/include/pls/internal/data_structures/aligned_stack.h similarity index 80% rename from lib/pls/include/pls/internal/base/aligned_stack.h rename to lib/pls/include/pls/internal/data_structures/aligned_stack.h index 7c16fec..ae5c2b6 100644 --- a/lib/pls/include/pls/internal/base/aligned_stack.h +++ b/lib/pls/include/pls/internal/data_structures/aligned_stack.h @@ -9,7 +9,19 @@ namespace pls { namespace internal { - namespace base { + namespace data_structures { + /** + * Generic stack-like data structure that allows allocating arbitrary objects in a given memory region. + * The objects will be stored aligned in the stack, making the storage cache friendly and very fast + * (as long as one can live with the stack restrictions). + * + * IMPORTANT: Does not call destructors on stored objects! Do not allocate resources in the objects! + * + * Usage: + * aligned_stack stack{pointer_to_memory, size_of_memory}; + * T* pointer = stack.push(some_object); // Copy-Construct the object on top of stack + * stack.pop(); // Remove the top object of type T and return a copy (no destructor is called) + */ class aligned_stack { // Keep bounds of our memory block char* memory_start_; @@ -24,15 +36,11 @@ namespace pls { typedef char* state; aligned_stack(): memory_start_{nullptr}, memory_end_{nullptr}, head_{nullptr} {}; - - aligned_stack(char* memory_region, const std::size_t size): - memory_start_{memory_region}, - memory_end_{memory_region + size}, - head_{next_alignment(memory_start_)} {} + aligned_stack(char* memory_region, const std::size_t size); template T* push(const T& object) { - // Placement new into desired memory location + // Copy-Construct return new ((void*)push())T(object); } @@ -52,7 +60,6 @@ namespace pls { template T pop() { head_ = head_ - next_alignment(sizeof(T)); - return *reinterpret_cast(head_); } diff --git a/lib/pls/include/pls/internal/base/deque.h 
b/lib/pls/include/pls/internal/data_structures/deque.h similarity index 82% rename from lib/pls/include/pls/internal/base/deque.h rename to lib/pls/include/pls/internal/data_structures/deque.h index 5fbe0bb..8652cc3 100644 --- a/lib/pls/include/pls/internal/base/deque.h +++ b/lib/pls/include/pls/internal/data_structures/deque.h @@ -2,11 +2,14 @@ #ifndef PLS_DEQUE_H #define PLS_DEQUE_H -#include "spin_lock.h" +#include "pls/internal/base/spin_lock.h" namespace pls { namespace internal { - namespace base { + namespace data_structures { + /** + * Turns any object into a deque item when inheriting from this. + */ class deque_item { friend class deque_internal; @@ -20,13 +23,19 @@ namespace pls { deque_item* head_; deque_item* tail_; - spin_lock lock_; + base::spin_lock lock_; deque_item* pop_head_internal(); deque_item* pop_tail_internal(); void push_tail_internal(deque_item *new_item); }; + /** + * A doubly linked list based deque. + * Storage is therefore only needed for the individual items. 
+ * + * @tparam Item The type of items stored in this deque + */ template class deque: deque_internal { public: diff --git a/lib/pls/include/pls/internal/scheduling/fork_join_task.h b/lib/pls/include/pls/internal/scheduling/fork_join_task.h index 830772f..efcd395 100644 --- a/lib/pls/include/pls/internal/scheduling/fork_join_task.h +++ b/lib/pls/include/pls/internal/scheduling/fork_join_task.h @@ -4,8 +4,8 @@ #include "pls/internal/helpers/profiler.h" -#include "pls/internal/base/aligned_stack.h" -#include "pls/internal/base/deque.h" +#include "pls/internal/data_structures/aligned_stack.h" +#include "pls/internal/data_structures/deque.h" #include "abstract_task.h" #include "thread_state.h" @@ -14,7 +14,7 @@ namespace pls { namespace internal { namespace scheduling { class fork_join_task; - class fork_join_sub_task: public base::deque_item { + class fork_join_sub_task: public data_structures::deque_item { friend class fork_join_task; // Coordinate finishing of sub_tasks @@ -25,7 +25,7 @@ namespace pls { fork_join_task* tbb_task_; // Stack Management (reset stack pointer after wait_for_all() calls) - base::aligned_stack::state stack_state_; + data_structures::aligned_stack::state stack_state_; protected: explicit fork_join_sub_task(); fork_join_sub_task(const fork_join_sub_task& other); @@ -62,10 +62,10 @@ namespace pls { fork_join_sub_task* root_task_; fork_join_sub_task* currently_executing_; - base::aligned_stack* my_stack_; + data_structures::aligned_stack* my_stack_; // Double-Ended Queue management - base::deque deque_; + data_structures::deque deque_; // Steal Management fork_join_sub_task* last_stolen_; diff --git a/lib/pls/include/pls/internal/scheduling/scheduler.h b/lib/pls/include/pls/internal/scheduling/scheduler.h index 55e72b5..a9e2da5 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler.h @@ -7,7 +7,8 @@ #include "pls/internal/helpers/profiler.h" -#include 
"pls/internal/base/aligned_stack.h" +#include "pls/internal/data_structures/aligned_stack.h" + #include "pls/internal/base/thread.h" #include "pls/internal/base/barrier.h" diff --git a/lib/pls/include/pls/internal/scheduling/scheduler_memory.h b/lib/pls/include/pls/internal/scheduling/scheduler_memory.h index c9d233d..dd233e9 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler_memory.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler_memory.h @@ -1,4 +1,4 @@ -#include "pls/internal/base/aligned_stack.h" +#include "pls/internal/data_structures/aligned_stack.h" #include "pls/internal/base/thread.h" #include "thread_state.h" @@ -17,7 +17,7 @@ namespace pls { virtual size_t max_threads() = 0; virtual thread_state* thread_state_for(size_t id) = 0; virtual scheduler_thread* thread_for(size_t id) = 0; - virtual base::aligned_stack* task_stack_for(size_t id) = 0; + virtual data_structures::aligned_stack* task_stack_for(size_t id) = 0; }; template @@ -25,28 +25,28 @@ namespace pls { std::array threads_; std::array thread_states_; std::array, MAX_THREADS> task_stacks_memory_; - std::array task_stacks_; + std::array task_stacks_; public: static_scheduler_memory() { for (size_t i = 0; i < MAX_THREADS; i++) { - task_stacks_[i] = base::aligned_stack(task_stacks_memory_[i].data(), TASK_STACK_SIZE); + task_stacks_[i] = data_structures::aligned_stack(task_stacks_memory_[i].data(), TASK_STACK_SIZE); } } size_t max_threads() override { return MAX_THREADS; } thread_state* thread_state_for(size_t id) override { return &thread_states_[id]; } scheduler_thread* thread_for(size_t id) override { return &threads_[id]; } - base::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } + data_structures::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } }; class malloc_scheduler_memory: public scheduler_memory { size_t num_threads_; - scheduler_thread* threads_; + alignas(64) scheduler_thread* threads_; thread_state* 
thread_states_; char** task_stacks_memory_; - base::aligned_stack* task_stacks_; + data_structures::aligned_stack* task_stacks_; public: explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16); ~malloc_scheduler_memory(); @@ -54,7 +54,7 @@ namespace pls { size_t max_threads() override { return num_threads_; } thread_state* thread_state_for(size_t id) override { return &thread_states_[id]; } scheduler_thread* thread_for(size_t id) override { return &threads_[id]; } - base::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } + data_structures::aligned_stack* task_stack_for(size_t id) override { return &task_stacks_[id]; } }; } } diff --git a/lib/pls/include/pls/internal/scheduling/thread_state.h b/lib/pls/include/pls/internal/scheduling/thread_state.h index 2f9cda9..ee864db 100644 --- a/lib/pls/include/pls/internal/scheduling/thread_state.h +++ b/lib/pls/include/pls/internal/scheduling/thread_state.h @@ -4,10 +4,9 @@ #include +#include "pls/internal/data_structures/aligned_stack.h" #include "abstract_task.h" -#include "pls/internal/base/aligned_stack.h" - namespace pls { namespace internal { namespace scheduling { @@ -18,7 +17,7 @@ namespace pls { scheduler* scheduler_; abstract_task* root_task_; abstract_task* current_task_; - base::aligned_stack* task_stack_; + data_structures::aligned_stack* task_stack_; size_t id_; base::spin_lock lock_; std::minstd_rand random_; @@ -31,7 +30,7 @@ namespace pls { id_{0}, random_{id_} {}; - thread_state(scheduler* scheduler, base::aligned_stack* task_stack, unsigned int id): + thread_state(scheduler* scheduler, data_structures::aligned_stack* task_stack, unsigned int id): scheduler_{scheduler}, root_task_{nullptr}, current_task_{nullptr}, diff --git a/lib/pls/src/internal/base/barrier.cpp b/lib/pls/src/internal/base/barrier.cpp index 038e030..a2893be 100644 --- a/lib/pls/src/internal/base/barrier.cpp +++ b/lib/pls/src/internal/base/barrier.cpp @@ -3,7 +3,17 @@ namespace 
pls { namespace internal { namespace base { + barrier::barrier(const unsigned int count): barrier_{} { + pthread_barrier_init(&barrier_, nullptr, count); + } + barrier::~barrier() { + pthread_barrier_destroy(&barrier_); + } + + void barrier::wait() { + pthread_barrier_wait(&barrier_); + } } } } diff --git a/lib/pls/src/internal/base/aligned_stack.cpp b/lib/pls/src/internal/data_structures/aligned_stack.cpp similarity index 72% rename from lib/pls/src/internal/base/aligned_stack.cpp rename to lib/pls/src/internal/data_structures/aligned_stack.cpp index 4efe681..ff54dd3 100644 --- a/lib/pls/src/internal/base/aligned_stack.cpp +++ b/lib/pls/src/internal/data_structures/aligned_stack.cpp @@ -1,15 +1,20 @@ -#include "pls/internal/base/aligned_stack.h" +#include "pls/internal/data_structures/aligned_stack.h" #include "pls/internal/base/system_details.h" namespace pls { namespace internal { - namespace base { + namespace data_structures { + aligned_stack::aligned_stack(char* memory_region, const std::size_t size): + memory_start_{memory_region}, + memory_end_{memory_region + size}, + head_{next_alignment(memory_start_)} {} + std::uintptr_t aligned_stack::next_alignment(std::uintptr_t size) { - std::uintptr_t miss_alignment = size % CACHE_LINE_SIZE; + std::uintptr_t miss_alignment = size % base::CACHE_LINE_SIZE; if (miss_alignment == 0) { return size; } else { - return size + (CACHE_LINE_SIZE - miss_alignment); + return size + (base::CACHE_LINE_SIZE - miss_alignment); } } diff --git a/lib/pls/src/internal/base/deque.cpp b/lib/pls/src/internal/data_structures/deque.cpp similarity index 89% rename from lib/pls/src/internal/base/deque.cpp rename to lib/pls/src/internal/data_structures/deque.cpp index 5370a91..786e04b 100644 --- a/lib/pls/src/internal/base/deque.cpp +++ b/lib/pls/src/internal/data_structures/deque.cpp @@ -1,12 +1,12 @@ #include -#include "pls/internal/base/deque.h" +#include "pls/internal/data_structures/deque.h" namespace pls { namespace internal { - 
namespace base { + namespace data_structures { deque_item* deque_internal::pop_head_internal() { - std::lock_guard lock{lock_}; + std::lock_guard lock{lock_}; if (head_ == nullptr) { return nullptr; @@ -24,7 +24,7 @@ namespace pls { } deque_item* deque_internal::pop_tail_internal() { - std::lock_guard lock{lock_}; + std::lock_guard lock{lock_}; if (tail_ == nullptr) { return nullptr; @@ -42,7 +42,7 @@ namespace pls { } void deque_internal::push_tail_internal(deque_item *new_item) { - std::lock_guard lock{lock_}; + std::lock_guard lock{lock_}; if (tail_ != nullptr) { tail_->prev_ = new_item; diff --git a/lib/pls/src/internal/scheduling/fork_join_task.cpp b/lib/pls/src/internal/scheduling/fork_join_task.cpp index 1f1360c..164f804 100644 --- a/lib/pls/src/internal/scheduling/fork_join_task.cpp +++ b/lib/pls/src/internal/scheduling/fork_join_task.cpp @@ -7,14 +7,14 @@ namespace pls { namespace internal { namespace scheduling { fork_join_sub_task::fork_join_sub_task(): - base::deque_item{}, + data_structures::deque_item{}, ref_count_{0}, parent_{nullptr}, tbb_task_{nullptr}, stack_state_{nullptr} {} fork_join_sub_task::fork_join_sub_task(const fork_join_sub_task& other): - base::deque_item(other), + data_structures::deque_item(other), ref_count_{0}, parent_{nullptr}, tbb_task_{nullptr}, diff --git a/lib/pls/src/internal/scheduling/scheduler_memory.cpp b/lib/pls/src/internal/scheduling/scheduler_memory.cpp index 9018be9..7201242 100644 --- a/lib/pls/src/internal/scheduling/scheduler_memory.cpp +++ b/lib/pls/src/internal/scheduling/scheduler_memory.cpp @@ -8,11 +8,11 @@ namespace pls { threads_ = reinterpret_cast(malloc(num_threads * sizeof(scheduler_thread))); thread_states_ = reinterpret_cast(malloc(num_threads * sizeof(thread_state))); - task_stacks_ = reinterpret_cast(malloc(num_threads * sizeof(base::aligned_stack))); + task_stacks_ = reinterpret_cast(malloc(num_threads * sizeof(data_structures::aligned_stack))); task_stacks_memory_ = 
reinterpret_cast(malloc(num_threads * sizeof(char*))); for (size_t i = 0; i < num_threads_; i++) { task_stacks_memory_[i] = reinterpret_cast(malloc(memory_per_stack)); - task_stacks_[i] = base::aligned_stack(task_stacks_memory_[i], memory_per_stack); + task_stacks_[i] = data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack); } } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index dbe5d58..8e7850d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ add_executable(tests main.cpp - base_tests.cpp scheduling_tests.cpp) + base_tests.cpp scheduling_tests.cpp data_structures_test.cpp) target_link_libraries(tests catch2 pls) diff --git a/test/base_tests.cpp b/test/base_tests.cpp index f1764d6..b22cfd4 100644 --- a/test/base_tests.cpp +++ b/test/base_tests.cpp @@ -1,12 +1,10 @@ #include #include #include -#include #include #include #include -#include using namespace pls::internal::base; using namespace std; @@ -15,7 +13,7 @@ static bool base_tests_visited; static int base_tests_local_value_one; static vector base_tests_local_value_two; -TEST_CASE( "thread creation and joining", "[internal/base/thread.h]") { +TEST_CASE( "thread creation and joining", "[internal/data_structures/thread.h]") { base_tests_visited = false; auto t1 = start_thread([]() { base_tests_visited = true; }); t1.join(); @@ -23,7 +21,7 @@ TEST_CASE( "thread creation and joining", "[internal/base/thread.h]") { REQUIRE(base_tests_visited); } -TEST_CASE( "thread state", "[internal/base/thread.h]") { +TEST_CASE( "thread state", "[internal/data_structures/thread.h]") { int state_one = 1; vector state_two{1, 2}; @@ -38,7 +36,7 @@ TEST_CASE( "thread state", "[internal/base/thread.h]") { int base_tests_shared_counter; -TEST_CASE( "spinlock protects concurrent counter", "[internal/base/spinlock.h]") { +TEST_CASE( "spinlock protects concurrent counter", "[internal/data_structures/spinlock.h]") { constexpr int num_iterations = 1000000; base_tests_shared_counter = 0; 
spin_lock lock{}; @@ -85,122 +83,3 @@ TEST_CASE( "spinlock protects concurrent counter", "[internal/base/spinlock.h]") REQUIRE(base_tests_shared_counter == 0); } } - -TEST_CASE( "aligned stack stores objects correctly", "[internal/base/aligned_stack.h]") { - constexpr long data_size = 1024; - char data[data_size]; - aligned_stack stack{data, data_size}; - - SECTION( "stack correctly pushes sub linesize objects" ) { - std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; - std::array small_data_two{}; - std::array small_data_three{'A'}; - - auto pointer_one = stack.push(small_data_one); - auto pointer_two = stack.push(small_data_two); - auto pointer_three = stack.push(small_data_three); - - REQUIRE(reinterpret_cast(pointer_one) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_two) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_three) % CACHE_LINE_SIZE == 0); - } - - SECTION( "stack correctly pushes above linesize objects" ) { - std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; - std::array big_data_one{}; - - auto big_pointer_one = stack.push(big_data_one); - auto small_pointer_one = stack.push(small_data_one); - - REQUIRE(reinterpret_cast(big_pointer_one) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(small_pointer_one) % CACHE_LINE_SIZE == 0); - } - - SECTION( "stack correctly stores and retrieves objects" ) { - std::array data_one{'a', 'b', 'c', 'd', 'e'}; - - stack.push(data_one); - auto retrieved_data = stack.pop>(); - - REQUIRE(retrieved_data == std::array{'a', 'b', 'c', 'd', 'e'}); - } - - SECTION( "stack can push and pop multiple times with correct alignment" ) { - std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; - std::array small_data_two{}; - std::array small_data_three{'A'}; - - auto pointer_one = stack.push(small_data_one); - auto pointer_two = stack.push(small_data_two); - auto pointer_three = stack.push(small_data_three); - stack.pop(); - stack.pop(); - auto pointer_four = stack.push(small_data_two); - auto pointer_five 
= stack.push(small_data_three); - - REQUIRE(reinterpret_cast(pointer_one) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_two) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_three) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_four) % CACHE_LINE_SIZE == 0); - REQUIRE(reinterpret_cast(pointer_five) % CACHE_LINE_SIZE == 0); - - REQUIRE(pointer_four == pointer_two); - REQUIRE(pointer_five == pointer_three); - } -} - -TEST_CASE( "deque stores objects correctly", "[internal/base/deque.h]") { - class my_item: public deque_item { - - }; - - deque deque; - my_item one, two, three; - - SECTION( "add and remove items form the tail" ) { - deque.push_tail(&one); - deque.push_tail(&two); - deque.push_tail(&three); - - REQUIRE(deque.pop_tail() == &three); - REQUIRE(deque.pop_tail() == &two); - REQUIRE(deque.pop_tail() == &one); - } - - SECTION( "handles getting empty by popping the tail correctly" ) { - deque.push_tail(&one); - REQUIRE(deque.pop_tail() == &one); - - deque.push_tail(&two); - REQUIRE(deque.pop_tail() == &two); - } - - SECTION( "remove items form the head" ) { - deque.push_tail(&one); - deque.push_tail(&two); - deque.push_tail(&three); - - REQUIRE(deque.pop_head() == &one); - REQUIRE(deque.pop_head() == &two); - REQUIRE(deque.pop_head() == &three); - } - - SECTION( "handles getting empty by popping the head correctly" ) { - deque.push_tail(&one); - REQUIRE(deque.pop_head() == &one); - - deque.push_tail(&two); - REQUIRE(deque.pop_head() == &two); - } - - SECTION( "handles getting empty by popping the head and tail correctly" ) { - deque.push_tail(&one); - REQUIRE(deque.pop_tail() == &one); - - deque.push_tail(&two); - REQUIRE(deque.pop_head() == &two); - - deque.push_tail(&three); - REQUIRE(deque.pop_tail() == &three); - } -} diff --git a/test/data_structures_test.cpp b/test/data_structures_test.cpp new file mode 100644 index 0000000..616b85f --- /dev/null +++ b/test/data_structures_test.cpp @@ -0,0 +1,133 @@ +#include + 
+#include + +#include +#include + +#include +#include + +using namespace pls::internal::data_structures; +using namespace pls::internal::base; +using namespace std; + + +TEST_CASE( "aligned stack stores objects correctly", "[internal/data_structures/aligned_stack.h]") { + constexpr long data_size = 1024; + char data[data_size]; + aligned_stack stack{data, data_size}; + + SECTION( "stack correctly pushes sub linesize objects" ) { + std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; + std::array small_data_two{}; + std::array small_data_three{'A'}; + + auto pointer_one = stack.push(small_data_one); + auto pointer_two = stack.push(small_data_two); + auto pointer_three = stack.push(small_data_three); + + REQUIRE(reinterpret_cast(pointer_one) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_two) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_three) % CACHE_LINE_SIZE == 0); + } + + SECTION( "stack correctly pushes above linesize objects" ) { + std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; + std::array big_data_one{}; + + auto big_pointer_one = stack.push(big_data_one); + auto small_pointer_one = stack.push(small_data_one); + + REQUIRE(reinterpret_cast(big_pointer_one) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(small_pointer_one) % CACHE_LINE_SIZE == 0); + } + + SECTION( "stack correctly stores and retrieves objects" ) { + std::array data_one{'a', 'b', 'c', 'd', 'e'}; + + stack.push(data_one); + auto retrieved_data = stack.pop>(); + + REQUIRE(retrieved_data == std::array{'a', 'b', 'c', 'd', 'e'}); + } + + SECTION( "stack can push and pop multiple times with correct alignment" ) { + std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; + std::array small_data_two{}; + std::array small_data_three{'A'}; + + auto pointer_one = stack.push(small_data_one); + auto pointer_two = stack.push(small_data_two); + auto pointer_three = stack.push(small_data_three); + stack.pop(); + stack.pop(); + auto pointer_four = stack.push(small_data_two); + 
auto pointer_five = stack.push(small_data_three); + + REQUIRE(reinterpret_cast(pointer_one) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_two) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_three) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_four) % CACHE_LINE_SIZE == 0); + REQUIRE(reinterpret_cast(pointer_five) % CACHE_LINE_SIZE == 0); + + REQUIRE(pointer_four == pointer_two); + REQUIRE(pointer_five == pointer_three); + } +} + +TEST_CASE( "deque stores objects correctly", "[internal/data_structures/deque.h]") { + class my_item: public deque_item { + + }; + + deque deque; + my_item one, two, three; + + SECTION( "add and remove items form the tail" ) { + deque.push_tail(&one); + deque.push_tail(&two); + deque.push_tail(&three); + + REQUIRE(deque.pop_tail() == &three); + REQUIRE(deque.pop_tail() == &two); + REQUIRE(deque.pop_tail() == &one); + } + + SECTION( "handles getting empty by popping the tail correctly" ) { + deque.push_tail(&one); + REQUIRE(deque.pop_tail() == &one); + + deque.push_tail(&two); + REQUIRE(deque.pop_tail() == &two); + } + + SECTION( "remove items form the head" ) { + deque.push_tail(&one); + deque.push_tail(&two); + deque.push_tail(&three); + + REQUIRE(deque.pop_head() == &one); + REQUIRE(deque.pop_head() == &two); + REQUIRE(deque.pop_head() == &three); + } + + SECTION( "handles getting empty by popping the head correctly" ) { + deque.push_tail(&one); + REQUIRE(deque.pop_head() == &one); + + deque.push_tail(&two); + REQUIRE(deque.pop_head() == &two); + } + + SECTION( "handles getting empty by popping the head and tail correctly" ) { + deque.push_tail(&one); + REQUIRE(deque.pop_tail() == &one); + + deque.push_tail(&two); + REQUIRE(deque.pop_head() == &two); + + deque.push_tail(&three); + REQUIRE(deque.pop_tail() == &three); + } +}