From f3e7df7702a3f4fe95dda8524670464ccb5906b6 Mon Sep 17 00:00:00 2001
From: FritzFlorian
Date: Fri, 30 Aug 2019 12:44:52 +0200
Subject: [PATCH] Move scheduling related data structures in correct package.

---
 lib/pls/CMakeLists.txt | 14 +++++++-------
 lib/pls/include/pls/internal/data_structures/aligned_stack.h | 64 -----
 lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h | 34 ---
 lib/pls/include/pls/internal/data_structures/deque.h | 19 --
 lib/pls/include/pls/internal/data_structures/locking_deque.h | 77 ------
 lib/pls/include/pls/internal/data_structures/locking_deque_impl.h | 105 ---------
 lib/pls/include/pls/internal/data_structures/stamped_integer.h | 27 --
 lib/pls/include/pls/internal/data_structures/work_stealing_deque.h | 97 --------
 lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h | 162 --------------
 lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack.h | 66 ++++++
 lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack_impl.h | 36 +++
 lib/pls/include/pls/internal/scheduling/data_structures/deque.h | 21 ++
 lib/pls/include/pls/internal/scheduling/data_structures/locking_deque.h | 79 ++++++
 lib/pls/include/pls/internal/scheduling/data_structures/locking_deque_impl.h | 107 +++++++++
 lib/pls/include/pls/internal/scheduling/data_structures/stamped_integer.h | 29 ++
 lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque.h | 99 ++++++++
 lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h | 164 ++++++++++++++
 17 files changed, 608 insertions(+), 592 deletions(-)
 delete mode 100644 lib/pls/include/pls/internal/data_structures/aligned_stack.h
 delete mode 100644 lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h
 delete mode 100644 lib/pls/include/pls/internal/data_structures/deque.h
 delete mode 100644 lib/pls/include/pls/internal/data_structures/locking_deque.h
 delete mode 100644 lib/pls/include/pls/internal/data_structures/locking_deque_impl.h
 delete mode 100644 lib/pls/include/pls/internal/data_structures/stamped_integer.h
 delete mode 100644 lib/pls/include/pls/internal/data_structures/work_stealing_deque.h
 delete mode 100644 lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h
 create mode 100644 lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack.h
 create mode 100644 lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack_impl.h
 create mode 100644 lib/pls/include/pls/internal/scheduling/data_structures/deque.h
 create mode 100644 lib/pls/include/pls/internal/scheduling/data_structures/locking_deque.h
 create mode 100644 lib/pls/include/pls/internal/scheduling/data_structures/locking_deque_impl.h
 create mode 100644 lib/pls/include/pls/internal/scheduling/data_structures/stamped_integer.h
 create mode 100644 lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque.h
 create mode 100644 lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h

diff --git a/lib/pls/CMakeLists.txt b/lib/pls/CMakeLists.txt
index 67e9cd8..24e0e9a 100644
--- a/lib/pls/CMakeLists.txt
+++ b/lib/pls/CMakeLists.txt
@@ -35,13 +35,13 @@ add_library(pls STATIC
         include/pls/internal/base/error_handling.h
         include/pls/internal/base/alignment.h src/internal/base/alignment.cpp
-        include/pls/internal/data_structures/aligned_stack.h src/internal/data_structures/aligned_stack.cpp
-        include/pls/internal/data_structures/aligned_stack_impl.h
-        include/pls/internal/data_structures/deque.h
-        include/pls/internal/data_structures/locking_deque.h
-        include/pls/internal/data_structures/locking_deque_impl.h
-        include/pls/internal/data_structures/work_stealing_deque.h include/pls/internal/data_structures/work_stealing_deque_impl.h
-        include/pls/internal/data_structures/stamped_integer.h
+        include/pls/internal/scheduling/data_structures/aligned_stack.h src/internal/data_structures/aligned_stack.cpp
+        include/pls/internal/scheduling/data_structures/aligned_stack_impl.h
+        include/pls/internal/scheduling/data_structures/deque.h
+        include/pls/internal/scheduling/data_structures/locking_deque.h
+        include/pls/internal/scheduling/data_structures/locking_deque_impl.h
+        include/pls/internal/scheduling/data_structures/work_stealing_deque.h include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h
+        include/pls/internal/scheduling/data_structures/stamped_integer.h
         include/pls/internal/helpers/prohibit_new.h
         include/pls/internal/helpers/profiler.h
diff --git a/lib/pls/include/pls/internal/data_structures/aligned_stack.h b/lib/pls/include/pls/internal/data_structures/aligned_stack.h
deleted file mode 100644
index 926ffc5..0000000
--- a/lib/pls/include/pls/internal/data_structures/aligned_stack.h
+++ /dev/null
@@ -1,64 +0,0 @@
-
-#ifndef PLS_ALIGNED_STACK_H
-#define PLS_ALIGNED_STACK_H
-
-#include
-#include
-
-#include "pls/internal/base/error_handling.h"
-#include "pls/internal/base/alignment.h"
-
-namespace pls {
-namespace internal {
-namespace data_structures {
-
-using base::system_details::pointer_t;
-
-/**
- * Generic stack-like data structure that allows to allocate arbitrary objects in a given memory region.
- * The objects will be stored aligned in the stack, making the storage cache friendly and very fast
- * (as long as one can live with the stack restrictions).
- *
- * IMPORTANT: Does not call destructors on stored objects! Do not allocate resources in the objects!
- * - * Usage: - * aligned_stack stack{pointer_to_memory, size_of_memory}; - * T* pointer = stack.push(constructor_arguments); // Perfect-Forward-Construct the object on top of stack - * stack.pop(); // Remove the top object of type T - */ -class aligned_stack { - public: - typedef size_t stack_offset; - - aligned_stack() : aligned_memory_start_{0}, aligned_memory_end_{0}, max_offset_{0}, current_offset_{0} {}; - aligned_stack(pointer_t memory_region, std::size_t size); - aligned_stack(char *memory_region, std::size_t size); - - template - T *push(ARGS &&... args); - template - void *push_bytes(); - void *push_bytes(size_t size); - template - T pop(); - - void *memory_at_offset(stack_offset offset) const; - - stack_offset save_offset() const { return current_offset_; } - void reset_offset(stack_offset new_offset) { current_offset_ = new_offset; } - - private: - // Keep bounds of our memory block - pointer_t aligned_memory_start_; - pointer_t aligned_memory_end_; - - stack_offset max_offset_; - stack_offset current_offset_; -}; - -} -} -} -#include "aligned_stack_impl.h" - -#endif //PLS_ALIGNED_STACK_H diff --git a/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h b/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h deleted file mode 100644 index cf500c4..0000000 --- a/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h +++ /dev/null @@ -1,34 +0,0 @@ - -#ifndef PLS_ALIGNED_STACK_IMPL_H -#define PLS_ALIGNED_STACK_IMPL_H - -#include - -namespace pls { -namespace internal { -namespace data_structures { - -template -T *aligned_stack::push(ARGS &&... args) { - // Perfect-Forward construct - return new(push_bytes())T(std::forward(args)...); -} - -template -void *aligned_stack::push_bytes() { - return push_bytes(sizeof(T)); -} - -template -T aligned_stack::pop() { - auto num_cache_lines = base::alignment::next_alignment(sizeof(T)) / base::system_details::CACHE_LINE_SIZE; - current_offset_ -= num_cache_lines; - - return *reinterpret_cast(memory_at_offset(current_offset_)); -} - -} -} -} - -#endif //PLS_ALIGNED_STACK_IMPL_H diff --git a/lib/pls/include/pls/internal/data_structures/deque.h b/lib/pls/include/pls/internal/data_structures/deque.h deleted file mode 100644 index 5f90a41..0000000 --- a/lib/pls/include/pls/internal/data_structures/deque.h +++ /dev/null @@ -1,19 +0,0 @@ - -#ifndef PLS_DEQUE_H_ -#define PLS_DEQUE_H_ - -#include "work_stealing_deque.h" -#include "locking_deque.h" - -namespace pls { -namespace internal { -namespace data_structures { - -template -using deque = work_stealing_deque; - -} -} -} - -#endif //PLS_DEQUE_H_ diff --git a/lib/pls/include/pls/internal/data_structures/locking_deque.h b/lib/pls/include/pls/internal/data_structures/locking_deque.h deleted file mode 100644 index 73caa68..0000000 --- a/lib/pls/include/pls/internal/data_structures/locking_deque.h +++ /dev/null @@ -1,77 +0,0 @@ - -#ifndef PLS_LOCKING_DEQUE_H -#define PLS_LOCKING_DEQUE_H - -#include - -#include "pls/internal/base/spin_lock.h" -#include "pls/internal/data_structures/aligned_stack.h" - -namespace pls { -namespace internal { -namespace data_structures { - -using deque_offset = aligned_stack::stack_offset; - -/** - * Wraps any object into a deque item. - */ -template -struct locking_deque_item { - Item *item_; - - locking_deque_item *prev_; - locking_deque_item *next_; - -}; - -template -struct locking_deque_container : public locking_deque_item { - Content content_; - - public: - template - explicit locking_deque_container(ARGS &&... 
args) : content_{std::forward(args)...} {} -}; - -/** - * A double linked list based deque. - * Storage is therefore only needed for the individual items. - * - * @tparam Item The type of items stored in this deque - */ -template -class locking_deque { - aligned_stack *stack_; - - locking_deque_item *head_; - locking_deque_item *tail_; - - locking_deque_item *last_inserted_; - - base::spin_lock lock_; - - public: - explicit locking_deque(aligned_stack *stack) - : stack_{stack}, head_{nullptr}, tail_{nullptr}, lock_{} {} - - template - T *push_task(ARGS &&... args); - template - T *push_object(ARGS &&... args); - void *push_bytes(size_t size); - void publish_last_task(); - - Item *pop_local_task(); - Item *pop_external_task(); - - void reset_offset(deque_offset state); - deque_offset save_offset(); -}; - -} -} -} -#include "locking_deque_impl.h" - -#endif //PLS_LOCKING_DEQUE_H diff --git a/lib/pls/include/pls/internal/data_structures/locking_deque_impl.h b/lib/pls/include/pls/internal/data_structures/locking_deque_impl.h deleted file mode 100644 index 7657fcf..0000000 --- a/lib/pls/include/pls/internal/data_structures/locking_deque_impl.h +++ /dev/null @@ -1,105 +0,0 @@ - -#ifndef PLS_LOCKING_DEQUE_IMPL_H_ -#define PLS_LOCKING_DEQUE_IMPL_H_ - -namespace pls { -namespace internal { -namespace data_structures { - -template -template -T *locking_deque::push_task(ARGS &&...args) { - static_assert(std::is_same::value || std::is_base_of::value, - "Must only push types of onto work_stealing_deque"); - - // Allocate object - auto deque_item = stack_->push>(std::forward(args)...); - deque_item->item_ = &deque_item->content_; - - // Keep for later publishing - last_inserted_ = deque_item; - - // ...actual data reference - return &deque_item->content_; -} - -template -template -T *locking_deque::push_object(ARGS &&... 
args) { - // Simply add data to the stack, do not publish it in any way - return stack_->push(std::forward(args)...); -} - -template -void *locking_deque::push_bytes(size_t size) { - // Simply add data to the stack, do not publish it in any way - return stack_->push_bytes(size); -} - -template -void locking_deque::publish_last_task() { - std::lock_guard lock{lock_}; - - if (tail_ != nullptr) { - tail_->next_ = last_inserted_; - } else { - head_ = last_inserted_; - } - last_inserted_->prev_ = tail_; - last_inserted_->next_ = nullptr; - tail_ = last_inserted_; -} - -template -Task *locking_deque::pop_local_task() { - std::lock_guard lock{lock_}; - - if (tail_ == nullptr) { - return nullptr; - } - - auto result = tail_; - tail_ = tail_->prev_; - if (tail_ == nullptr) { - head_ = nullptr; - } else { - tail_->next_ = nullptr; - } - - return result->item_; -} - -template -Task *locking_deque::pop_external_task() { - std::lock_guard lock{lock_}; - - if (head_ == nullptr) { - return nullptr; - } - - auto result = head_; - head_ = head_->next_; - if (head_ == nullptr) { - tail_ = nullptr; - } else { - head_->prev_ = nullptr; - } - - return result->item_; -} - -template -void locking_deque::reset_offset(deque_offset state) { - stack_->reset_offset(state); -} - -template -deque_offset locking_deque::save_offset() { - return stack_->save_offset(); -} - -} -} -} - -#endif //PLS_LOCKING_DEQUE_IMPL_H_ diff --git a/lib/pls/include/pls/internal/data_structures/stamped_integer.h b/lib/pls/include/pls/internal/data_structures/stamped_integer.h deleted file mode 100644 index a24bcfa..0000000 --- a/lib/pls/include/pls/internal/data_structures/stamped_integer.h +++ /dev/null @@ -1,27 +0,0 @@ - -#ifndef PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_DATA_STRUCTURES_STAMPED_INTEGER_H_ -#define PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_DATA_STRUCTURES_STAMPED_INTEGER_H_ - -#include "pls/internal/base/system_details.h" - -namespace pls { -namespace internal { -namespace data_structures { - -constexpr unsigned long HALF_CACHE_LINE = base::system_details::CACHE_LINE_SIZE / 2; -struct stamped_integer { - using member_t = base::system_details::cas_integer; - - member_t stamp:HALF_CACHE_LINE; - member_t value:HALF_CACHE_LINE; - - stamped_integer() : stamp{0}, value{0} {}; - stamped_integer(member_t new_value) : stamp{0}, value{new_value} {}; - stamped_integer(member_t new_stamp, member_t new_value) : stamp{new_stamp}, value{new_value} {}; -}; - -} -} -} - -#endif //PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_DATA_STRUCTURES_STAMPED_INTEGER_H_ diff --git a/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h b/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h deleted file mode 100644 index 63c109d..0000000 --- a/lib/pls/include/pls/internal/data_structures/work_stealing_deque.h +++ /dev/null @@ -1,97 +0,0 @@ - -#ifndef PLS_WORK_STEALING_DEQUE_H_ -#define PLS_WORK_STEALING_DEQUE_H_ - -#include - -#include "pls/internal/base/error_handling.h" -#include "pls/internal/data_structures/stamped_integer.h" - -#include "aligned_stack.h" - -namespace pls { -namespace internal { -namespace data_structures { - -using base::system_details::pointer_t; - -// Integer split into two halfs, can be used in CAS operations -using data_structures::stamped_integer; -using deque_offset = stamped_integer::member_t; - -// Single Item in the deque -class work_stealing_deque_item { - // TODO: In our opinion these atomic's are a pure formality to make the thread sanitizer 
happy, - // as the race occurs in 'pop_head', where ALL CASES reading a corrupt/old value are cases - // where the next CAS fails anywas, thus making these corrupted values have no influence on - // the overall program execution. - // ==> If we find performance problems in this queue, try removing the atomics again. - // Pointer to the actual data - std::atomic data_; - // Index (relative to stack base) to the next and previous element - std::atomic next_item_; - deque_offset previous_item_; - - public: - work_stealing_deque_item() : data_{0}, next_item_{}, previous_item_{} {} - - template - Item *data() { - return reinterpret_cast(data_.load()); - } - - template - void set_data(Item *data) { - data_ = reinterpret_cast(data); - } - - deque_offset next_item() const { return next_item_.load(); } - void set_next_item(deque_offset next_item) { next_item_ = next_item; } - - deque_offset previous_item() const { return previous_item_; } - void set_previous_item(deque_offset previous_item) { previous_item_ = previous_item; } -}; - -template -class work_stealing_deque { - // Deque 'takes over' stack and handles memory management while in use. - // At any point in time the deque can stop using more memory and the stack can be used by other entities. - aligned_stack *stack_; - - std::atomic head_; - std::atomic tail_; - deque_offset previous_tail_; - - Task* last_pushed_task_; - - public: - explicit work_stealing_deque(aligned_stack *stack) : stack_{stack}, - head_{stamped_integer{0, 0}}, - tail_{0}, - previous_tail_{0}, - last_pushed_task_{0} {} - - template - T *push_task(ARGS &&... args); - template - T *push_object(ARGS &&... args); - void *push_bytes(size_t size); - void publish_last_task(); - - Task *pop_local_task(); - Task *pop_external_task(); - - void reset_offset(deque_offset offset); - deque_offset save_offset(); - - private: - work_stealing_deque_item *item_at(deque_offset offset); - deque_offset current_stack_offset(); -}; - -} -} -} -#include "work_stealing_deque_impl.h" - -#endif //PLS_WORK_STEALING_DEQUE_H_ diff --git a/lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h b/lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h deleted file mode 100644 index 415809c..0000000 --- a/lib/pls/include/pls/internal/data_structures/work_stealing_deque_impl.h +++ /dev/null @@ -1,162 +0,0 @@ - -#ifndef PLS_WORK_STEALING_DEQUE_IMPL_H_ -#define PLS_WORK_STEALING_DEQUE_IMPL_H_ - -#include -#include - -namespace pls { -namespace internal { -namespace data_structures { - -template -work_stealing_deque_item *work_stealing_deque::item_at(deque_offset offset) { - return reinterpret_cast(stack_->memory_at_offset(offset)); -} - -template -deque_offset work_stealing_deque::current_stack_offset() { - return stack_->save_offset(); -} - -template -template -T *work_stealing_deque::push_task(ARGS &&... 
args) { - static_assert(std::is_same::value || std::is_base_of::value, - "Must only push types of onto work_stealing_deque"); - - // 'Union' type to push both the task and the deque entry as one part onto the stack - using pair_t = std::pair; - // Allocate space on stack - auto new_pair = reinterpret_cast(stack_->push_bytes()); - // Initialize memory on stack - new((void *) &(new_pair->first)) work_stealing_deque_item(); - new((void *) &(new_pair->second)) T(std::forward(args)...); - - // Keep reference for later publishing - last_pushed_task_ = &new_pair->second; - - // Item is not publicly visible until it is published - return &(new_pair->second); -} - -template -template -T *work_stealing_deque::push_object(ARGS &&... args) { - // Simply add data to the stack, do not publish it in any way - return stack_->push(std::forward(args)...); -} - -template -void *work_stealing_deque::push_bytes(size_t size) { - // Simply add data to the stack, do not publish it in any way - return stack_->push_bytes(size); -} - -template -void work_stealing_deque::publish_last_task() { - deque_offset local_tail = tail_; - - // Prepare current tail to point to correct next task - auto tail_deque_item = item_at(local_tail); - tail_deque_item->set_data(last_pushed_task_); - tail_deque_item->set_next_item(current_stack_offset()); - tail_deque_item->set_previous_item(previous_tail_); - previous_tail_ = local_tail; - - // Linearization point, task appears after this write - deque_offset new_tail = current_stack_offset(); - tail_ = new_tail; -} - -template -Task *work_stealing_deque::pop_local_task() { - deque_offset local_tail = tail_; - stamped_integer local_head = head_; - - if (local_tail <= local_head.value) { - return nullptr; // EMPTY - } - - work_stealing_deque_item *previous_tail_item = item_at(previous_tail_); - deque_offset new_tail = previous_tail_; - previous_tail_ = previous_tail_item->previous_item(); - - // Publish our wish to set the tail back - tail_ = new_tail; - // Get the state of local head AFTER we published our wish - local_head = head_; // Linearization point, outside knows list is empty - - if (local_head.value < new_tail) { - return previous_tail_item->data(); // Success, enough distance to other threads - } - - if (local_head.value == new_tail) { - stamped_integer new_head = stamped_integer{local_head.stamp + 1, new_tail}; - // Try competing with consumers by updating the head's stamp value - if (head_.compare_exchange_strong(local_head, new_head)) { - return previous_tail_item->data(); // SUCCESS, we won the competition with other threads - } - } - - // Some other thread either won the competition or it already set the head further than we are - // before we even tried to compete with it. - // Reset the queue into an empty state => head_ = tail_ - tail_ = local_head.value; // ...we give up to the other winning thread - - return nullptr; // EMPTY, we lost the competition with other threads -} - -template -Task *work_stealing_deque::pop_external_task() { - stamped_integer local_head = head_; - deque_offset local_tail = tail_; - - if (local_tail <= local_head.value) { - return nullptr; // EMPTY - } - // Load info on current deque item. - // In case we have a race with a new (aba) overwritten item at this position, - // there has to be a competition over the tail -> the stamp increased and our next - // operation will fail anyways! 
- work_stealing_deque_item *head_deque_item = item_at(local_head.value); - deque_offset next_item_offset = head_deque_item->next_item(); - Task *head_data_item = head_deque_item->data(); - - // We try to set the head to this new position. - // Possible outcomes: - // 1) no one interrupted us, we win this competition - // 2) other thread took the head, we lose to this - // 3) owning thread removed tail, we lose to this - stamped_integer new_head = stamped_integer{local_head.stamp + 1, next_item_offset}; - if (head_.compare_exchange_strong(local_head, new_head)) { - return head_data_item; // SUCCESS, we won the competition - } - - return nullptr; // EMPTY, we lost the competition -} - -template -void work_stealing_deque::reset_offset(deque_offset offset) { - stack_->reset_offset(offset); - - stamped_integer local_head = head_; - deque_offset local_tail = tail_; - if (offset < local_tail) { - tail_ = offset; - if (local_head.value >= local_tail) { - head_ = stamped_integer{local_head.stamp + 1, offset}; - } - } -} - -template -deque_offset work_stealing_deque::save_offset() { - return current_stack_offset(); -} - -} -} -} - -#endif //PLS_WORK_STEALING_DEQUE_IMPL_H_ diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack.h b/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack.h new file mode 100644 index 0000000..e3f63fb --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack.h @@ -0,0 +1,66 @@ + +#ifndef PLS_ALIGNED_STACK_H +#define PLS_ALIGNED_STACK_H + +#include +#include + +#include "pls/internal/base/error_handling.h" +#include "pls/internal/base/alignment.h" + +namespace pls { +namespace internal { +namespace scheduling { +namespace data_structures { + +using base::system_details::pointer_t; + +/** + * Generic stack-like data structure that allows to allocate arbitrary objects in a given memory region. + * The objects will be stored aligned in the stack, making the storage cache friendly and very fast + * (as long as one can live with the stack restrictions). + * + * IMPORTANT: Does not call destructors on stored objects! Do not allocate resources in the objects! + * + * Usage: + * aligned_stack stack{pointer_to_memory, size_of_memory}; + * T* pointer = stack.push(constructor_arguments); // Perfect-Forward-Construct the object on top of stack + * stack.pop(); // Remove the top object of type T + */ +class aligned_stack { + public: + typedef size_t stack_offset; + + aligned_stack() : aligned_memory_start_{0}, aligned_memory_end_{0}, max_offset_{0}, current_offset_{0} {}; + aligned_stack(pointer_t memory_region, std::size_t size); + aligned_stack(char *memory_region, std::size_t size); + + template + T *push(ARGS &&... 
args); + template + void *push_bytes(); + void *push_bytes(size_t size); + template + T pop(); + + void *memory_at_offset(stack_offset offset) const; + + stack_offset save_offset() const { return current_offset_; } + void reset_offset(stack_offset new_offset) { current_offset_ = new_offset; } + + private: + // Keep bounds of our memory block + pointer_t aligned_memory_start_; + pointer_t aligned_memory_end_; + + stack_offset max_offset_; + stack_offset current_offset_; +}; + +} +} +} +} +#include "aligned_stack_impl.h" + +#endif //PLS_ALIGNED_STACK_H diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack_impl.h b/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack_impl.h new file mode 100644 index 0000000..f04e9e1 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack_impl.h @@ -0,0 +1,36 @@ + +#ifndef PLS_ALIGNED_STACK_IMPL_H +#define PLS_ALIGNED_STACK_IMPL_H + +#include + +namespace pls { +namespace internal { +namespace scheduling { +namespace data_structures { + +template +T *aligned_stack::push(ARGS &&... args) { + // Perfect-Forward construct + return new(push_bytes())T(std::forward(args)...); +} + +template +void *aligned_stack::push_bytes() { + return push_bytes(sizeof(T)); +} + +template +T aligned_stack::pop() { + auto num_cache_lines = base::alignment::next_alignment(sizeof(T)) / base::system_details::CACHE_LINE_SIZE; + current_offset_ -= num_cache_lines; + + return *reinterpret_cast(memory_at_offset(current_offset_)); +} + +} +} +} +} + +#endif //PLS_ALIGNED_STACK_IMPL_H diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/deque.h b/lib/pls/include/pls/internal/scheduling/data_structures/deque.h new file mode 100644 index 0000000..729bbf9 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/data_structures/deque.h @@ -0,0 +1,21 @@ + +#ifndef PLS_DEQUE_H_ +#define PLS_DEQUE_H_ + +#include "work_stealing_deque.h" +#include "locking_deque.h" + +namespace pls { +namespace internal { +namespace scheduling { +namespace data_structures { + +template +using deque = work_stealing_deque; + +} +} +} +} + +#endif //PLS_DEQUE_H_ diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque.h b/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque.h new file mode 100644 index 0000000..3f798f6 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque.h @@ -0,0 +1,79 @@ + +#ifndef PLS_LOCKING_DEQUE_H +#define PLS_LOCKING_DEQUE_H + +#include + +#include "pls/internal/base/spin_lock.h" +#include "aligned_stack.h" + +namespace pls { +namespace internal { +namespace scheduling { +namespace data_structures { + +using deque_offset = aligned_stack::stack_offset; + +/** + * Wraps any object into a deque item. + */ +template +struct locking_deque_item { + Item *item_; + + locking_deque_item *prev_; + locking_deque_item *next_; + +}; + +template +struct locking_deque_container : public locking_deque_item { + Content content_; + + public: + template + explicit locking_deque_container(ARGS &&... args) : content_{std::forward(args)...} {} +}; + +/** + * A double linked list based deque. + * Storage is therefore only needed for the individual items. 
+ * + * @tparam Item The type of items stored in this deque + */ +template +class locking_deque { + aligned_stack *stack_; + + locking_deque_item *head_; + locking_deque_item *tail_; + + locking_deque_item *last_inserted_; + + base::spin_lock lock_; + + public: + explicit locking_deque(aligned_stack *stack) + : stack_{stack}, head_{nullptr}, tail_{nullptr}, lock_{} {} + + template + T *push_task(ARGS &&... args); + template + T *push_object(ARGS &&... args); + void *push_bytes(size_t size); + void publish_last_task(); + + Item *pop_local_task(); + Item *pop_external_task(); + + void reset_offset(deque_offset state); + deque_offset save_offset(); +}; + +} +} +} +} +#include "locking_deque_impl.h" + +#endif //PLS_LOCKING_DEQUE_H diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque_impl.h b/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque_impl.h new file mode 100644 index 0000000..c683cb9 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque_impl.h @@ -0,0 +1,107 @@ + +#ifndef PLS_LOCKING_DEQUE_IMPL_H_ +#define PLS_LOCKING_DEQUE_IMPL_H_ + +namespace pls { +namespace internal { +namespace scheduling { +namespace data_structures { + +template +template +T *locking_deque::push_task(ARGS &&...args) { + static_assert(std::is_same::value || std::is_base_of::value, + "Must only push types of onto work_stealing_deque"); + + // Allocate object + auto deque_item = stack_->push < locking_deque_container < Task, T>>(std::forward(args)...); + deque_item->item_ = &deque_item->content_; + + // Keep for later publishing + last_inserted_ = deque_item; + + // ...actual data reference + return &deque_item->content_; +} + +template +template +T *locking_deque::push_object(ARGS &&... args) { + // Simply add data to the stack, do not publish it in any way + return stack_->push(std::forward(args)...); +} + +template +void *locking_deque::push_bytes(size_t size) { + // Simply add data to the stack, do not publish it in any way + return stack_->push_bytes(size); +} + +template +void locking_deque::publish_last_task() { + std::lock_guard lock{lock_}; + + if (tail_ != nullptr) { + tail_->next_ = last_inserted_; + } else { + head_ = last_inserted_; + } + last_inserted_->prev_ = tail_; + last_inserted_->next_ = nullptr; + tail_ = last_inserted_; +} + +template +Task *locking_deque::pop_local_task() { + std::lock_guard lock{lock_}; + + if (tail_ == nullptr) { + return nullptr; + } + + auto result = tail_; + tail_ = tail_->prev_; + if (tail_ == nullptr) { + head_ = nullptr; + } else { + tail_->next_ = nullptr; + } + + return result->item_; +} + +template +Task *locking_deque::pop_external_task() { + std::lock_guard lock{lock_}; + + if (head_ == nullptr) { + return nullptr; + } + + auto result = head_; + head_ = head_->next_; + if (head_ == nullptr) { + tail_ = nullptr; + } else { + head_->prev_ = nullptr; + } + + return result->item_; +} + +template +void locking_deque::reset_offset(deque_offset state) { + stack_->reset_offset(state); +} + +template +deque_offset locking_deque::save_offset() { + return stack_->save_offset(); +} + +} +} +} +} + +#endif //PLS_LOCKING_DEQUE_IMPL_H_ diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/stamped_integer.h b/lib/pls/include/pls/internal/scheduling/data_structures/stamped_integer.h new file mode 100644 index 0000000..04fea63 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/data_structures/stamped_integer.h @@ -0,0 +1,29 @@ + +#ifndef 
PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_DATA_STRUCTURES_STAMPED_INTEGER_H_
+#define PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_DATA_STRUCTURES_STAMPED_INTEGER_H_
+
+#include "pls/internal/base/system_details.h"
+
+namespace pls {
+namespace internal {
+namespace scheduling {
+namespace data_structures {
+
+constexpr unsigned long HALF_CACHE_LINE = base::system_details::CACHE_LINE_SIZE / 2;
+struct stamped_integer {
+  using member_t = base::system_details::cas_integer;
+
+  member_t stamp:HALF_CACHE_LINE;
+  member_t value:HALF_CACHE_LINE;
+
+  stamped_integer() : stamp{0}, value{0} {};
+  stamped_integer(member_t new_value) : stamp{0}, value{new_value} {};
+  stamped_integer(member_t new_stamp, member_t new_value) : stamp{new_stamp}, value{new_value} {};
+};
+
+}
+}
+}
+}
+
+#endif //PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_DATA_STRUCTURES_STAMPED_INTEGER_H_
diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque.h b/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque.h
new file mode 100644
index 0000000..2266ad5
--- /dev/null
+++ b/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque.h
@@ -0,0 +1,99 @@
+
+#ifndef PLS_WORK_STEALING_DEQUE_H_
+#define PLS_WORK_STEALING_DEQUE_H_
+
+#include <atomic>
+
+#include "pls/internal/base/error_handling.h"
+#include "stamped_integer.h"
+
+#include "aligned_stack.h"
+
+namespace pls {
+namespace internal {
+namespace scheduling {
+namespace data_structures {
+
+using base::system_details::pointer_t;
+
+// Integer split into two halfs, can be used in CAS operations
+using data_structures::stamped_integer;
+using deque_offset = stamped_integer::member_t;
+
+// Single Item in the deque
+class work_stealing_deque_item {
+  // TODO: In our opinion these atomic's are a pure formality to make the thread sanitizer happy,
+  // as the race occurs in 'pop_head', where ALL CASES reading a corrupt/old value are cases
+  // where the next CAS fails anywas, thus making these corrupted values have no influence on
+  // the overall program execution.
+  // ==> If we find performance problems in this queue, try removing the atomics again.
+  // Pointer to the actual data
+  std::atomic<pointer_t> data_;
+  // Index (relative to stack base) to the next and previous element
+  std::atomic<deque_offset> next_item_;
+  deque_offset previous_item_;
+
+ public:
+  work_stealing_deque_item() : data_{0}, next_item_{}, previous_item_{} {}
+
+  template<typename Item>
+  Item *data() {
+    return reinterpret_cast<Item *>(data_.load());
+  }
+
+  template<typename Item>
+  void set_data(Item *data) {
+    data_ = reinterpret_cast<pointer_t>(data);
+  }
+
+  deque_offset next_item() const { return next_item_.load(); }
+  void set_next_item(deque_offset next_item) { next_item_ = next_item; }
+
+  deque_offset previous_item() const { return previous_item_; }
+  void set_previous_item(deque_offset previous_item) { previous_item_ = previous_item; }
+};
+
+template<typename Task>
+class work_stealing_deque {
+  // Deque 'takes over' stack and handles memory management while in use.
+  // At any point in time the deque can stop using more memory and the stack can be used by other entities.
+  aligned_stack *stack_;
+
+  std::atomic<stamped_integer> head_;
+  std::atomic<deque_offset> tail_;
+  deque_offset previous_tail_;
+
+  Task *last_pushed_task_;
+
+ public:
+  explicit work_stealing_deque(aligned_stack *stack) : stack_{stack},
+                                                       head_{stamped_integer{0, 0}},
+                                                       tail_{0},
+                                                       previous_tail_{0},
+                                                       last_pushed_task_{0} {}
+
+  template<typename T, typename ...ARGS>
+  T *push_task(ARGS &&... args);
+  template<typename T, typename ...ARGS>
+  T *push_object(ARGS &&... args);
+  void *push_bytes(size_t size);
+  void publish_last_task();
+
+  Task *pop_local_task();
+  Task *pop_external_task();
+
+  void reset_offset(deque_offset offset);
+  deque_offset save_offset();
+
+ private:
+  work_stealing_deque_item *item_at(deque_offset offset);
+  deque_offset current_stack_offset();
+};
+
+}
+}
+}
+}
+#include "work_stealing_deque_impl.h"
+
+#endif //PLS_WORK_STEALING_DEQUE_H_
diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h b/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h
new file mode 100644
index 0000000..b791bac
--- /dev/null
+++ b/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h
@@ -0,0 +1,164 @@
+
+#ifndef PLS_WORK_STEALING_DEQUE_IMPL_H_
+#define PLS_WORK_STEALING_DEQUE_IMPL_H_
+
+#include
+#include
+
+namespace pls {
+namespace internal {
+namespace scheduling {
+namespace data_structures {
+
+template<typename Task>
+work_stealing_deque_item *work_stealing_deque<Task>::item_at(deque_offset offset) {
+  return reinterpret_cast<work_stealing_deque_item *>(stack_->memory_at_offset(offset));
+}
+
+template<typename Task>
+deque_offset work_stealing_deque<Task>::current_stack_offset() {
+  return stack_->save_offset();
+}
+
+template<typename Task>
+template<typename T, typename ...ARGS>
+T *work_stealing_deque<Task>::push_task(ARGS &&... args) {
+  static_assert(std::is_same<Task, T>::value || std::is_base_of<Task, T>::value,
+                "Must only push types of <Task> onto work_stealing_deque");
+
+  // 'Union' type to push both the task and the deque entry as one part onto the stack
+  using pair_t = std::pair<work_stealing_deque_item, T>;
+  // Allocate space on stack
+  auto new_pair = reinterpret_cast<pair_t *>(stack_->push_bytes<pair_t>());
+  // Initialize memory on stack
+  new((void *) &(new_pair->first)) work_stealing_deque_item();
+  new((void *) &(new_pair->second)) T(std::forward<ARGS>(args)...);
+
+  // Keep reference for later publishing
+  last_pushed_task_ = &new_pair->second;
+
+  // Item is not publicly visible until it is published
+  return &(new_pair->second);
+}
+
+template<typename Task>
+template<typename T, typename ...ARGS>
+T *work_stealing_deque<Task>::push_object(ARGS &&... args) {
+  // Simply add data to the stack, do not publish it in any way
+  return stack_->push<T>(std::forward<ARGS>(args)...);
+}
+
+template<typename Task>
+void *work_stealing_deque<Task>::push_bytes(size_t size) {
+  // Simply add data to the stack, do not publish it in any way
+  return stack_->push_bytes(size);
+}
+
+template<typename Task>
+void work_stealing_deque<Task>::publish_last_task() {
+  deque_offset local_tail = tail_;
+
+  // Prepare current tail to point to correct next task
+  auto tail_deque_item = item_at(local_tail);
+  tail_deque_item->set_data(last_pushed_task_);
+  tail_deque_item->set_next_item(current_stack_offset());
+  tail_deque_item->set_previous_item(previous_tail_);
+  previous_tail_ = local_tail;
+
+  // Linearization point, task appears after this write
+  deque_offset new_tail = current_stack_offset();
+  tail_ = new_tail;
+}
+
+template<typename Task>
+Task *work_stealing_deque<Task>::pop_local_task() {
+  deque_offset local_tail = tail_;
+  stamped_integer local_head = head_;
+
+  if (local_tail <= local_head.value) {
+    return nullptr; // EMPTY
+  }
+
+  work_stealing_deque_item *previous_tail_item = item_at(previous_tail_);
+  deque_offset new_tail = previous_tail_;
+  previous_tail_ = previous_tail_item->previous_item();
+
+  // Publish our wish to set the tail back
+  tail_ = new_tail;
+  // Get the state of local head AFTER we published our wish
+  local_head = head_; // Linearization point, outside knows list is empty
+
+  if (local_head.value < new_tail) {
+    return previous_tail_item->data<Task>(); // Success, enough distance to other threads
+  }
+
+  if (local_head.value == new_tail) {
+    stamped_integer new_head = stamped_integer{local_head.stamp + 1, new_tail};
+    // Try competing with consumers by updating the head's stamp value
+    if (head_.compare_exchange_strong(local_head, new_head)) {
+      return previous_tail_item->data<Task>(); // SUCCESS, we won the competition with other threads
+    }
+  }
+
+  // Some other thread either won the competition or it already set the head further than we are
+  // before we even tried to compete with it.
+  // Reset the queue into an empty state => head_ = tail_
+  tail_ = local_head.value; // ...we give up to the other winning thread
+
+  return nullptr; // EMPTY, we lost the competition with other threads
+}
+
+template<typename Task>
+Task *work_stealing_deque<Task>::pop_external_task() {
+  stamped_integer local_head = head_;
+  deque_offset local_tail = tail_;
+
+  if (local_tail <= local_head.value) {
+    return nullptr; // EMPTY
+  }
+  // Load info on current deque item.
+  // In case we have a race with a new (aba) overwritten item at this position,
+  // there has to be a competition over the tail -> the stamp increased and our next
+  // operation will fail anyways!
+  work_stealing_deque_item *head_deque_item = item_at(local_head.value);
+  deque_offset next_item_offset = head_deque_item->next_item();
+  Task *head_data_item = head_deque_item->data<Task>();
+
+  // We try to set the head to this new position.
+  // Possible outcomes:
+  // 1) no one interrupted us, we win this competition
+  // 2) other thread took the head, we lose to this
+  // 3) owning thread removed tail, we lose to this
+  stamped_integer new_head = stamped_integer{local_head.stamp + 1, next_item_offset};
+  if (head_.compare_exchange_strong(local_head, new_head)) {
+    return head_data_item; // SUCCESS, we won the competition
+  }
+
+  return nullptr; // EMPTY, we lost the competition
+}
+
+template<typename Task>
+void work_stealing_deque<Task>::reset_offset(deque_offset offset) {
+  stack_->reset_offset(offset);
+
+  stamped_integer local_head = head_;
+  deque_offset local_tail = tail_;
+  if (offset < local_tail) {
+    tail_ = offset;
+    if (local_head.value >= local_tail) {
+      head_ = stamped_integer{local_head.stamp + 1, offset};
+    }
+  }
+}
+
+template<typename Task>
+deque_offset work_stealing_deque<Task>::save_offset() {
+  return current_stack_offset();
+}
+
+}
+}
+}
+}
+
+#endif //PLS_WORK_STEALING_DEQUE_IMPL_H_
-- 
libgit2 0.26.0
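
For orientation, the sketch below shows how the moved classes are meant to fit together after this patch: an aligned_stack wraps a caller-provided memory region, and a work_stealing_deque allocates its items on that stack, publishing them only via publish_last_task(). This snippet is not part of the patch; it is a minimal usage sketch assuming the patched headers are on the include path and the pls library is linked, and the task type, buffer size, and names chosen here (my_task, memory) are illustrative only.

#include <cstddef>

// New header locations introduced by this patch (assumed to be on the include path).
#include "pls/internal/scheduling/data_structures/aligned_stack.h"
#include "pls/internal/scheduling/data_structures/work_stealing_deque.h"

using namespace pls::internal::scheduling::data_structures;

// Illustrative task type; the real scheduler pushes its own task classes.
struct my_task {
  int payload_;
  explicit my_task(int payload) : payload_{payload} {}
};

int main() {
  // The deque does not allocate on its own; it lives on top of an aligned_stack
  // that manages a caller-provided memory region.
  static char memory[4096];
  aligned_stack stack{memory, sizeof(memory)};

  work_stealing_deque<my_task> deque{&stack};

  // push_task constructs the task on the stack; it only becomes visible to
  // thieves after publish_last_task() (the linearization point).
  my_task *task = deque.push_task<my_task>(42);
  deque.publish_last_task();

  // The owning thread pops from the tail; thieves would call pop_external_task().
  my_task *popped = deque.pop_local_task();
  return (popped == task && popped->payload_ == 42) ? 0 : 1;
}

Note that, per the moved deque.h, deque<Task> is simply an alias for work_stealing_deque<Task>, so the same calls would also compile against the locking_deque backend.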
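The stamped_integer used for head_ packs a modification stamp and an offset into a single CAS-able word, so a stale head cannot be re-installed after the underlying stack slot has been reused (the ABA problem). The snippet below is not from the library; it is a self-contained illustration of that idea using a plain 64-bit atomic, with the 32/32 field split and the example offsets chosen arbitrarily.

#include <atomic>
#include <cstdint>
#include <iostream>

// Stamp in the upper 32 bits, value (e.g. a deque offset) in the lower 32 bits.
struct stamped {
  std::uint32_t stamp;
  std::uint32_t value;
};

std::uint64_t pack(stamped s) {
  return (static_cast<std::uint64_t>(s.stamp) << 32) | s.value;
}

stamped unpack(std::uint64_t raw) {
  return {static_cast<std::uint32_t>(raw >> 32), static_cast<std::uint32_t>(raw)};
}

int main() {
  std::atomic<std::uint64_t> head{pack({0, 5})};

  // A thief reads the head...
  std::uint64_t observed = head.load();

  // ...meanwhile the owner resets the deque and reuses the same offset, bumping the stamp.
  head.store(pack({1, 5}));

  // The thief's CAS now fails even though the value (5) looks unchanged,
  // because the stamp differs -- exactly the race the stamped head_ guards against.
  stamped wanted = unpack(observed);
  bool won = head.compare_exchange_strong(observed, pack({wanted.stamp + 1, 7}));
  std::cout << (won ? "stole item" : "lost race, retry") << "\n";
  return 0;
}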