Commit c21af88f by FritzFlorian

Remove unused data structures (old work stealing deques).

If they are required in future iterations they can be re-added from the history; however,
for now they only confuse project users, as it is not clear which data structures are actually in use.
parent a0418cc8
Pipeline #1591 passed with stages in 4 minutes 38 seconds
@@ -20,13 +20,9 @@ add_library(pls STATIC
include/pls/internal/base/stack_allocator.h src/internal/base/stack_allocator.cpp
include/pls/internal/base/futex_wrapper.h
include/pls/internal/data_structures/aligned_stack.h src/internal/data_structures/aligned_stack.cpp
include/pls/internal/data_structures/aligned_stack_impl.h
include/pls/internal/data_structures/stamped_integer.h
include/pls/internal/data_structures/stamped_split_integer.h
include/pls/internal/data_structures/delayed_initialization.h
include/pls/internal/data_structures/bounded_trading_deque.h
include/pls/internal/data_structures/bounded_ws_deque.h
include/pls/internal/data_structures/optional.h
include/pls/internal/helpers/prohibit_new.h
#ifndef PLS_ALIGNED_STACK_H
#define PLS_ALIGNED_STACK_H
#include <cstdint>
#include <cstdlib>
#include <array>
#include <memory>
#include "pls/internal/base/error_handling.h"
#include "pls/internal/base/alignment.h"
#include "pls/internal/base/system_details.h"
namespace pls {
namespace internal {
namespace data_structures {
using base::system_details::pointer_t;
/**
* Generic stack-like data structure that allows allocating arbitrary objects in a given memory region.
* The objects will be stored aligned in the stack, making the storage cache friendly and very fast
* (as long as one can live with the stack restrictions).
*
* IMPORTANT: The destructor is only called when explicitly using pop<T>().
* In this case you have to make sure that push<T, ...>() and pop<T>() calls
* match up throughout your program.
*
* Usage:
* static_aligned_stack<SIZE> stack; or heap_aligned_stack stack(size);
* T* pointer = stack.push<T>(constructor_arguments); // Perfect-forward-construct the object on top of the stack
* stack.pop<T>(); // Remove the top object of type T and run its destructor
*/
class aligned_stack {
public:
typedef size_t stack_offset;
aligned_stack(char *memory_pointer, size_t size);
aligned_stack(char *memory_pointer, size_t size, size_t original_size);
template<typename T, typename ...ARGS>
T *push(ARGS &&... args);
template<typename T>
char *push_bytes();
char *push_bytes(size_t size);
template<typename T>
void pop();
char *memory_at_offset(stack_offset offset) const;
stack_offset save_offset() const { return current_offset_; }
void reset_offset(stack_offset new_offset) { current_offset_ = new_offset; }
protected:
// Keep bounds of our memory block
char *unaligned_memory_pointer_;
char *memory_pointer_;
stack_offset max_offset_;
stack_offset current_offset_;
};
template<size_t SIZE>
class static_aligned_stack {
public:
static_aligned_stack();
aligned_stack &get_stack() { return aligned_stack_; }
private:
alignas(base::system_details::CACHE_LINE_SIZE) std::array<char, SIZE> memory_;
aligned_stack aligned_stack_;
};
class heap_aligned_stack {
public:
explicit heap_aligned_stack(size_t size) :
unaligned_memory_size_{base::alignment::next_alignment(size)},
unaligned_memory_pointer_{new char[unaligned_memory_size_]},
aligned_stack_{unaligned_memory_pointer_, size, unaligned_memory_size_} {}
~heap_aligned_stack() {
delete[] unaligned_memory_pointer_;
}
aligned_stack &get_stack() { return aligned_stack_; }
private:
size_t unaligned_memory_size_;
char *unaligned_memory_pointer_;
aligned_stack aligned_stack_;
};
}
}
}
#include "aligned_stack_impl.h"
#endif //PLS_ALIGNED_STACK_H
#ifndef PLS_ALIGNED_STACK_IMPL_H
#define PLS_ALIGNED_STACK_IMPL_H
#include <utility>
namespace pls {
namespace internal {
namespace data_structures {
template<typename T, typename ...ARGS>
T *aligned_stack::push(ARGS &&... args) {
return new(push_bytes<T>())T(std::forward<ARGS>(args)...);
}
template<typename T>
char *aligned_stack::push_bytes() {
return push_bytes(sizeof(T));
}
template<typename T>
void aligned_stack::pop() {
auto num_cache_lines = base::alignment::next_alignment(sizeof(T)) / base::system_details::CACHE_LINE_SIZE;
current_offset_ -= num_cache_lines;
auto result = *reinterpret_cast<T *>(memory_at_offset(current_offset_));
result.~T();
}
template<size_t SIZE>
static_aligned_stack<SIZE>::static_aligned_stack(): memory_{}, aligned_stack_{memory_.data(), SIZE} {};
}
}
}
#endif //PLS_ALIGNED_STACK_IMPL_H
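For orientation, here is a minimal usage sketch of the aligned_stack removed above. The payload type and buffer size are made up for illustration only; as the header comment notes, push<T>() and pop<T>() calls have to match up, since destructors only run on explicit pops.

```cpp
// Hypothetical usage sketch of the removed aligned_stack (illustration only).
#include "pls/internal/data_structures/aligned_stack.h"

using namespace pls::internal::data_structures;

struct my_payload {          // made-up payload type, not part of the library
  int id_;
  explicit my_payload(int id) : id_{id} {}
};

void aligned_stack_sketch() {
  heap_aligned_stack stack{4096};                    // cache-line aligned backing memory on the heap
  aligned_stack &inner = stack.get_stack();

  auto checkpoint = inner.save_offset();             // remember the current top of the stack

  my_payload *object = inner.push<my_payload>(42);   // perfect-forward-constructed, cache-line aligned
  // ... use object ...
  inner.pop<my_payload>();                           // runs ~my_payload() and frees its cache lines

  inner.reset_offset(checkpoint);                    // or roll back wholesale (runs no destructors)
}
```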
#ifndef PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_TRADING_DEQUE_H_
#define PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_TRADING_DEQUE_H_
#include <atomic>
#include <tuple>
#include "pls/internal/base/error_handling.h"
#include "pls/internal/base/system_details.h"
#include "pls/internal/data_structures/optional.h"
#include "pls/internal/data_structures/stamped_integer.h"
namespace pls {
namespace internal {
namespace data_structures {
template<typename TradedType>
class traded_field {
static_assert(base::system_details::CACHE_LINE_SIZE >= 4,
"Traded objects must not use their last address bits, as we use them for status flags."
"As traded objects are usually cache aligned, we need big enough cache lines.");
// TODO: Replace unsigned long with a portable fixed-size integer type
// (pointer and long sizes may differ on some systems).
static constexpr unsigned long SHIFT = 0x2lu;
static constexpr unsigned long TAG_BITS = 0x3lu;
static constexpr unsigned long RELEVANT_BITS = ~TAG_BITS;
static constexpr unsigned long EMPTY_TAG = 0x0lu;
static constexpr unsigned long STAMP_TAG = 0x1lu;
static constexpr unsigned long TRADE_TAG = 0x2lu;
public:
void fill_with_stamp(unsigned long stamp) {
pointer_ = (void *) ((stamp << SHIFT) | STAMP_TAG);
}
unsigned long get_stamp() {
PLS_ASSERT(is_filled_with_tag(), "Must only read out the stamp when the traded field contains one.");
return ((unsigned long) pointer_) >> SHIFT;
}
bool is_filled_with_tag() {
return (((unsigned long) pointer_) & TAG_BITS) == STAMP_TAG;
}
void fill_with_trade_object(TradedType *trade_object) {
PLS_ASSERT((((unsigned long) trade_object) & TAG_BITS) == 0,
"Must only store aligned objects in this data structure (last bits are needed for tag bit)");
pointer_ = reinterpret_cast<void *>(((unsigned long) trade_object) | TRADE_TAG);
}
TradedType *get_trade_object() {
PLS_ASSERT(is_filled_with_object(), "Must only read out the object when the traded field contains one.");
return reinterpret_cast<TradedType *>(((unsigned long) pointer_) & RELEVANT_BITS);
}
bool is_filled_with_object() {
return (((unsigned long) pointer_) & TAG_BITS) == TRADE_TAG;
}
bool is_empty() {
return (((unsigned long) pointer_) & TAG_BITS) == EMPTY_TAG;
}
private:
void *pointer_{nullptr};
};
template<typename EntryType, typename TradedType>
class alignas(base::system_details::CACHE_LINE_SIZE) trading_deque_entry {
public:
/*
* Fill the slot with its initial values, making it ready to be stolen.
* Imposes no synchronization/memory ordering constraints.
*
* This method is called to initialize a slot in push_bot().
*/
void fill_slots(EntryType *entry_item, unsigned long expected_stamp) {
entry_slot_.store(entry_item, std::memory_order_relaxed);
forwarding_stamp_.store(expected_stamp, std::memory_order_relaxed);
// Relaxed is fine for this, as adding elements is synced over the bot pointer
auto old = trade_slot_.load(std::memory_order_relaxed);
old.fill_with_stamp(expected_stamp);
trade_slot_.store(old, std::memory_order_relaxed);
}
/**
* Tries to atomically read out the object traded in by thieves.
* Either returns the traded-in field (the slot was stolen) or no result (the slot is still owned locally).
*
* This method is used when popping a slot in pop_bot().
*/
optional<TradedType *> acquire_traded_type() {
traded_field<TradedType> empty_field;
traded_field<TradedType> old_field_value = trade_slot_.exchange(empty_field, std::memory_order_acq_rel);
if (old_field_value.is_filled_with_tag()) {
return optional<TradedType *>();
} else {
return optional<TradedType *>(old_field_value.get_trade_object());
}
}
EntryType *get_object() {
return entry_slot_;
}
bool is_empty() {
return trade_slot_.load(std::memory_order_seq_cst).is_empty();
}
optional<EntryType *> trade_object(TradedType *offered_object, unsigned long &expected_stamp) {
// Read our potential result
EntryType *result = entry_slot_.load(std::memory_order_relaxed);
unsigned long forwarding_stamp = forwarding_stamp_.load(std::memory_order_relaxed);
// Try to get it by CAS with the expected field entry, giving up our offered_object for it
traded_field<TradedType> expected_field;
expected_field.fill_with_stamp(expected_stamp);
traded_field<TradedType> offered_field;
offered_field.fill_with_trade_object(offered_object);
if (trade_slot_.compare_exchange_strong(expected_field, offered_field, std::memory_order_acq_rel)) {
return optional<EntryType *>{result};
} else {
if (expected_field.is_empty()) {
expected_stamp = forwarding_stamp;
}
return optional<EntryType *>{};
}
}
private:
std::atomic<EntryType *> entry_slot_{nullptr};
std::atomic<unsigned long> forwarding_stamp_{};
std::atomic<traded_field<TradedType>> trade_slot_{};
};
/**
* A work stealing deque (single producer/consumer at the bottom, multiple consumers at the top).
* A pointer to an EntryType object can only be acquired by stealing consumers (from the top)
* when they also offer a pointer to a TradedType object.
*
* The exchange of 'goods' (EntryType and TradedType) happens atomically at a linearization point.
* This means that the owning thread always gets a TradedType for each and every EntryType that was
* successfully stolen.
*
* The owner of the deque must pop ALL elements, even the stolen ones (to get the traded goods instead).
*
* @tparam EntryType The type of objects stored in the deque
* @tparam TradedType The type of objects traded in for acquiring a deque element.
*/
template<typename EntryType, typename TradedType>
class bounded_trading_deque {
using deque_entry = trading_deque_entry<EntryType, TradedType>;
public:
bounded_trading_deque(deque_entry *entries, size_t num_entries) :
entries_{entries}, num_entries_{num_entries} {};
void push_bot(EntryType *offered_object) {
auto expected_stamp = bot_internal_.stamp_;
auto &current_entry = entries_[bot_internal_.value_];
current_entry.fill_slots(offered_object, expected_stamp);
bot_internal_.stamp_++;
bot_internal_.value_++;
bot_.store(bot_internal_.value_, std::memory_order_release);
}
struct pop_result {
explicit pop_result(optional<EntryType *> entry, optional<TradedType *> traded) : entry_{entry},
traded_{traded} {};
pop_result() : entry_{}, traded_{} {};
optional<EntryType *> entry_;
optional<TradedType *> traded_;
};
pop_result pop_bot() {
if (bot_internal_.value_ == 0) {
return pop_result{}; // Empty, nothing to return...
}
// Go one step back
bot_internal_.value_--;
auto &current_entry = entries_[bot_internal_.value_];
optional<TradedType *> traded_object = current_entry.acquire_traded_type();
optional<EntryType *> queue_entry;
if (traded_object) {
// We do not return an entry, but the traded object
queue_entry = optional<EntryType *>{};
} else {
// We still got it locally, grab the object
queue_entry = optional<EntryType *>{current_entry.get_object()};
}
bot_.store(bot_internal_.value_, std::memory_order_relaxed);
if (bot_internal_.value_ == 0) {
bot_internal_.stamp_++;
top_.store({bot_internal_.stamp_, 0}, std::memory_order_release);
}
return pop_result{queue_entry, traded_object};
}
std::tuple<optional<EntryType *>, stamped_integer> peek_top() {
auto local_top = top_.load();
auto local_bot = bot_.load();
if (local_top.value_ >= local_bot) {
return std::make_tuple(optional<EntryType *>{}, local_top);
} else {
return std::make_tuple(optional<EntryType *>{entries_[local_top.value_].get_object()}, local_top);
}
}
optional<EntryType *> pop_top(TradedType *trade_offer) {
auto local_top = top_.load();
return pop_top(trade_offer, local_top);
}
optional<EntryType *> pop_top(TradedType *trade_offer, stamped_integer local_top) {
auto local_bot = bot_.load();
if (local_top.value_ >= local_bot) {
return optional<EntryType *>{};
}
unsigned long expected_top_stamp = local_top.stamp_;
optional<EntryType *> entry = entries_[local_top.value_].trade_object(trade_offer, expected_top_stamp);
if (entry) {
// We got it, for sure move the top pointer forward.
top_.compare_exchange_strong(local_top, {local_top.stamp_ + 1, local_top.value_ + 1});
} else {
// We did not get it....
if (entries_[local_top.value_].is_empty()) {
// ...update the top stamp, so the next call can get it (we still make system progress, as the owner
// must have popped off the element)
top_.compare_exchange_strong(local_top, {expected_top_stamp, local_top.value_});
} else {
// ...move the pointer forward if someone else put a valid trade object in there.
top_.compare_exchange_strong(local_top, {local_top.stamp_ + 1, local_top.value_ + 1});
}
}
return entry;
}
private:
deque_entry *entries_;
size_t num_entries_;
alignas(base::system_details::CACHE_LINE_SIZE) std::atomic<stamped_integer> top_{{0, 0}};
alignas(base::system_details::CACHE_LINE_SIZE) std::atomic<size_t> bot_{0};
stamped_integer bot_internal_{0, 0};
};
template<typename EntryType, typename TradedType, size_t SIZE>
class static_bounded_trading_deque {
public:
static_bounded_trading_deque() : items_{}, deque_{items_.data(), SIZE} {}
bounded_trading_deque<EntryType, TradedType> &get_deque() { return deque_; }
private:
std::array<trading_deque_entry<EntryType, TradedType>, SIZE> items_;
bounded_trading_deque<EntryType, TradedType> deque_;
};
}
}
}
#endif //PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_TRADING_DEQUE_H_
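To make the trading protocol above concrete, here is a rough, single-threaded sketch. The task and memory_block types are placeholders, and in the real scheduler pop_top would be issued by a stealing thread; the sketch only checks the result flags, since the optional<> interface itself is not shown in this diff.

```cpp
// Hypothetical usage sketch of the removed bounded_trading_deque (illustration only).
#include "pls/internal/base/system_details.h"
#include "pls/internal/data_structures/bounded_trading_deque.h"

using namespace pls::internal;
using namespace pls::internal::data_structures;

// Placeholder types; cache-line alignment keeps the low tag bits of their addresses free.
struct alignas(base::system_details::CACHE_LINE_SIZE) task {};
struct alignas(base::system_details::CACHE_LINE_SIZE) memory_block {};

void trading_deque_sketch() {
  static_bounded_trading_deque<task, memory_block, 64> deque_storage;
  auto &deque = deque_storage.get_deque();

  task local_task;
  memory_block thief_offer;

  deque.push_bot(&local_task);                 // owner publishes a task at the bottom

  // A thief trades its memory_block for the task at the top (normally on another thread).
  optional<task *> stolen = deque.pop_top(&thief_offer);

  // The owner must still pop the slot; it either gets its task back or the traded-in block.
  auto result = deque.pop_bot();
  if (result.traded_) {
    // The slot was stolen; result.traded_ carries the thief's memory_block.
  } else if (result.entry_) {
    // Nothing was stolen; result.entry_ still carries our own task pointer.
  }
  (void) stolen;
}
```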
#ifndef PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_WS_DEQUE_H_
#define PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_WS_DEQUE_H_
#include <cstdio>
#include <array>
#include <atomic>
#include "pls/internal/base/system_details.h"
#include "pls/internal/data_structures/stamped_integer.h"
#include "pls/internal/data_structures/optional.h"
namespace pls {
namespace internal {
namespace data_structures {
/**
* Classic, textbook bounded work stealing deque based on arrays.
* Stores a fixed amount of fixed size objects in an array,
* allowing for local push/pop at the bottom and remote
* pop at the top.
*
* The local operations are cheap as long as head and tail are
* far enough apart, making it ideal for avoiding cache problems.
*
* Depends on overaligned datatypes to be cache line friendly.
* This is not an issue from C++14 onwards, but prevents you from properly
* allocating it on the heap in C++11 (see base::alignment::alignment_wrapper for a solution).
*/
// TODO: Relax memory orderings in here...
template<typename T>
class bounded_ws_deque {
public:
bounded_ws_deque(T *item_array, size_t size) : size_{size}, item_array_{item_array} {}
void push_bottom(T item) {
item_array_[local_bottom_] = item;
local_bottom_++;
bottom_.store(local_bottom_, std::memory_order_release);
}
bool is_empty() {
return top_.load().value_ >= bottom_.load();
}
optional<T> pop_top() {
stamped_integer old_top = top_.load();
unsigned int new_stamp = old_top.stamp_ + 1;
unsigned int new_value = old_top.value_ + 1;
if (bottom_.load() <= old_top.value_) {
return optional<T>();
}
optional<T> result(item_array_[old_top.value_]);
if (top_.compare_exchange_strong(old_top, {new_stamp, new_value})) {
return result;
}
return optional<T>();
}
optional<T> pop_bottom() {
if (local_bottom_ == 0) {
return optional<T>();
}
local_bottom_--;
bottom_.store(local_bottom_, std::memory_order_seq_cst);
optional<T> result(item_array_[local_bottom_]);
stamped_integer old_top = top_.load(std::memory_order_acquire);
if (local_bottom_ > old_top.value_) {
// Enough distance to just return the value
return result;
}
if (local_bottom_ == old_top.value_) {
local_bottom_ = 0;
bottom_.store(local_bottom_);
if (top_.compare_exchange_strong(old_top, {old_top.stamp_ + 1, 0})) {
// We won the competition and the queue is empty
return result;
}
}
// The queue is empty and we lost the competition
local_bottom_ = 0;
bottom_.store(local_bottom_);
top_.store({old_top.stamp_ + 1, 0});
return optional<T>();
}
private:
alignas(base::system_details::CACHE_LINE_SIZE) std::atomic<stamped_integer> top_{stamped_integer{0, 0}};
alignas(base::system_details::CACHE_LINE_SIZE) std::atomic<unsigned int> bottom_{0};
unsigned int local_bottom_{0};
size_t size_;
T *item_array_;
};
template<typename T, size_t SIZE>
class static_bounded_ws_deque {
public:
static_bounded_ws_deque() : items_{}, deque_{items_.data(), SIZE} {}
bounded_ws_deque<T> &get_deque() { return deque_; }
private:
std::array<T, SIZE> items_;
bounded_ws_deque<T> deque_;
};
}
}
}
#endif //PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_WS_DEQUE_H_
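For comparison, a minimal single-threaded sketch of the classic bounded work stealing deque above; in actual use push_bottom/pop_bottom run on the owning worker and pop_top on stealing workers. As before, results are only checked via the optional flags, since optional.h is not shown in this diff.

```cpp
// Hypothetical usage sketch of the removed bounded_ws_deque (illustration only).
#include "pls/internal/data_structures/bounded_ws_deque.h"

using namespace pls::internal::data_structures;

void ws_deque_sketch() {
  static_bounded_ws_deque<int, 128> deque_storage;
  bounded_ws_deque<int> &deque = deque_storage.get_deque();

  // Owner side: LIFO push/pop at the bottom.
  deque.push_bottom(1);
  deque.push_bottom(2);
  optional<int> local = deque.pop_bottom();   // the most recently pushed item, if not stolen

  // Thief side: FIFO steal from the top (normally called from another thread).
  optional<int> stolen = deque.pop_top();     // the oldest item, or nothing on a lost race

  (void) local;
  (void) stolen;
}
```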
#include "pls/internal/data_structures/aligned_stack.h"
#include "pls/internal/base/system_details.h"
namespace pls {
namespace internal {
namespace data_structures {
aligned_stack::aligned_stack(char *memory_pointer, size_t size) :
unaligned_memory_pointer_{memory_pointer},
memory_pointer_{memory_pointer}, // MUST be aligned
max_offset_{size / base::system_details::CACHE_LINE_SIZE},
current_offset_{0} {
PLS_ASSERT((pointer_t) memory_pointer_ % base::system_details::CACHE_LINE_SIZE == 0,
"Must initialize an aligned_stack with a properly aligned memory region!")
}
aligned_stack::aligned_stack(char *unaligned_memory_pointer, size_t size, size_t unaligned_size) :
unaligned_memory_pointer_{unaligned_memory_pointer},
memory_pointer_{base::alignment::next_alignment(unaligned_memory_pointer)},
max_offset_{unaligned_size / base::system_details::CACHE_LINE_SIZE},
current_offset_{0} {
PLS_ASSERT(size == base::alignment::previous_alignment(unaligned_size),
"Initialized aligned stack with invalid memory configuration!")
}
char *aligned_stack::memory_at_offset(stack_offset offset) const {
const auto byte_offset = offset * base::system_details::CACHE_LINE_SIZE;
return reinterpret_cast<char *>(memory_pointer_ + byte_offset);
}
char *aligned_stack::push_bytes(size_t size) {
size_t round_up_size = base::alignment::next_alignment(size);
size_t num_cache_lines = round_up_size / base::system_details::CACHE_LINE_SIZE;
char *result = memory_at_offset(current_offset_);
// Move head to next aligned position after new object
current_offset_ += num_cache_lines;
PLS_ASSERT(current_offset_ <= max_offset_,
"Tried to allocate object on alligned_stack without sufficient memory!");
return result;
}
}
}
}
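To make the offset arithmetic in push_bytes concrete, here is a small standalone sketch of the round-up step. It assumes a 64-byte cache line and re-implements the rounding locally, since base/alignment.h is not part of this diff and the real next_alignment may differ in detail.

```cpp
// Standalone sketch of the cache-line round-up used by aligned_stack::push_bytes (assumption-based).
#include <cassert>
#include <cstddef>

constexpr std::size_t CACHE_LINE_SIZE = 64;  // assumed value of base::system_details::CACHE_LINE_SIZE

// Local stand-in for base::alignment::next_alignment: round up to the next cache line multiple.
constexpr std::size_t next_alignment(std::size_t size) {
  return (size + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE * CACHE_LINE_SIZE;
}

int main() {
  // A 5 byte object still occupies one full cache line, so current_offset_ advances by 1...
  assert(next_alignment(5) / CACHE_LINE_SIZE == 1);
  // ...and a 70 byte object occupies two cache lines, advancing current_offset_ by 2.
  assert(next_alignment(70) / CACHE_LINE_SIZE == 2);
  return 0;
}
```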
add_executable(tests
main.cpp
data_structures_test.cpp
base_tests.cpp
scheduling_tests.cpp
patterns_test.cpp
test_helpers.h
scheduling_lock_free_tests.cpp)
target_link_libraries(tests catch2 pls)
#include <catch.hpp>
#include <mutex>
#include "pls/internal/base/system_details.h"
#include "pls/internal/data_structures/aligned_stack.h"
using namespace pls::internal::data_structures;
using namespace pls::internal::base;
using namespace std;
// Forward Declaration
void test_stack(aligned_stack &stack);
TEST_CASE("aligned stack stores objects correctly", "[internal/data_structures/aligned_stack.h]") {
constexpr long data_size = 1024;
SECTION("plain aligned stack") {
alignas(system_details::CACHE_LINE_SIZE) char data[data_size];
aligned_stack stack{data, data_size, data_size};
test_stack(stack);
}
SECTION("static aligned stack") {
static_aligned_stack<data_size> stack;
test_stack(stack.get_stack());
}
SECTION("heap aligned stack") {
heap_aligned_stack stack{data_size};
test_stack(stack.get_stack());
}
}
void test_stack(aligned_stack &stack) {
SECTION("stack correctly pushes sub linesize objects") {
std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
std::array<char, 64> small_data_two{};
std::array<char, 1> small_data_three{'A'};
auto pointer_one = stack.push<decltype(small_data_one)>(small_data_one);
auto pointer_two = stack.push<decltype(small_data_two)>(small_data_two);
auto pointer_three = stack.push<decltype(small_data_three)>(small_data_three);
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_one) % system_details::CACHE_LINE_SIZE == 0);
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_two) % system_details::CACHE_LINE_SIZE == 0);
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_three) % system_details::CACHE_LINE_SIZE == 0);
}
SECTION("stack correctly pushes above linesize objects") {
std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
std::array<char, system_details::CACHE_LINE_SIZE + 10> big_data_one{};
auto big_pointer_one = stack.push<decltype(big_data_one)>(big_data_one);
auto small_pointer_one = stack.push<decltype(small_data_one)>(small_data_one);
REQUIRE(reinterpret_cast<std::uintptr_t>(big_pointer_one) % system_details::CACHE_LINE_SIZE == 0);
REQUIRE(reinterpret_cast<std::uintptr_t>(small_pointer_one) % system_details::CACHE_LINE_SIZE == 0);
}
SECTION("stack correctly stores and retrieves objects") {
std::array<char, 5> data_one{'a', 'b', 'c', 'd', 'e'};
auto *push_one = stack.push<decltype(data_one)>(data_one);
stack.pop<std::array<char, 5>>();
auto *push_two = stack.push<decltype(data_one)>(data_one);
REQUIRE(push_one == push_two);
}
SECTION("stack can push and pop multiple times with correct alignment") {
std::array<char, 5> small_data_one{'a', 'b', 'c', 'd', 'e'};
std::array<char, 64> small_data_two{};
std::array<char, 1> small_data_three{'A'};
auto pointer_one = stack.push<decltype(small_data_one)>(small_data_one);
auto pointer_two = stack.push<decltype(small_data_two)>(small_data_two);
auto pointer_three = stack.push<decltype(small_data_three)>(small_data_three);
stack.pop<decltype(small_data_three)>();
stack.pop<decltype(small_data_two)>();
auto pointer_four = stack.push<decltype(small_data_two)>(small_data_two);
auto pointer_five = stack.push<decltype(small_data_three)>(small_data_three);
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_one) % system_details::CACHE_LINE_SIZE == 0);
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_two) % system_details::CACHE_LINE_SIZE == 0);
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_three) % system_details::CACHE_LINE_SIZE == 0);
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_four) % system_details::CACHE_LINE_SIZE == 0);
REQUIRE(reinterpret_cast<std::uintptr_t>(pointer_five) % system_details::CACHE_LINE_SIZE == 0);
REQUIRE(pointer_four == pointer_two);
REQUIRE(pointer_five == pointer_three);
}
}