diff --git a/lib/pls/include/pls/internal/base/alignment.h b/lib/pls/include/pls/internal/base/alignment.h index 777c72f..71aa7a0 100644 --- a/lib/pls/include/pls/internal/base/alignment.h +++ b/lib/pls/include/pls/internal/base/alignment.h @@ -13,9 +13,9 @@ namespace internal { namespace base { namespace alignment { -system_details::pointer_t next_alignment(system_details::pointer_t size); +constexpr system_details::pointer_t next_alignment(system_details::pointer_t size); +constexpr system_details::pointer_t previous_alignment(system_details::pointer_t size); char *next_alignment(char *pointer); -system_details::pointer_t previous_alignment(system_details::pointer_t size); /** * Forces alignment requirements on a type equal to a cache line size. diff --git a/lib/pls/include/pls/internal/base/alignment_impl.h b/lib/pls/include/pls/internal/base/alignment_impl.h index 2ec6a7b..a734a40 100644 --- a/lib/pls/include/pls/internal/base/alignment_impl.h +++ b/lib/pls/include/pls/internal/base/alignment_impl.h @@ -23,7 +23,7 @@ constexpr system_details::pointer_t previous_alignment(system_details::pointer_t size - (size % system_details::CACHE_LINE_SIZE); } -constexpr char *next_alignment(char *pointer) { +char *next_alignment(char *pointer) { return reinterpret_cast(next_alignment(reinterpret_cast(pointer))); } diff --git a/lib/pls/include/pls/internal/scheduling/cont_manager.h b/lib/pls/include/pls/internal/scheduling/cont_manager.h index 1290921..b7f33f2 100644 --- a/lib/pls/include/pls/internal/scheduling/cont_manager.h +++ b/lib/pls/include/pls/internal/scheduling/cont_manager.h @@ -3,7 +3,7 @@ #define PLS_CONT_MANAGER_H_ #include -#include +#include #include #include "pls/internal/data_structures/aligned_stack.h" @@ -42,11 +42,11 @@ class cont_manager { continuation_node *cont_chain_start, continuation_node *prev) { // Represents one cont node and its corresponding memory buffer (as one continuous block of memory). 
- using cont_node_memory_pair = std::tuple>; - char *tuple_memory = cont_storage.push_bytes(); - char *cont_node_address = tuple_memory; - char *cont_node_memory_address = tuple_memory + sizeof(continuation_node); + char *pair_memory = cont_storage.push_bytes(); + char *cont_node_address = pair_memory; + char *cont_node_memory_address = pair_memory + sizeof(continuation_node); return new(cont_node_address) continuation_node(cont_node_memory_address, cont_chain_start, prev); } diff --git a/lib/pls/include/pls/internal/scheduling/task.h b/lib/pls/include/pls/internal/scheduling/task.h index 46abf95..f5c351c 100644 --- a/lib/pls/include/pls/internal/scheduling/task.h +++ b/lib/pls/include/pls/internal/scheduling/task.h @@ -2,8 +2,8 @@ #ifndef PLS_TASK_H #define PLS_TASK_H -#include "pls/internal/scheduling/task_manager.h" #include "pls/internal/scheduling/thread_state.h" +#include "pls/internal/scheduling/continuation.h" namespace pls { namespace internal { @@ -15,26 +15,11 @@ namespace scheduling { * * Override the execute_internal() method for your custom code. */ -class task { +class base_task { friend class scheduler; - // TODO: Add ref to continuation - task_manager::task_manager_state task_manager_state_; - protected: - explicit task(); - - /** - * Allow to allocate extra memory during run-time for this task. - * Memory will be pushed onto the stack (in aligned memory, thus avoid many small chunks). - * - * Memory is fully self managed. Calling e.g. de-constructors when not needing objects - * anymore is the users responsibility (memory is simply re-used after the life time of the task ends). - * - * @param size Number of bytes to be allocated - * @return The allocated memory region - */ - void *allocate_memory(long size); + base_task() = default; /** * Overwrite this with the actual behaviour of concrete tasks. 
@@ -42,7 +27,27 @@ class task { virtual void execute_internal() = 0; private: - void execute(); + void execute() { + // TODO: Figure out slow path execution + execute_internal(); + } +}; + +template +class task : public base_task { + public: + template + explicit task(FARG &&function, continuation *continuation) + : base_task{}, function_{std::forward(function)}, continuation_{continuation} {} + + void execute_internal() override { + continuation_->store_result_2(function_()); + // TODO: Properly notify continuation on slow path + } + + private: + F function_; + continuation *continuation_; }; } diff --git a/lib/pls/include/pls/internal/scheduling/task_manager.h b/lib/pls/include/pls/internal/scheduling/task_manager.h index 7a0aeb6..d31a2a8 100644 --- a/lib/pls/include/pls/internal/scheduling/task_manager.h +++ b/lib/pls/include/pls/internal/scheduling/task_manager.h @@ -3,15 +3,26 @@ #define PLS_TASK_MANAGER_H_ #include +#include +#include +#include -#include "pls/internal/data_structures/aligned_stack.h" +#include "pls/internal/scheduling/task.h" +#include "pls/internal/data_structures/stamped_integer.h" +#include "task.h" namespace pls { namespace internal { namespace scheduling { -// TODO: Remove forward references -class task; +struct task_handle { + public: + enum state { uninitialized, initialized, execute_local, stealing, execute_remote, finished }; + using stamped_state = data_structures::stamped_integer; + + std::atomic stamped_state_{uninitialized}; + base_task *task_; +}; /** * Handles management of tasks in the system. Each thread has a local task manager, @@ -22,42 +33,65 @@ class task; * integrate the memory management into the stealing procedure. */ class task_manager { - using task_manager_offset = data_structures::aligned_stack::stack_offset; - public: - // Data each task needs to store to enable the 'return_task' functionality. - using task_manager_state = task_manager_offset; - - // Construct a task onto the stack. 
Stores the previous offset in the newly constructed task. - template - T *push_task(ARGS ...args); // Publishes a task on the stack, i.e. makes it visible for other threads to steal. - void publish_task(task *task); - // Return a no longer needed task to the stack. Must be the current most top task (will reset the stack pointer). - void return_task(task *task); + // The task itself is located on the stack of the worker, as the stealer will copy it away before it is freed. + void publish_task(base_task &task) { + task_handle_stack_[tail_internal_].task_ = &task; + task_handle_stack_[tail_internal_].stamped_state_.store({stamp_internal_++, task_handle::initialized}, + std::memory_order_relaxed); + tail_internal_++; + tail_.store(tail_internal_, std::memory_order_release); // Linearization point, handle is published here + } // Try to pop a local task from this task managers stack. - task *pop_local_task(); - // Try to steal a task from a remote task_manager instance. - // The returned task pointer is valid during the lifetyme of the task. - // The returned task pointer must be returned to this task_manager instance. - // (This is because we can either decide to just steal a remote task pointer or to copy the whole task) - task *pop_remote_task(task_manager &other); + // This should only be required on the fast path of the implementation, + // thus only returning if the operation was a success. + // Essentially this is an 'un-publish' of a task with a notion if it was successful. 
+ bool steal_local_task() { + tail_internal_--; + tail_.store(tail_internal_, std::memory_order_relaxed); + + task_handle::stamped_state swapped_state{stamp_internal_++, task_handle::execute_local}; + swapped_state = task_handle_stack_[tail_internal_].stamped_state_.exchange(swapped_state, std::memory_order_acq_rel); + + if (swapped_state.value == task_handle::execute_remote || + swapped_state.value == task_handle::finished) { + // Previous state shows someone else got the task, return to 'non linear' execution path + // TODO: Properly handle slow path + return false; + } else { + // No one got the task so far, we are happy and continue our fast path + return true; + } + } + + // Try to steal a task from a remote task_manager instance. The stolen task must be stored locally. + // Returns a pair containing the actual task and if the steal was successful. + // TODO: Re-implement after fast path is done +// std::pair steal_remote_task(task_manager &other); - explicit task_manager(data_structures::aligned_stack &task_stack) : task_stack_{task_stack} {} + explicit task_manager(task_handle *task_handle_stack) : task_handle_stack_{task_handle_stack}, + head_{{0}}, + tail_{0}, + tail_internal_{0}, + stamp_internal_{0} {} private: - data_structures::aligned_stack &task_stack_; + task_handle *task_handle_stack_; + alignas(base::system_details::CACHE_LINE_SIZE) std::atomic> head_; + alignas(base::system_details::CACHE_LINE_SIZE) std::atomic tail_; + alignas(base::system_details::CACHE_LINE_SIZE) unsigned int tail_internal_, stamp_internal_; }; template class static_task_manager { public: - static_task_manager() : static_task_stack_{}, task_manager_{static_task_stack_} {}; + static_task_manager() : static_task_handle_stack_{}, task_manager_{static_task_handle_stack_.data()} {}; task_manager &get_task_manager() { return task_manager_; } private: - data_structures::static_aligned_stack static_task_stack_; + std::array static_task_handle_stack_; task_manager task_manager_; };