Commit 39d2fbd8 by FritzFlorian

WIP: Sketch fast path of task manager.

parent d3b64a85
Pipeline #1334 failed with stages
in 25 seconds
......@@ -13,9 +13,9 @@ namespace internal {
namespace base {
namespace alignment {
system_details::pointer_t next_alignment(system_details::pointer_t size);
constexpr system_details::pointer_t next_alignment(system_details::pointer_t size);
constexpr system_details::pointer_t previous_alignment(system_details::pointer_t size);
char *next_alignment(char *pointer);
system_details::pointer_t previous_alignment(system_details::pointer_t size);
* Forces alignment requirements on a type equal to a cache line size.
......@@ -23,7 +23,7 @@ constexpr system_details::pointer_t previous_alignment(system_details::pointer_t
size - (size % system_details::CACHE_LINE_SIZE);
constexpr char *next_alignment(char *pointer) {
char *next_alignment(char *pointer) {
return reinterpret_cast<char *>(next_alignment(reinterpret_cast<system_details::pointer_t >(pointer)));
......@@ -3,7 +3,7 @@
#include <memory>
#include <tuple>
#include <utility>
#include <array>
#include "pls/internal/data_structures/aligned_stack.h"
......@@ -42,11 +42,11 @@ class cont_manager {
continuation_node *cont_chain_start,
continuation_node *prev) {
// Represents one cont node and its corresponding memory buffer (as one continuous block of memory).
using cont_node_memory_pair = std::tuple<continuation_node,
using cont_node_memory_pair = std::pair<continuation_node,
std::array<char, MAX_CONT_SIZE - sizeof(continuation_node)>>;
char *tuple_memory = cont_storage.push_bytes<cont_node_memory_pair>();
char *cont_node_address = tuple_memory;
char *cont_node_memory_address = tuple_memory + sizeof(continuation_node);
char *pair_memory = cont_storage.push_bytes<cont_node_memory_pair>();
char *cont_node_address = pair_memory;
char *cont_node_memory_address = pair_memory + sizeof(continuation_node);
return new(cont_node_address) continuation_node(cont_node_memory_address, cont_chain_start, prev);
......@@ -2,8 +2,8 @@
#ifndef PLS_TASK_H
#define PLS_TASK_H
#include "pls/internal/scheduling/task_manager.h"
#include "pls/internal/scheduling/thread_state.h"
#include "pls/internal/scheduling/continuation.h"
namespace pls {
namespace internal {
......@@ -15,26 +15,11 @@ namespace scheduling {
* Override the execute_internal() method for your custom code.
class task {
class base_task {
friend class scheduler;
// TODO: Add ref to continuation
task_manager::task_manager_state task_manager_state_;
explicit task();
* Allows allocating extra memory at run-time for this task.
* Memory will be pushed onto the stack (in aligned memory, thus avoid many small chunks).
* Memory is fully self-managed. Calling e.g. destructors once objects are no longer needed
* is the user's responsibility (memory is simply re-used after the lifetime of the task ends).
* @param size Number of bytes to be allocated
* @return The allocated memory region
void *allocate_memory(long size);
base_task() = default;
* Override this with the actual behaviour of concrete tasks.
......@@ -42,7 +27,27 @@ class task {
virtual void execute_internal() = 0;
void execute();
void execute() {
// TODO: Figure out slow path execution
template<typename F, typename R1, typename R2, typename CF>
class task : public base_task {
template<typename FARG>
explicit task(FARG &&function, continuation<R1, R2, CF> *continuation)
: base_task{}, function_{std::forward<FARG>(function)}, continuation_{continuation} {}
void execute_internal() override {
// TODO: Properly notify continuation on slow path
F function_;
continuation<R1, R2, CF> *continuation_;
......@@ -3,15 +3,26 @@
#include <memory>
#include <utility>
#include <array>
#include <atomic>
#include "pls/internal/data_structures/aligned_stack.h"
#include "pls/internal/scheduling/task.h"
#include "pls/internal/data_structures/stamped_integer.h"
#include "task.h"
namespace pls {
namespace internal {
namespace scheduling {
// TODO: Remove forward references
class task;
struct task_handle {
enum state { uninitialized, initialized, execute_local, stealing, execute_remote, finished };
using stamped_state = data_structures::stamped_integer;
std::atomic<stamped_state> stamped_state_{uninitialized};
base_task *task_;
* Handles management of tasks in the system. Each thread has a local task manager,
......@@ -22,42 +33,65 @@ class task;
* integrate the memory management into the stealing procedure.
class task_manager {
using task_manager_offset = data_structures::aligned_stack::stack_offset;
// Data each task needs to store to enable the 'return_task' functionality.
using task_manager_state = task_manager_offset;
// Construct a task onto the stack. Stores the previous offset in the newly constructed task.
template<class T, typename ...ARGS>
T *push_task(ARGS ...args);
// Publishes a task on the stack, i.e. makes it visible for other threads to steal.
void publish_task(task *task);
// Return a no longer needed task to the stack. Must be the current topmost task (will reset the stack pointer).
void return_task(task *task);
// The task itself is located on the stack of the worker, as the stealer will copy it away before it is freed.
void publish_task(base_task &task) {
task_handle_stack_[tail_internal_].task_ = &task;
task_handle_stack_[tail_internal_]{stamp_internal_++, task_handle::initialized},
tail_internal_++;, std::memory_order_release); // Linearization point, handle is published here
// Try to pop a local task from this task manager's stack.
task *pop_local_task();
// Try to steal a task from a remote task_manager instance.
// The returned task pointer is valid during the lifetime of the task.
// The returned task pointer must be returned to this task_manager instance.
// (This is because we can either decide to just steal a remote task pointer or to copy the whole task)
task *pop_remote_task(task_manager &other);
// This should only be required on the fast path of the implementation,
// thus only returning if the operation was a success.
// Essentially this is an 'un-publish' of a task with a notion if it was successful.
bool steal_local_task() {
tail_internal_--;, std::memory_order_relaxed);
task_handle::stamped_state swapped_state{task_handle::execute_local, stamp_internal_++};
task_handle_stack_[tail_internal_], std::memory_order_acq_rel);
if (swapped_state.value == task_handle::execute_remote ||
swapped_state.value == task_handle::finished) {
// Someone got the other task, return to 'non linear' execution path
// TODO: Properly handle slow path
return false;
} else {
// No one got the task so far, we are happy and continue our fast path
return true;
// Try to steal a task from a remote task_manager instance. The stolen task must be stored locally.
// Returns a pair containing the actual task and if the steal was successful.
// TODO: Re-implement after fast path is done
// std::pair<task, bool> steal_remote_task(task_manager &other);
explicit task_manager(data_structures::aligned_stack &task_stack) : task_stack_{task_stack} {}
explicit task_manager(task_handle *task_handle_stack) : task_handle_stack_{task_handle_stack},
stamp_internal_{0} {}
data_structures::aligned_stack &task_stack_;
task_handle *task_handle_stack_;
alignas(base::system_details::CACHE_LINE_SIZE) std::atomic<std::atomic<data_structures::stamped_integer>> head_;
alignas(base::system_details::CACHE_LINE_SIZE) std::atomic<unsigned int> tail_;
alignas(base::system_details::CACHE_LINE_SIZE) unsigned int tail_internal_, stamp_internal_;
template<size_t NUM_TASKS, size_t MAX_STACK_SIZE>
class static_task_manager {
static_task_manager() : static_task_stack_{}, task_manager_{static_task_stack_} {};
static_task_manager() : static_task_handle_stack_{}, task_manager_{} {};
task_manager &get_task_manager() { return task_manager_; }
data_structures::static_aligned_stack<MAX_STACK_SIZE> static_task_stack_;
std::array<task_handle, NUM_TASKS> static_task_handle_stack_;
task_manager task_manager_;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment