Commit e2092e63 by FritzFlorian

Add custom context switch library.

The rationale for a custom implementation is that the existing solutions are considerably slower and/or require more memory.
parent af75e21a
Pipeline #1382 failed with stages
in 36 seconds
...@@ -32,6 +32,7 @@ add_subdirectory(extern/benchmark_base) ...@@ -32,6 +32,7 @@ add_subdirectory(extern/benchmark_base)
add_subdirectory(extern/benchmark_runner) add_subdirectory(extern/benchmark_runner)
# Include all internal subprojects (library, examples, testing). # Include all internal subprojects (library, examples, testing).
add_subdirectory(lib/context_switcher)
add_subdirectory(lib/pls) add_subdirectory(lib/pls)
# Include examples # Include examples
......
...@@ -15,7 +15,7 @@ const size_t STACK_SIZE = 512 * 1; ...@@ -15,7 +15,7 @@ const size_t STACK_SIZE = 512 * 1;
const char MAGIC_NUMBER = (unsigned char) 0xAB; const char MAGIC_NUMBER = (unsigned char) 0xAB;
// Memory for custom stack and continuation semantics // Memory for custom stack and continuation semantics
char custom_stack[STACK_SIZE] = {0}; char custom_stack_1[STACK_SIZE] = {0};
jmp_buf buffer; jmp_buf buffer;
// Example callback function and declaration of our assembly stack switching routine // Example callback function and declaration of our assembly stack switching routine
...@@ -50,7 +50,7 @@ long measure_function_call() { ...@@ -50,7 +50,7 @@ long measure_function_call() {
long measure_stack_switch() { long measure_stack_switch() {
auto start_time = chrono::steady_clock::now(); auto start_time = chrono::steady_clock::now();
for (unsigned int i = 0; i < NUM_RUNS; i++) { for (unsigned int i = 0; i < NUM_RUNS; i++) {
custom_stack_callback(&custom_stack[STACK_SIZE]); custom_stack_callback(&custom_stack_1[STACK_SIZE]);
} }
auto end_time = chrono::steady_clock::now(); auto end_time = chrono::steady_clock::now();
return chrono::duration_cast<chrono::nanoseconds>(end_time - start_time).count(); return chrono::duration_cast<chrono::nanoseconds>(end_time - start_time).count();
...@@ -60,7 +60,7 @@ long measure_continuation() { ...@@ -60,7 +60,7 @@ long measure_continuation() {
auto start_time = chrono::steady_clock::now(); auto start_time = chrono::steady_clock::now();
for (unsigned int i = 0; i < NUM_RUNS; i++) { for (unsigned int i = 0; i < NUM_RUNS; i++) {
if (setjmp(buffer) == 0) { if (setjmp(buffer) == 0) {
custom_stack_callback(&custom_stack[STACK_SIZE]); custom_stack_callback(&custom_stack_1[STACK_SIZE]);
} }
} }
auto end_time = chrono::steady_clock::now(); auto end_time = chrono::steady_clock::now();
...@@ -71,7 +71,7 @@ long measure_continuation_and_jump() { ...@@ -71,7 +71,7 @@ long measure_continuation_and_jump() {
auto start_time = chrono::steady_clock::now(); auto start_time = chrono::steady_clock::now();
for (unsigned int i = 0; i < NUM_RUNS; i++) { for (unsigned int i = 0; i < NUM_RUNS; i++) {
if (setjmp(buffer) == 0) { if (setjmp(buffer) == 0) {
custom_stack_callback(&custom_stack[STACK_SIZE]); custom_stack_callback(&custom_stack_1[STACK_SIZE]);
longjmp(buffer, 1); longjmp(buffer, 1);
} }
} }
...@@ -87,7 +87,7 @@ void fcontext_callback_fast(fcontext_transfer_t transfer) { ...@@ -87,7 +87,7 @@ void fcontext_callback_fast(fcontext_transfer_t transfer) {
} }
long measure_fcontext_fast() { long measure_fcontext_fast() {
fcontext_t context = make_fcontext(&custom_stack[STACK_SIZE], STACK_SIZE, &fcontext_callback_fast); fcontext_t context = make_fcontext(&custom_stack_1[STACK_SIZE], STACK_SIZE, &fcontext_callback_fast);
auto start_time = chrono::steady_clock::now(); auto start_time = chrono::steady_clock::now();
for (unsigned int i = 0; i < NUM_RUNS; i++) { for (unsigned int i = 0; i < NUM_RUNS; i++) {
...@@ -105,7 +105,7 @@ void fcontext_callback_clean(fcontext_transfer_t transfer) { ...@@ -105,7 +105,7 @@ void fcontext_callback_clean(fcontext_transfer_t transfer) {
long measure_fcontext_clean() { long measure_fcontext_clean() {
auto start_time = chrono::steady_clock::now(); auto start_time = chrono::steady_clock::now();
for (unsigned int i = 0; i < NUM_RUNS; i++) { for (unsigned int i = 0; i < NUM_RUNS; i++) {
fcontext_t context = make_fcontext(&custom_stack[STACK_SIZE], STACK_SIZE, &fcontext_callback_clean); fcontext_t context = make_fcontext(&custom_stack_1[STACK_SIZE], STACK_SIZE, &fcontext_callback_clean);
jump_fcontext(context, nullptr); jump_fcontext(context, nullptr);
} }
auto end_time = chrono::steady_clock::now(); auto end_time = chrono::steady_clock::now();
...@@ -120,7 +120,7 @@ void fcontext_callcc(fcontext_transfer_t transfer) { ...@@ -120,7 +120,7 @@ void fcontext_callcc(fcontext_transfer_t transfer) {
long measure_fcontext_callcc() { long measure_fcontext_callcc() {
auto start_time = chrono::steady_clock::now(); auto start_time = chrono::steady_clock::now();
for (unsigned int i = 0; i < NUM_RUNS; i++) { for (unsigned int i = 0; i < NUM_RUNS; i++) {
fcontext_t context = make_fcontext(&custom_stack[STACK_SIZE], STACK_SIZE, &fcontext_callcc); fcontext_t context = make_fcontext(&custom_stack_1[STACK_SIZE], STACK_SIZE, &fcontext_callcc);
jump_fcontext(jump_fcontext(context, nullptr).ctx, nullptr); jump_fcontext(jump_fcontext(context, nullptr).ctx, nullptr);
} }
auto end_time = chrono::steady_clock::now(); auto end_time = chrono::steady_clock::now();
...@@ -132,7 +132,7 @@ long measure_custom() { ...@@ -132,7 +132,7 @@ long measure_custom() {
auto start_time = chrono::steady_clock::now(); auto start_time = chrono::steady_clock::now();
for (unsigned int i = 0; i < NUM_RUNS; i++) { for (unsigned int i = 0; i < NUM_RUNS; i++) {
fiber_call(custom_stack, STACK_SIZE, [](continuation_t continuation) { fiber_call(custom_stack_1, STACK_SIZE, [](continuation_t continuation) {
callback(); callback();
return continuation; return continuation;
}); });
...@@ -142,7 +142,7 @@ long measure_custom() { ...@@ -142,7 +142,7 @@ long measure_custom() {
} }
int main() { int main() {
memset(custom_stack, MAGIC_NUMBER, STACK_SIZE); memset(custom_stack_1, MAGIC_NUMBER, STACK_SIZE);
auto time_cont_jump = measure_continuation_and_jump(); auto time_cont_jump = measure_continuation_and_jump();
auto time_cont = measure_continuation(); auto time_cont = measure_continuation();
...@@ -169,7 +169,7 @@ int main() { ...@@ -169,7 +169,7 @@ int main() {
printf("Custom Fast Call : %10ld, %5.5f\n", time_custom, ((float) time_custom / NUM_RUNS)); printf("Custom Fast Call : %10ld, %5.5f\n", time_custom, ((float) time_custom / NUM_RUNS));
for (unsigned int i = 0; i < STACK_SIZE; i++) { for (unsigned int i = 0; i < STACK_SIZE; i++) {
if (custom_stack[i] != MAGIC_NUMBER) { if (custom_stack_1[i] != MAGIC_NUMBER) {
printf("\n\nUsed stack size about %u bytes.\n", (STACK_SIZE - i)); printf("\n\nUsed stack size about %u bytes.\n", (STACK_SIZE - i));
break; break;
} }
......
add_executable(playground main.cpp) add_executable(playground main.cpp)
# Example for adding the library to your app (as a cmake project dependency) # Example for adding the library to your app (as a cmake project dependency)
target_link_libraries(playground) target_link_libraries(playground context_switcher)
#include <utility>
#include <cstdio> #include <cstdio>
#include <csetjmp>
#include "context_switcher/context_switcher.h"
// Memory for custom stack and continuation semantics
const size_t STACK_SIZE = 512 * 8;
char custom_stack_1[STACK_SIZE];
// Force disable optimization
volatile int value = 0;
int main() { int main() {
printf("Buffer Size: %u\n", sizeof(jmp_buf)); using namespace context_switcher;
printf("Big Buffer Size: %u\n", sizeof(sigjmp_buf));
printf("Main 1!\n");
auto cont_2 = enter_context(custom_stack_1, STACK_SIZE, [](continuation &&cont_main) {
printf("Stack 1!\n");
cont_main = switch_context(std::move(cont_main));
printf("Stack 2!\n");
return std::move(cont_main);
});
printf("Main 2!\n");
cont_2 = switch_context(std::move(cont_2));
printf("Main 3!\n");
return 0; return 0;
} }
cmake_minimum_required(VERSION 3.10)
project(context_switcher
        VERSION 0.0.1
        DESCRIPTION "allows to execute functions and lambdas on a new stack and switch between them"
        LANGUAGES CXX ASM)

set(CMAKE_CXX_STANDARD 11)

# Platform Support - Edit this when porting the context switch facility.
# We are rather conservative with our checks; more systems may follow the implemented calling conventions.
# See our documentation and boost context for help when adding a new system.
# Each supported platform contributes one assembly file per primitive (enter_context / switch_context).
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
    set(CONTEXT_SWITCH_ASSEMBLY
        "asm/enter_context_x86_64.s"
        "asm/switch_context_x86_64.s")
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
    set(CONTEXT_SWITCH_ASSEMBLY
        "asm/enter_context_arm32.s"
        "asm/switch_context_arm32.s")
else ()
    message(FATAL_ERROR "Platform (${CMAKE_SYSTEM_PROCESSOR} on ${CMAKE_SYSTEM_NAME}) not supported! Please see Readme for instructions to port.")
endif ()

# STATUS adds the conventional "-- " prefix itself and keeps the line out of stderr.
message(STATUS "Context Switcher: ${CMAKE_SYSTEM_PROCESSOR} running ${CMAKE_SYSTEM_NAME}")

add_library(context_switcher STATIC
            ${CONTEXT_SWITCH_ASSEMBLY}
            include/context_switcher/context_switcher.h
            src/context_switcher.cpp
            include/context_switcher/assembly_bindings.h
            include/context_switcher/continuation.h
            include/context_switcher/lambda_capture.h)

# Add everything in `./include` to be in the include path of this project
target_include_directories(context_switcher
                           PUBLIC
                           $<INSTALL_INTERFACE:include>
                           $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
                           PRIVATE
                           ${CMAKE_CURRENT_SOURCE_DIR}/src
                           )
.arm
.text
.global __cs_enter_context
.type __cs_enter_context, %function
__cs_enter_context:
/* Saves the caller's register state on its current stack, switches to a new
 * stack and calls the given callback there. When the callback returns, the
 * state found on the stack it returned is restored and execution continues
 * after the matching call.
 *
 * Parameter List (in order)
 * r0 = new stack pointer
 * r1 = argument pointer forwarded to the callback (arrives as its second parameter)
 * r2 = callback function pointer
 * r3 = new stack limit (never read by this routine)
 *
 * Return
 * r0 = continuation that returned control back to the caller (null if fallthrough)
 *
 * Variables
 * r4 = temporary for the old stack pointer
 *
 * NOTE(review): no VFP registers are saved here - confirm whether hard-float
 * targets need d8-d15 preserved across a switch. */
/* ========== Save State ========== */
/* Store the return address; it is popped straight into pc at the very end. */
push {lr}
/* Store r4-r12 and lr. This frame layout must match __cs_switch_context,
 * because a frame saved by one routine may be restored by the other. */
push {r4-r12,lr}
/* ========== Save State ========== */
/* Perform change to new stack */
/* Keep the old stack pointer; it is handed to the callback as the continuation. */
mov r4, sp
/* Round the new stack start down to a multiple of 16 bytes. */
and r0, r0, #-16
/* Switch to new stack pointer. */
mov sp, r0
/* Perform actual function call, this will now be on the new stack */
/* r0 = first parameter to callback (continuation = old stack pointer) */
/* r1 = second parameter to callback (arbitrary pointer, untouched since entry) */
mov r0, r4
blx r2
/* Restore state of returned continuation. */
/* To do so we first reset the stack pointer (which the callback returned in r0). */
/* After that we execute our standard restore procedure to pop the state from the stack. */
mov sp, r0
/* ========== Restore State ========== */
/* restore the registers saved by the matching save sequence */
pop {r4-r12,lr}
/* ========== Restore State ========== */
/* Just return back from the call. */
/* This is the end of a fiber, so we have no continuation: return null in r0. */
eor r0, r0, r0
pop {pc}
.file "enter_context_x86_64.s"
.text
.global __cs_enter_context
.type __cs_enter_context, @function
.align 16
__cs_enter_context:
# Saves the caller's register state on its current stack, switches to a new
# stack and calls the given callback there. When the callback returns, the
# state found on the stack it returned is restored and execution continues
# after the matching call.
#
# Parameter List (in order)
# rdi = new stack pointer
# rsi = argument pointer forwarded to the callback (arrives as its second parameter)
# rdx = callback function pointer
# rcx = new stack limit (never read by this routine)
# Return
# rax = continuation that returned control back to the caller (null if fallthrough)
# Variables
# r12 = temporary for the old stack pointer
#
# Saved frame layout (0x38 bytes, shared with __cs_switch_context because a
# frame saved by one routine may be restored by the other):
#   0x00 r12, 0x08 r13, 0x10 r14, 0x18 r15, 0x20 rbx, 0x28 rbp,
#   0x30 MXCSR, 0x34 x87 control word
############### Save State ###############
# Make space for all register state we will store.
leaq -0x38(%rsp), %rsp
# Store callee saved general registers.
movq %r12, 0x00(%rsp)
movq %r13, 0x08(%rsp)
movq %r14, 0x10(%rsp)
movq %r15, 0x18(%rsp)
movq %rbx, 0x20(%rsp)
movq %rbp, 0x28(%rsp)
# Store SSE control- and status-register (MXCSR)
stmxcsr 0x30(%rsp)
# Store x87 control-word
fnstcw 0x34(%rsp)
############### Save State ###############
# Perform change to new stack.
# Keep the old stack pointer; it is handed to the callback as the continuation.
movq %rsp, %r12
# Round the new stack start down to a multiple of 16 bytes.
andq $-16, %rdi
# Switch to new stack pointer.
movq %rdi, %rsp
# Perform actual function call, this will now be on the new stack
# rdi = first parameter to callback (continuation = old stack pointer)
# rsi = second parameter to callback (arbitrary pointer, untouched since entry)
movq %r12, %rdi
call *%rdx
# Restore state of returned continuation.
# To do so we first reset the stack pointer (which the callback returned in rax).
# After that we execute our standard restore procedure to pop the state from the stack.
movq %rax, %rsp
############ Restore State ############
# restore callee saved general registers
movq 0x00(%rsp), %r12
movq 0x08(%rsp), %r13
movq 0x10(%rsp), %r14
movq 0x18(%rsp), %r15
movq 0x20(%rsp), %rbx
movq 0x28(%rsp), %rbp
# restore SSE control- and status-register (MXCSR)
ldmxcsr 0x30(%rsp)
# restore x87 control-word
fldcw 0x34(%rsp)
# Free space for restored state
leaq 0x38(%rsp), %rsp
############ Restore State ############
# TODO: Maybe look into a 'cleanup' hook for freeing the stack space here.
# Just return back from the call.
# This is the end of a fiber, so we have no continuation: return null in rax.
xor %rax, %rax
ret
.arm
.text
.global __cs_switch_context
.type __cs_switch_context, %function
__cs_switch_context:
/* Saves the caller's register state on its current stack, then resumes the
 * execution state stored on the target stack.
 *
 * Parameter List (in order)
 * r0 = continuation to resume (used directly as the target stack pointer)
 *
 * Return
 * r0 = continuation that returned control back to the caller (null if fallthrough).
 *      Note that the value placed in r0 below is delivered to the context being
 *      resumed, not to the caller of this invocation.
 *
 * Variables
 * r1 = temporary for the old stack pointer */
/* ========== Save State ========== */
/* Store the return address; it is popped straight into pc at the very end. */
push {lr}
/* Store r4-r12 and lr. This frame layout must match __cs_enter_context. */
push {r4-r12,lr}
/* ========== Save State ========== */
/* Perform change to new stack */
/* Keep old stack as result from this function. */
mov r1, sp
/* Switch to new stack pointer. */
mov sp, r0
/* ========== Restore State ========== */
/* restore the registers saved on the target stack */
pop {r4-r12,lr}
/* ========== Restore State ========== */
/* Return into the resumed context. */
/* Hand it the stack pointer we just switched away from as its continuation. */
mov r0, r1
pop {pc}
.file "switch_context_x86_64.s"
.text
.global __cs_switch_context
.type __cs_switch_context, @function
.align 16
__cs_switch_context:
# Saves the caller's register state on its current stack, then resumes the
# execution state stored on the target stack.
#
# Parameter List (in order)
# rdi = continuation to resume (used directly as the target stack pointer)
# Return
# rax = continuation that returned control back to the caller (null if fallthrough).
#       Note that the value placed in rax below is delivered to the context being
#       resumed, not to the caller of this invocation.
#
# Saved frame layout (0x38 bytes, must match __cs_enter_context):
#   0x00 r12, 0x08 r13, 0x10 r14, 0x18 r15, 0x20 rbx, 0x28 rbp,
#   0x30 MXCSR, 0x34 x87 control word
############### Save State ###############
# Make space for all register state we will store.
leaq -0x38(%rsp), %rsp
# Store callee saved general registers.
movq %r12, 0x00(%rsp)
movq %r13, 0x08(%rsp)
movq %r14, 0x10(%rsp)
movq %r15, 0x18(%rsp)
movq %rbx, 0x20(%rsp)
movq %rbp, 0x28(%rsp)
# Store SSE control- and status-register (MXCSR)
stmxcsr 0x30(%rsp)
# Store x87 control-word
fnstcw 0x34(%rsp)
############### Save State ###############
# Perform change to new stack.
# Keep old stack as result from this function
movq %rsp, %rax
# switch to new stack pointer
movq %rdi, %rsp
############ Restore State ############
# restore callee saved general registers
movq 0x00(%rsp), %r12
movq 0x08(%rsp), %r13
movq 0x10(%rsp), %r14
movq 0x18(%rsp), %r15
movq 0x20(%rsp), %rbx
movq 0x28(%rsp), %rbp
# restore SSE control- and status-register (MXCSR)
ldmxcsr 0x30(%rsp)
# restore x87 control-word
fldcw 0x34(%rsp)
# Free space for restored state
leaq 0x38(%rsp), %rsp
############ Restore State ############
# Return the context we came from as a continuation.
# rax already holds the stack pointer we switched away from.
ret
#ifndef CONTEXT_SWITCHER_ASSEMBLY_BINDINGS_H_
#define CONTEXT_SWITCHER_ASSEMBLY_BINDINGS_H_
/**
 * Low level bindings to the two assembly functions used to switch context.
 * Can be used standalone to build new control structures, but should be handled with care.
 *
 * Basic usage:
 * // Switch control to a new stack.
 * continuation_t cont = __cs_enter_context(my_stack, arg_pointer, callback_func, my_stack_limit);
 * // Will only be run when 'jumped back' by either returning from the callback or an explicit jump.
 * // cont will then hold the context of the jump origin (or null if it was a return from a finished callback).
 * // Re-enter the context that jumped to us.
 * cont = __cs_switch_context(cont);
 */
namespace context_switcher {
namespace assembly_bindings {
// Opaque handle to a suspended execution state.
using continuation_t = void *;
// Address inside a caller-provided stack memory region.
using stack_pointer_t = char *;
// Entry point run on the new stack: receives the continuation of the context that
// entered it plus the user argument pointer, and returns the continuation to resume next.
using callback_t = continuation_t (*)(continuation_t, void *);
// Implemented in the platform specific assembly files; the names must match the
// symbols exported there exactly.
extern "C" {
// Switches to the stack starting at stack_base and calls callback(<caller continuation>, callback_arg).
// Returns the continuation that handed control back (null if the callback simply finished).
continuation_t __cs_enter_context(stack_pointer_t stack_base,
void *callback_arg,
callback_t callback,
stack_pointer_t stack_limit);
// Suspends the current context and resumes the given continuation.
// Returns the continuation that handed control back (null if that context finished).
continuation_t __cs_switch_context(continuation_t continuation);
}
}
}
#endif //CONTEXT_SWITCHER_ASSEMBLY_BINDINGS_H_
#ifndef CONTEXT_SWITCHER_CONTEXT_SWITCHER_H_
#define CONTEXT_SWITCHER_CONTEXT_SWITCHER_H_
#include <utility>
#include <cstdio>
#include <cstdint>
#include <new>
#include "assembly_bindings.h"
#include "continuation.h"
#include "lambda_capture.h"
namespace context_switcher {
/**
 * Runs the given callable on a new stack located in caller-provided memory.
 *
 * The callable is stored in a lambda_capture object placed at the top (highest
 * addresses) of the memory region; the memory below it is handed to
 * __cs_enter_context as the new stack.
 *
 * @param stack_memory Lowest address of the memory region to use as stack.
 * @param stack_size   Size of the memory region in bytes. It must be large enough to hold
 *                     the captured callable plus the stack the callable actually needs;
 *                     this is not checked.
 * @param lambda       Callable that is invoked with a continuation rvalue (the context that
 *                     called enter_context) and whose result is the continuation to resume
 *                     once it finishes.
 * @return The continuation that handed control back to the caller (invalid if the
 *         callable ran to completion).
 */
template<typename F>
continuation enter_context(assembly_bindings::stack_pointer_t stack_memory, size_t stack_size, F &&lambda) {
  using capture_t = lambda_capture<F>;

  // Reserve room for the captured callable at the very top of the region ...
  assembly_bindings::stack_pointer_t lambda_memory = stack_memory + stack_size - sizeof(capture_t);
  // ... and move down to the next suitably aligned address. The region is a plain char
  // buffer, so without this the placement-new below may target misaligned storage.
  lambda_memory -= reinterpret_cast<std::uintptr_t>(lambda_memory) % alignof(capture_t);
  auto *captured_lambda = place_lambda_capture(std::forward<F>(lambda), lambda_memory);

  // The stack grows downwards from just below the captured callable.
  assembly_bindings::stack_pointer_t stack_base = lambda_memory;
  assembly_bindings::stack_pointer_t stack_limit = stack_memory;

  assembly_bindings::callback_t callback = lambda_capture_callback<capture_t>;
  return continuation{assembly_bindings::__cs_enter_context(stack_base, captured_lambda, callback, stack_limit)};
}
/**
 * Suspends the current context and resumes the one stored in cont.
 *
 * Defined `inline` because it lives in a header: without it every translation unit
 * including this file would emit its own definition and linking would fail.
 *
 * @param cont Continuation to resume; it is consumed (left invalid) by this call.
 * @return The continuation that handed control back to the caller (invalid if that
 *         context ran to completion).
 */
inline continuation switch_context(continuation &&cont) {
  assembly_bindings::continuation_t cont_pointer = cont.consume();
  return continuation{assembly_bindings::__cs_switch_context(cont_pointer)};
}
}
#endif //CONTEXT_SWITCHER_CONTEXT_SWITCHER_H_
#ifndef CONTEXT_SWITCHER_CONTINUATION_H_
#define CONTEXT_SWITCHER_CONTINUATION_H_
#include "assembly_bindings.h"
namespace context_switcher {
/**
* One-Shot continuation. Represents the paused state of an execution thread.
* Can be used exactly once to jump back to that state.
*
* Move only to ensure semantics of one time use.
*/
struct continuation {
public:
continuation() : cont_pointer_{nullptr} {};
explicit continuation(assembly_bindings::continuation_t cont_pointer) : cont_pointer_{cont_pointer} {};
// Move-Only Semantics
continuation(const continuation &other) = delete;
continuation(continuation &&other) noexcept {
cont_pointer_ = other.cont_pointer_;
other.cont_pointer_ = nullptr;
}
continuation &operator=(const continuation &other) = delete;
continuation &operator=(continuation &&other) noexcept {
cont_pointer_ = other.cont_pointer_;
other.cont_pointer_ = nullptr;
return *this;
}
// Semantics as 'one time use'
bool valid() const {
return cont_pointer_ != nullptr;
}
assembly_bindings::continuation_t consume() {
auto tmp = cont_pointer_;
cont_pointer_ = nullptr;
return tmp;
}
private:
assembly_bindings::continuation_t cont_pointer_;
};
}
#endif //CONTEXT_SWITCHER_CONTINUATION_H_
#ifndef CONTEXT_SWITCHER_LAMBDA_CAPTURE_H_
#define CONTEXT_SWITCHER_LAMBDA_CAPTURE_H_
#include <utility>
#include <new>
#include <type_traits>
#include "assembly_bindings.h"
#include "continuation.h"
/**
 * Helpers to more easily use a lambda expression as the code run on a context switch.
 * Captures lambdas by placing them in a memory region and offering a conversion from
 * the assembly_bindings callback API to the user's lambda.
 */
namespace context_switcher {
/**
 * Owns (or, for lvalue arguments, refers to) a user callable and adapts it to the
 * raw pointer based continuation type used by the assembly bindings.
 */
template<typename F>
struct lambda_capture {
  explicit lambda_capture(F &&callable) : lambda_{std::forward<F>(callable)} {}

  /**
   * Wraps the raw pointer into a continuation, runs the callable with it and
   * unwraps the continuation the callable hands back.
   */
  assembly_bindings::continuation_t operator()(assembly_bindings::continuation_t continuation_pointer) {
    continuation next = lambda_(continuation{continuation_pointer});
    return next.consume();
  }

 private:
  F lambda_;
};
/**
 * Trampoline with the assembly_bindings callback signature.
 *
 * Interprets the user argument as a pointer to T, invokes it with the given
 * continuation pointer, destroys the T object and returns the continuation the
 * invocation produced.
 */
template<typename T>
assembly_bindings::continuation_t lambda_capture_callback(assembly_bindings::continuation_t continuation_pointer,
                                                          void *lambda_capture_param) {
  // The argument pointer is the object created by place_lambda_capture.
  T *capture = static_cast<T *>(lambda_capture_param);

  // Run the user code.
  auto next_continuation = (*capture)(continuation_pointer);

  // This execution thread is finished once we return, so release the capture first.
  capture->~T();
  return next_continuation;
}
/**
 * Constructs a lambda_capture<F> for the given callable directly inside the provided
 * memory (no allocation) and returns a pointer to it. The caller is responsible for
 * providing enough, suitably aligned, memory.
 */
template<typename F>
static lambda_capture<F> *place_lambda_capture(F &&lambda, char *memory) {
  using capture_t = lambda_capture<F>;
  capture_t *placed = new(memory) capture_t(std::forward<F>(lambda));
  return placed;
}
}
#endif //CONTEXT_SWITCHER_LAMBDA_CAPTURE_H_
...@@ -58,7 +58,7 @@ add_library(pls STATIC ...@@ -58,7 +58,7 @@ add_library(pls STATIC
include/pls/internal/scheduling/task.h src/internal/scheduling/task.cpp include/pls/internal/scheduling/task.h src/internal/scheduling/task.cpp
include/pls/internal/scheduling/cont_manager.h include/pls/internal/scheduling/cont_manager.h
include/pls/internal/scheduling/cont.h include/pls/internal/scheduling/cont.h
include/pls/internal/data_structures/bounded_ws_deque.h include/pls/internal/data_structures/optional.h include/pls/internal/scheduling/memory_block.h include/pls/internal/scheduling/thread_state_static.h src/internal/base/error_handling.cpp include/pls/internal/data_structures/bounded_trading_deque.h) include/pls/internal/data_structures/bounded_ws_deque.h include/pls/internal/data_structures/optional.h include/pls/internal/scheduling/memory_block.h include/pls/internal/scheduling/thread_state_static.h src/internal/base/error_handling.cpp include/pls/internal/data_structures/bounded_trading_deque.h ../context_switcher/src/context_switcher.cpp)
# Add everything in `./include` to be in the include path of this project # Add everything in `./include` to be in the include path of this project
target_include_directories(pls target_include_directories(pls
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment