diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 975ba41..c6485bf 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -18,10 +18,11 @@ run_tests: script: ./ci_scripts/run_tests.sh -run_thread_sanitizer: - stage: sanitizer - script: - ./ci_scripts/run_thread_sanitizer.sh +# Disable until we can get a clang with the tsan patch for fibers on the CI server +#run_thread_sanitizer: +# stage: sanitizer +# script: +# ./ci_scripts/run_thread_sanitizer.sh run_address_sanitizer: stage: sanitizer diff --git a/BANANAPI.md b/BANANAPI.md new file mode 100644 index 0000000..69de631 --- /dev/null +++ b/BANANAPI.md @@ -0,0 +1,237 @@ +# Setup BananaPI for benchmarking + +The goal of this documentation is to get a linux image running on a +bananaPI board that allows for very isolated benchmarks showing full +time distributions of the measurement runs. + +## Base Setup + +First step is to get a linux image running on the banana PI. We choose to use the +[armbian](https://www.armbian.com/) project as it is the only one with support for the +bananaPI m3. For this we generally +[follow the instructions given](https://docs.armbian.com/Developer-Guide_Build-Preparation/), +below are notes on what to do to get the rt kernel patch into it and to build. + +You can also use [our pre-build image](https://drive.google.com/open?id=1RiHymBO_XjOk5tMAL31iOSJGfncrWFQh) +and skip the build process below. Just use etcher (https://www.balena.io/etcher/) or similar, +flash an sd card and the PI should boot up. Default login is root/1234, follow the instructions, +then continue with the isolating system setup steps for more accurate measurements. 
+ +General Setup: +- Set up an ubuntu bionic 18.04 virtual box VM +- `# apt-get -y -qq install git` +- `$ git clone --depth 1 https://github.com/armbian/build` +- `$ cd build` +- To verify the environment first go for a 'clean build' without patch: `# ./compile.sh` +- Select the bananaPI m3 board and a minimal console build + +Apply RT Patch: +- Find the current kernel version armbian is using, e.g. from the previous build logs +- Download and unpack the matching rt patch from https://mirrors.edge.kernel.org/pub/linux/kernel/projects/rt/ +- You should have a single .patch file, place it in build/userpatches/kernel/sunxi-current/patch-5.4.28-rt19.patch +- Re-run the `# ./compile.sh` script +- Select BananaPI M3, Command Line Minimal and SHOW KERNEL CONFIG +- The build should pick up the patch (and show it in the logs) +- You will be asked to fill in some settings. Choose (4) fully preemptive at the first option +- Fill out the other asked settings to your liking. To avoid issues just leave them at default. +- You will then be in the kernel config window +- Here disable the file systems AUFS and NFS in the settings (they cause build issues and we do not need them) +- Store the settings and build the image +- If successful, the flashed image should show the preempt patch with `uname -a` and should have good latencies in cyclictest + +## Run project + +First setup some base dependencies for running the benchmark and tests: + +- `# apt-get install rt-tests` +- `# apt-get install build-essential` +- `# apt-get install cmake` + +Next EMBB is required as a comparison in the benchmark suite. Install it using the following or similar +(as described on their github page, https://github.com/siemens/embb): +- `$ wget https://github.com/siemens/embb/archive/v1.0.0.zip` +- `$ unzip v1.0.0.zip` +- `$ cd embb-1.0.0` +- `$ mkdir cmake-build-release` +- `$ cd cmake-build-release` +- `$ cmake ../` +- `$ cmake --build .` +- `# cmake --build . 
--target install` + +These are all the dependencies needed for executing the benchmark project and pls itself. +Follow the project specific instructions for how to use them. + +## Tweaking Scheduler, CPU and Interrupts + +We would like to get very little dispersion through system jitter. We recommend tweaking the +scheduler, CPU and interrupt settings before running benchmarks. + +See the sub-sections below for the individual measures. ***Before running tests make sure to +run the following scripts:*** +- `sudo ./setup_cpu.sh` +- `sudo ./map_interrupts_core_0.sh` +- `sudo ./setup_rt.sh` +- `sudo ./setup_cgroups.sh` + +Then start your tests manually mapped to cores 1 to 7. We also found that having any interactive sessions +open during the measurements can cause large latency spikes (see 'Running Tests' below). + +### Tuning kernel parameters + +Some online references recommend kernel parameter tweaks for getting better latencies. +To change kernel parameters edit the `/boot/armbianEnv.txt` file and add a line with +`extraargs=`. + +Here are some good articles discussing jitter on linux systems: +- https://www.codethink.co.uk/articles/2018/configuring-linux-to-stabilise-latency/ (General Tips and Measurements) +- https://access.redhat.com/sites/default/files/attachments/201501-perf-brief-low-latency-tuning-rhel7-v1.1.pdf (7 - Kernel Command Line) +- https://access.redhat.com/articles/65410 (Power Management/C-States) +- https://community.mellanox.com/s/article/rivermax-linux-performance-tuning-guide--1-x (General Tips) + +We use the following settings: +```shell script +mce=ignore_ce nosoftlockup nmi_watchdog=0 transparent_hugepage=never processor.max_cstate=1 idle=poll nohz=on nohz_full=1-7 +``` + +- ***mce=ignore_ce*** do not scan for hw errors. Reduce the jitter introduced by periodic runs +- ***nosoftlockup*** do not log backtraces for tasks hogging the cpu over some time. This, again, reduces jitter and we do not need the function in our controlled test environment. 
+- ***nmi_watchdog=0*** disables the nmi watchdog on architectures that support it. Essentially disables a non-maskable interrupt that is used to detect hanging/stuck systems. We do not need this check during our benchmarks. https://medium.com/@yildirimabdrhm/nmi-watchdog-on-linux-ae3b4c86e8d8 +- ***transparent_hugepage=never*** do not scan for small pages to combine to hugepages. We have no issues with memory usage, spare us of this periodic jitter. +- ***processor.max_cstate=1 idle=poll*** do not switch to CPU power saving modes (c-states). Just run all cores at full speed all the time (we do not care about energy during our tests). +- ***nohz=on nohz_full=1-7*** disable housekeeping os ticks on our isolated benchmark cores. core 0 will handle these when needed. + +### Pin all other processes to core 0 (cgroups) + +We want to isolate our measurements to cores 1 to 7 and use core 0 for all non benchmark related processes. +isolcpus is often used for this, however, we found that it prevents the scheduler from balancing tasks +between the isolated cores. A better approach is to use cgroups. +See the tutorial for further information: https://github.com/lpechacek/cpuset/blob/master/doc/tutorial.txt +Essentially, we can partition our cores into two isolated groups, then move all tasks that can be moved away from +our benchmark cores, to ensure low influence of background tasks. Cgroups also nicely interact with +the real time scheduler, as described here https://www.linuxjournal.com/article/10165, because +they allow adapting the scheduler to ignore the other cores in its decision making process. 
+Note the exclusive cpu groups in this output: +```shell script +florian@bananapim3:~$ cset set +cset: + Name CPUs-X MEMs-X Tasks Subs Path + ------------ ---------- - ------- - ----- ---- ---------- + root 0-7 y 0 y 116 2 / + user 1-7 y 0 n 0 0 /user + system 0 y 0 n 58 0 /system +``` + +Create a file called 'setup_cgroups.sh' and make it executable with 'chmod +x setup_cgroups.sh': +```shell script +#!/bin/bash + +sudo cset shield --cpu=1-7 -k on +``` + +This will isolate cores 1 to 7 for our benchmarks. To run the benchmarks on these cores use the following +or a similar command: `sudo chrt --fifo 90 cset shield -e --user= \-- ` + + +### CPU frequency + +Limiting the frequency to a fixed value that the cooling can sustain (the script below uses `1412000`) makes sure that the banana PI does not throttle during the tests. +Additionally, disabling any dynamic frequency scaling makes tests more reproducible. + +Create a file called 'setup_cpu.sh' and make it executable with 'chmod +x setup_cpu.sh': +```shell script +#!/bin/bash + +echo "Writing frequency utils settings file..." +echo "ENABLE=true +MIN_SPEED=1412000 +MAX_SPEED=1412000 +GOVERNOR=performance" > /etc/default/cpufrequtils + +echo "Restarting frequency utils service..." +systemctl restart cpufrequtils + +echo "Done!" +echo "Try ./watch_cpu.sh to see if everything worked." +echo "Test your cooling by stressing the cpu and watching the temperature output." +``` + +Create a file called 'watch_cpu.sh' and make it executable with 'chmod +x watch_cpu.sh': +````shell script +#!/bin/bash + +echo "Min/Max Frequencies" +cat /sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_min_freq +echo "-----" +cat /sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_max_freq + +echo "Scaling Min/Max Frequencies" +cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq +echo "-----" +cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq + +echo "Actual Frequencies" +cat /sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_cur_freq + +echo "Temps.." 
+cat /sys/class/thermal/thermal_zone*/temp +```` + +***BEFORE TESTS***: +To set up the CPU run ***`sudo ./setup_cpu.sh`*** before your tests. To see that the change worked +and the temperatures stay stable use the `./watch_cpu.sh` script. + +### Map interrupts to core 0 + +Interrupts can interfere with our benchmarks. We therefore map them to core 0 if possible and run our tests on +cores 1 to 7. + +Create a file called 'map_interrupts_core_0.sh' and make it executable with 'chmod +x map_interrupts_core_0.sh': +```shell script +#!/bin/bash + +echo "Try to map interrupts to core 0." +echo "Some might fail because they can not be mapped (e.g. core specific timers)." +echo "" +echo "" + +echo 1 > /proc/irq/default_smp_affinity +for dir in /proc/irq/*/ +do + echo "Mapping $dir ..." + echo 1 > $dir/smp_affinity +done +``` + +***BEFORE TESTS***: map the interrupts to core 0 using ***`sudo ./map_interrupts_core_0.sh`*** + +### Full time slices to RT scheduler + +The RT scheduler in linux by default leaves some fraction of its scheduling time to non RT processes, +leaving the system in a responsive state if an RT application eats all CPU. We do not want this, as we +try to get a very predictable behavior in our RT scheduler. + +Create a file called 'setup_rt.sh' and make it executable with 'chmod +x setup_rt.sh': +```shell script +#!/bin/bash + +sysctl -w kernel.sched_rt_runtime_us=1000000 +sysctl -w kernel.sched_rt_period_us=1000000 +``` + +***BEFORE TESTS***: give full time slices to RT tasks ***`sudo ./setup_rt.sh`*** + +## Running Tests + +***Before running tests make sure to run the following scripts:*** +- `sudo ./setup_cpu.sh` +- `sudo ./map_interrupts_core_0.sh` +- `sudo ./setup_rt.sh` + +To run the tests use the following (or a similar command with different rt policy): + +`sudo chrt --fifo 90 cset shield -e --user= \-- ` + +This maps the process to all cores but core 0 and runs them using the desired real time schedule and priority. 
+ +We found that interactive sessions can cause huge latency spikes even with this separation, +therefore we advise starting the benchmarks and then leaving the system alone until they are done. diff --git a/CMakeLists.txt b/CMakeLists.txt index 98cb69c..f0f2c75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,10 @@ cmake_minimum_required(VERSION 3.10) project(predictable_parallel_patterns VERSION 0.0.1 - DESCRIPTION "predictable parallel patterns for scalable smart systems using work stealing") + DESCRIPTION "predictable parallel patterns for scalable smart systems using work stealing" + LANGUAGES CXX ASM) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) # seperate library and test/example executable output paths. set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin) @@ -12,6 +13,7 @@ set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib) # specific setup code is located in individual files. include(cmake/DisabelInSource.cmake) include(cmake/SetupOptimizationLevel.cmake) +include(cmake/SetupAssemblyOutput.cmake) include(cmake/SetupThreadingSupport.cmake) include(cmake/SetupThreadSanitizer.cmake) include(cmake/SetupAddressSanitizer.cmake) @@ -25,19 +27,21 @@ list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/cmake") # Each library has an own CMakeLists.txt that should make it avaliabale as a library target, # thus allowing one to include it as any cmake dependency later on. add_subdirectory(extern/catch2) +add_subdirectory(extern/picosha2) +add_subdirectory(extern/benchmark_base) +add_subdirectory(extern/benchmark_runner) # Include all internal subprojects (library, examples, testing). 
+add_subdirectory(lib/context_switcher) add_subdirectory(lib/pls) # Include examples add_subdirectory(app/playground) -add_subdirectory(app/test_for_new) -add_subdirectory(app/invoke_parallel) add_subdirectory(app/benchmark_fft) add_subdirectory(app/benchmark_unbalanced) add_subdirectory(app/benchmark_matrix) -add_subdirectory(app/benchmark_prefix) -add_subdirectory(app/benchmark_pipeline) +add_subdirectory(app/benchmark_fib) +add_subdirectory(app/context_switch) # Add optional tests option(PACKAGE_TESTS "Build the tests" ON) diff --git a/NOTES.md b/NOTES.md index 4ac9407..a7ccac7 100644 --- a/NOTES.md +++ b/NOTES.md @@ -1,8 +1,195 @@ -# Notes - -A collection of stuff that we noticed during development. -Useful later on two write a project report and to go back -in time to find out why certain decisions where made. +# Notes on Continuation/Parent Stealing Implementation + +The new version of pls uses a more complicated/less user friendly +API in favor of performance and memory guarantees. +For the old version refer to the second half of this document. + +# 31.03.2020 - Test setup on BananaPI + +We currently use a banana pi m3 single board computer for running our evaluations. +The reason is that we aim for clean, isolated measurements on a system that +introduces little jitter in CPU frequencies/performance with multiple equivialent cores. +It also is a rather low power board, with a TDP of about 15W, satisfying our desire for +a platform that mimmics embedded devices. On top of that it is rather cheap at under 100$. + +The main concern is software. Options are the vendor image (kernel 3.4 with RT patch), +an [arch image from the forum](http://forum.banana-pi.org/t/bananapi-bpi-m3-new-image-archlinux-4-18-1-1-arch-2018-08-19/6544) +(kernel 4.19 with RT patch) or [armbian](https://www.armbian.com/bananapi-m3/) (kernel 5.4, NO RT patch). +We tried all three. The vendor image is simply too old to be comparable to modern changes in linux. 
+We then compared the arch rt kernel and the most recent armbian kernel using +[cyclictest](https://manpages.debian.org/jessie/rt-tests/cyclictest.8). We ran a normal smp test +using `cyclictest --smp -p95 -m` on both systems while stressing the processor with lower priority +applications to 100% utilization. The rt arch kernel had maximum latencies between +100us and 180us on different cores, the armbian kernel had latencies between 550us and 730us on +different cores. As our benchmarks are on the order of a few milliseconds (e.g. the matrix +with 3ms) we think it is important to choose an rt kernel, especially if we later look at +applications running periodically. However, the arch kernel is 2 years old, making updating its +software components with arch rolling releases very hard. Even with tinkering we can not get it +to update the system without the kernel and without boot issues. + +Following the above discussion we see that armbian is easy to set up, but has no prebuilt rt image. +Because of this, we decide to use the armbian build script and add the rt patch to it, giving us the best +of both worlds. + +Full setup documentation for the os and how to further tune isolated performance can be found in a +separate document: [BANANAPI.md](./BANANAPI.md) + +# 24.03.2020 - mmap stack + +We added different means of allocating stacks for our coroutines. +The biggest difference is that we now support mmap with guard pages +for the stacks. This leads to segfaults (SIGSEGV) if the stack space is really exhausted +during runtime, which is better than undefined behaviour. + +The minimum page size is usually 4kB, which is plenty in release builds. +Other solutions for the cactus stack problem (memory mapped cactus stacks) +share this 'minimum resource per depth' limitation. They argue that they only need +the physical memory in the worst case, whereas we need it in any execution schedule. +As we aim for worst case bounds this seems to be not a big issue. 
Using any smaller size +requires giving up on the guard pages, but is possible with heap allocated stacks. + +See [mmap](http://man7.org/linux/man-pages/man2/mmap.2.html), [mprotect](https://linux.die.net/man/2/mprotect) +and [sysconf](http://man7.org/linux/man-pages/man3/sysconf.3.html) for implementation details. +The general approach of mmapping the stack, then protecting a page above it should be straightforward. + +# 24.03.2020 - Bug in tsan + +While implementing the coroutines required for the current parent-stealing scheduler +we found issues when running with thread sanitizer. First we thought that the issues +are related to the way we handle context switching, but after some investigation we found +that it was due to a memory leak in tsan. + +Specifically, tsan leaked memory mappings for each created/destroyed fiber +which led to [linux limits](https://stackoverflow.com/questions/22779556/linux-error-from-munmap) +preventing the application from performing more mappings. This leads to the +application hanging or crashing after some time. + +We addressed the issue in a [patch request](https://reviews.llvm.org/D76073) to the +LLVM/Clang tool suite. When the patch is merged we need to set up our test-runner to use the +current version of Clang and add the information to the documentation. + +# 18.03.2020 - C++17 benefit, overaligned new + +We have many cache line aligned types. Before C++17 they can +only be allocated with correct alignment using alignas(...) +when they are stored in static or automatic storage (e.g. global memory, stack memory). + +As we move towards more modern RAII type resource management, the fact that +[C++ 17 supports alignment of arbitrary length, even in vectors](https://www.bfilipek.com/2019/08/newnew-align.html) +is extremely helpful. Before we needed wrappers to do aligned heap allocations. 
+ +# 18.03.2020 - Coding Standard/C++ Standard + +We previously stuck to strict 'static' allocation in global static +storage or on the stack for most memory we need. However, we found +this to be very restrictive, making our code overly complicated +and not at all 'modern C++'. + +After some research we found that this restrictive style is common +in very restrictive embedded domains. However, we target multi and many +core systems that run many applications on a shared operating system in +a more traditional 'PC-like' manner. The model best fitting this application +style is the adaptive autosar standard. It models the system as a posix +compliant os running specialized software for application deployment and management. +Under this stack there is usually a normal OS scheduler, like e.g. a +linux image tuned for low latencies. The target platforms for such systems +are intel and arm processors with 2 to 16 cores, making it ideal for +our framework. + +Following this logic, we decide to orient ourselves on the +[C++ 14 guidelines of autosar](https://www.autosar.org/fileadmin/user_upload/standards/adaptive/17-03/AUTOSAR_RS_CPP14Guidelines.pdf). +Specifically, they encourage the use of dynamic memory allocation and +modern C++, as long as some restrictions are met. +The memory usage of an app must be bounded and best practice is to +allocate resources 'outside of real-time sections, e.g. startup', +making it, again, a good fit for our concept. We do not plan +to follow all these rules, but we see them as an example for where +our research can be applied later on. As the autosar standard is +[moving towards C++ 17 and integrated into misra C++](https://www.autosar.org/news-events/details/misra-consortium-announce-integration-of-autosar-c-coding-guidelines-into-updated-industry-standar/) +we decide to go for C++ 17 in our project. These facts make development +a lot more convenient and are enough to test our proof of concept prototype. 
+ +# 13.03.2020 - Coroutines and 'Nice API' + +Before Christmas we finished the 'ugly' inline/object based API. +Because this was very hard to use (even for us), the only options +were to go to a class based solution like TBB (everything MUST be a +class, as soon as you call into user code promises are gone) or to +go with a fully fledged continuation based API. + +We decide to go for the continuation API, as one of our project questions +was if we can incorporate a nice high level API with a library only solution. + +Following this we need coroutines and we also need the stealing to work for more +than two child tasks. + +## Coroutines basic implementation + +We go for an implementation similar to [boost.context](https://github.com/boostorg/context), +where the context is stored and changed using custom assembly. +We do so, as setjmp/longjmp (jmp_buf) is not portable at all (i.e. the jumps +we intend to do are EXPLICITLY not allowed in the standard, see +[docs](https://en.cppreference.com/w/c/program/setjmp) on jumping other +than what exceptions are allowed to do). + +Inspirations for what we use in our end-product are [deboost.context](https://github.com/septag/deboost.context) +and [tinycoroutine](https://github.com/soundsrc/tinycoroutine). + +Our final implementation uses custom assembly on supported platforms, +being faster than boost.context as we choose to omit unnecessary setup steps, +as the focus is on quick switching into a new routine. We plan to also use +this for 'tail calls' in our future code (and think that this is a good fit if +we plan for composable APIs in future work). + +## TSan support... + +We want to integrate tsan into our fiber for sure, as it has helped us in the +past to catch various bugs. Unfortunately, we found that our program crashes +when running with it. The first solution was to add the new (mid 2019) API +for explicit fiber switching. This seemed to work, however, the program crashed +after some time. 
After long research, we found that it was a memory leak in +the tsan implementation. [We fixed it](https://reviews.llvm.org/D76073) +and integrated support for it into a separate coroutine library. + +## Multiple child steal + +We then needed to adapt our stealing procedure/queue to support multiple +child tasks. To do so in a lock-free manner required some tinkering. +-> further details are in our paper notebook for now. In summary, +we use a flag to atomically trade in resources when stealing a task +and use a lock free stack to store the resources pending on a task. + + + +# 05.11.2019 - Memory Allocation, 'Fat Objects' + +We change our memory allocation for all memory the scheduler requires +from allocating buffers (char* arrays) separate from the actual data +structures to 'fat datastructures' that use templating to create +an object that actually holds all the data. This allows us to more +simply add fields to manage tasks and continuations, as we do not +need to change the scheduler_memory (adding additional buffers), but +only have to add the fields directly to the container objects. + +# 04.11.2019 - Memory Allocation and Initialization + +Our framework tries to be explicit on how and where memory is allocated. +In any production build of the framework we will only use fixed size +memory pools/blocks to manage all data structures required by the +scheduler, as this property is our main research goal. + +Nevertheless, we want to offer different ways on where to allocate +these fixed pools. Some people might prefer to store them in the stack, +some to store them in heap memory, and others might want to place them +into memory managed by custom allocators. Currently we support a stack +based 'fat' object and a heap based memory object that stores each +thread's state in a vector (could be changed to lists in the future +to avoid the one big memory block allocated by the vector). 
+ +# Notes on Blocking/Child Stealing Implementation + +Notes on the child stealing implementation of pls. +This corresponds to tag v0.1. ## 02.08.2019 - Ideas for sleeping threads when no work is available diff --git a/PERFORMANCE.md b/PERFORMANCE-v1.md similarity index 100% rename from PERFORMANCE.md rename to PERFORMANCE-v1.md diff --git a/PERFORMANCE-v2.md b/PERFORMANCE-v2.md new file mode 100644 index 0000000..b8a27df --- /dev/null +++ b/PERFORMANCE-v2.md @@ -0,0 +1,22 @@ +# Notes on performance measures during development + +#### Commit e34ea267 - 05.12.2019 - First Version of new Algorithm - Scaling Problems + +The first version of our memory trading work stealing algorithm works. It still shows scaling issues over +the hyperthreading mark, very similar to what we have seen in version 1. This indicates some sort of +contention between the threads when running the FFT algorithm. + +Analyzing the current version we find issue with the frequent call to `thread_state_for(id)` in +the stealing loop. + +![](./media/e34ea267_thread_state_for.png) + +It is obvious that the method takes some amount of runtime, as FFT has a structure that tends to only +work on the continuations in the end of the computation (the critical path of FFT can only be executed +after most parallel tasks are done). + +![](./media/e34ea267_fft_execution_pattern.png) + +What we can see here is the long tail of continuations running at the end of the computation. During +this time the non working threads constantly steal, thus requiring the `thread_state_for(id)` +virtual method, potentially hindering other threads from doing their work properly. diff --git a/README.md b/README.md index 122e457..4577263 100644 --- a/README.md +++ b/README.md @@ -76,12 +76,11 @@ long fib(long n) { ``` - ## Project Structure The project uses [CMAKE](https://cmake.org/) as it's build system, the recommended IDE is either a simple text editor or [CLion](https://www.jetbrains.com/clion/). 
-We divide the project into subtargets to separate for the library +We divide the project into sub-targets to separate for the library itself, testing and example code. The library itself can be found in `lib/pls`, testing related code is in `test`, example and playground apps are in `app`. @@ -114,11 +113,16 @@ Available Settings: - Enables thread/datarace sanitizer to be linked to the executable - Only one sanitizer can be active at once - Enabling has a performance hit (do not use in releases) -- `-DDEBUG_SYMBOLS=ON` +- `-DDEBUG_SYMBOLS=ON/OFF` - default OFF - Enables the build with debug symbols - Use for e.g. profiling the release build +Note that these settings are persistent for one CMake build folder. +If you e.g. set a flag in the debug build it will not influence +the release build, but it will persist in the debug build folder +until you explicitly change it back. + ### Testing Testing is done using [Catch2](https://github.com/catchorg/Catch2/) @@ -167,4 +171,8 @@ For detailed profiling of small performance hotspots we prefer to use [Intel's VTune Amplifier](https://software.intel.com/en-us/vtune). It gives insights in detailed microachitecture usage and performance hotspots. Follow the instructions by Intel for using it. +Make sure to enable debug symbols (`-DDEBUG_SYMBOLS=ON`) in the +analyzed build and that all optimizations are turned on +(by choosing the release build). 
+ diff --git a/app/benchmark_fft/CMakeLists.txt b/app/benchmark_fft/CMakeLists.txt index 41591e5..cfef00b 100644 --- a/app/benchmark_fft/CMakeLists.txt +++ b/app/benchmark_fft/CMakeLists.txt @@ -1,5 +1,5 @@ -add_executable(benchmark_fft main.cpp) -target_link_libraries(benchmark_fft pls) -if(EASY_PROFILER) - target_link_libraries(benchmark_fft easy_profiler) -endif() +add_executable(benchmark_fft_pls_v3 main.cpp) +target_link_libraries(benchmark_fft_pls_v3 pls benchmark_runner benchmark_base) +if (EASY_PROFILER) + target_link_libraries(benchmark_fft_pls_v3 easy_profiler) +endif () diff --git a/app/benchmark_fft/main.cpp b/app/benchmark_fft/main.cpp index 80de92c..ff960cb 100644 --- a/app/benchmark_fft/main.cpp +++ b/app/benchmark_fft/main.cpp @@ -1,86 +1,63 @@ -#include -#include -#include +#include "pls/internal/scheduling/scheduler.h" -#include -#include -#include +using namespace pls::internal::scheduling; -static constexpr int CUTOFF = 16; -static constexpr int NUM_ITERATIONS = 1000; -static constexpr int INPUT_SIZE = 8192; -typedef std::vector> complex_vector; +#include "benchmark_runner.h" +#include "benchmark_base/fft.h" -void divide(complex_vector::iterator data, int n) { - complex_vector tmp_odd_elements(n / 2); - for (int i = 0; i < n / 2; i++) { - tmp_odd_elements[i] = data[i * 2 + 1]; - } - for (int i = 0; i < n / 2; i++) { - data[i] = data[i * 2]; - } - for (int i = 0; i < n / 2; i++) { - data[i + n / 2] = tmp_odd_elements[i]; - } -} +using namespace comparison_benchmarks::base; -void combine(complex_vector::iterator data, int n) { - for (int i = 0; i < n / 2; i++) { - std::complex even = data[i]; - std::complex odd = data[i + n / 2]; - - // w is the "twiddle-factor". - // this could be cached, but we run the same 'data_structures' algorithm parallel/serial, - // so it won't impact the performance comparison. - std::complex w = exp(std::complex(0, -2. 
* M_PI * i / n)); - - data[i] = even + w * odd; - data[i + n / 2] = even - w * odd; - } -} - -void fft(complex_vector::iterator data, int n) { +// Parallel FFT step: fft::divide splits the input, both halves are transformed, fft::combine merges them. +// Inputs with n <= fft::RECURSIVE_CUTOFF are finished serially via fft::conquer; larger ones spawn one task per half and sync. +// Keep this function free of I/O and debug output: it is called from the timed benchmark loop in main(). +void pls_conquer(fft::complex_vector::iterator data, fft::complex_vector::iterator swap_array, int n) { if (n < 2) { return; } - divide(data, n); - if (n <= CUTOFF) { - fft(data, n / 2); - fft(data + n / 2, n / 2); + fft::divide(data, swap_array, n); + if (n <= fft::RECURSIVE_CUTOFF) { + fft::conquer(data, swap_array, n / 2); + fft::conquer(data + n / 2, swap_array + n / 2, n / 2); } else { - pls::invoke( - [&] { fft(data, n / 2); }, - [&] { fft(data + n / 2, n / 2); } - ); + scheduler::spawn([data, n, swap_array]() { + pls_conquer(data, swap_array, n / 2); + }); + scheduler::spawn([data, n, swap_array]() { + pls_conquer(data + n / 2, swap_array + n / 2, n / 2); + }); + scheduler::sync(); } - combine(data, n); + + fft::combine(data, n); } -complex_vector prepare_input(int input_size) { - std::vector known_frequencies{2, 11, 52, 88, 256}; - complex_vector data(input_size); +constexpr int MAX_NUM_TASKS = 32; +constexpr int MAX_STACK_SIZE = 1024 * 4; - // Set our input data to match a time series of the known_frequencies. - // When applying fft to this time-series we should find these frequencies. 
- for (int i = 0; i < input_size; i++) { - data[i] = std::complex(0.0, 0.0); - for (auto frequencie : known_frequencies) { - data[i] += sin(2 * M_PI * frequencie * i / input_size); - } - } +int main(int argc, char **argv) { + int num_threads; + string directory; + benchmark_runner::read_args(argc, argv, num_threads, directory); - return data; -} + string test_name = to_string(num_threads) + ".csv"; + string full_directory = directory + "/PLS_v3/"; + benchmark_runner runner{full_directory, test_name}; + + fft::complex_vector data(fft::SIZE); + fft::complex_vector swap_array(fft::SIZE); + fft::fill_input(data); -int main() { - PROFILE_ENABLE - complex_vector initial_input = prepare_input(INPUT_SIZE); + scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; - pls::internal::helpers::run_mini_benchmark([&] { - complex_vector input = initial_input; - fft(input.begin(), input.size()); - }, 7, 1000); + runner.run_iterations(fft::NUM_ITERATIONS, [&]() { + scheduler.perform_work([&]() { + pls_conquer(data.begin(), swap_array.begin(), fft::SIZE);; + }); + }, fft::NUM_WARMUP_ITERATIONS, [&]() { + fft::fill_input(data); // Reset data before each run + }); + runner.commit_results(true); - PROFILE_SAVE("test_profile.prof") + return 0; } diff --git a/app/benchmark_fib/CMakeLists.txt b/app/benchmark_fib/CMakeLists.txt new file mode 100644 index 0000000..5233f4f --- /dev/null +++ b/app/benchmark_fib/CMakeLists.txt @@ -0,0 +1,5 @@ +add_executable(benchmark_fib_pls_v3 main.cpp) +target_link_libraries(benchmark_fib_pls_v3 pls benchmark_runner benchmark_base) +if (EASY_PROFILER) + target_link_libraries(benchmark_fib_pls_v3 easy_profiler) +endif () diff --git a/app/benchmark_fib/main.cpp b/app/benchmark_fib/main.cpp new file mode 100644 index 0000000..b304bb6 --- /dev/null +++ b/app/benchmark_fib/main.cpp @@ -0,0 +1,55 @@ +#include "pls/internal/scheduling/scheduler.h" + +using namespace pls::internal::scheduling; + +#include + +#include "benchmark_runner.h" 
+#include "benchmark_base/fib.h" + +using namespace comparison_benchmarks::base; + +int pls_fib(int n) { + if (n == 0) { + return 0; + } + if (n == 1) { + return 1; + } + + int a, b; + scheduler::spawn([n, &a]() { + a = pls_fib(n - 1); + }); + scheduler::spawn([n, &b]() { + b = pls_fib(n - 2); + }); + scheduler::sync(); + + return a + b; +} + +constexpr int MAX_NUM_TASKS = 32; +constexpr int MAX_STACK_SIZE = 1024 * 32; + +int main(int argc, char **argv) { + int num_threads; + string directory; + benchmark_runner::read_args(argc, argv, num_threads, directory); + + string test_name = to_string(num_threads) + ".csv"; + string full_directory = directory + "/PLS_v3/"; + benchmark_runner runner{full_directory, test_name}; + + scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; + + volatile int res; + runner.run_iterations(fib::NUM_ITERATIONS, [&]() { + scheduler.perform_work([&]() { + res = pls_fib(fib::INPUT_N); + }); + }, fib::NUM_WARMUP_ITERATIONS); + runner.commit_results(true); + + return 0; +} diff --git a/app/benchmark_matrix/CMakeLists.txt b/app/benchmark_matrix/CMakeLists.txt index 0245a5b..fe40f88 100644 --- a/app/benchmark_matrix/CMakeLists.txt +++ b/app/benchmark_matrix/CMakeLists.txt @@ -1,5 +1,5 @@ -add_executable(benchmark_matrix main.cpp) -target_link_libraries(benchmark_matrix pls) +add_executable(benchmark_matrix_pls_v3 main.cpp) +target_link_libraries(benchmark_matrix_pls_v3 pls benchmark_runner benchmark_base) if (EASY_PROFILER) - target_link_libraries(benchmark_matrix easy_profiler) + target_link_libraries(benchmark_matrix_pls_v3 easy_profiler) endif () diff --git a/app/benchmark_matrix/main.cpp b/app/benchmark_matrix/main.cpp index ed6d874..6bc24c8 100644 --- a/app/benchmark_matrix/main.cpp +++ b/app/benchmark_matrix/main.cpp @@ -1,85 +1,47 @@ -#include -#include -#include +#include "pls/internal/scheduling/scheduler.h" +#include "pls/algorithms/for_each.h" -#include +using namespace pls::internal::scheduling; -const int 
MATRIX_SIZE = 128; +#include "benchmark_runner.h" +#include "benchmark_base/matrix.h" + +using namespace comparison_benchmarks::base; template -class matrix { +class pls_matrix : public matrix::matrix { public: - T data[SIZE][SIZE]; - - explicit matrix(T i = 1) { - std::fill(&data[0][0], &data[0][0] + SIZE * SIZE, i); - } + pls_matrix() : matrix::matrix() {} - void multiply(const matrix &a, const matrix &b) { - pls::for_each_range(0, SIZE, [&](int i) { + void multiply(const matrix::matrix &a, const matrix::matrix &b) override { + pls::algorithm::for_each_range(0, SIZE, [&](int i) { this->multiply_column(i, a, b); }); } - - private: - void multiply_column(int i, const matrix &a, const matrix &b) { - for (int j = 0; j < SIZE; ++j) { - data[i][j] = 0; - } - for (int k = 0; k < SIZE; ++k) { - for (int j = 0; j < SIZE; ++j) { - data[i][j] += a.data[i][k] * b.data[k][j]; - } - } - } }; -void fill_with_data(matrix &a, matrix &b) { - // Fill in some data... - for (int i = 0; i < MATRIX_SIZE; i++) { - for (int j = 0; j < MATRIX_SIZE; j++) { - a.data[i][j] = i; - b.data[i][j] = j; - } - } -} +constexpr int MAX_NUM_TASKS = 32; +constexpr int MAX_STACK_SIZE = 1024 * 1; -int main() { - PROFILE_ENABLE - matrix a; - matrix b; - matrix result; - fill_with_data(a, b); +int main(int argc, char **argv) { + int num_threads; + string directory; + benchmark_runner::read_args(argc, argv, num_threads, directory); - pls::internal::helpers::run_mini_benchmark([&] { - result.multiply(a, b); - }, 8, 1000); + string test_name = to_string(num_threads) + ".csv"; + string full_directory = directory + "/PLS_v3/"; + benchmark_runner runner{full_directory, test_name}; - PROFILE_SAVE("test_profile.prof") -} + pls_matrix a; + pls_matrix b; + pls_matrix result; -//int main() { -// PROFILE_ENABLE -// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u}; -// pls::scheduler scheduler{&my_scheduler_memory, 4}; -// -// matrix a; -// matrix b; -// matrix result; -// fill_with_data(a, b); -// -// 
scheduler.perform_work([&] { -// auto start_time = std::chrono::high_resolution_clock::now(); -// PROFILE_MAIN_THREAD -// for (int i = 0; i < 10000; i++) { -// PROFILE_WORK_BLOCK("Top Level") -// result.multiply(a, b); -// } -// auto end_time = std::chrono::high_resolution_clock::now(); -// long time = std::chrono::duration_cast(end_time - start_time).count(); -// std::cout << "Runtime: " << time << "us" << std::endl; -// }); -// -// PROFILE_SAVE("test_profile.prof") -//} + scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE}; + runner.run_iterations(matrix::NUM_ITERATIONS, [&]() { + scheduler.perform_work([&]() { + result.multiply(a, b); + }); + }, matrix::WARMUP_ITERATIONS); + runner.commit_results(true); +} diff --git a/app/benchmark_pipeline/CMakeLists.txt b/app/benchmark_pipeline/CMakeLists.txt deleted file mode 100644 index d531b74..0000000 --- a/app/benchmark_pipeline/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_executable(benchmark_pipeline main.cpp) -target_link_libraries(benchmark_pipeline pls) -if (EASY_PROFILER) - target_link_libraries(benchmark_pipeline easy_profiler) -endif () diff --git a/app/benchmark_pipeline/main.cpp b/app/benchmark_pipeline/main.cpp deleted file mode 100644 index 6752d17..0000000 --- a/app/benchmark_pipeline/main.cpp +++ /dev/null @@ -1,148 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -static constexpr int INPUT_SIZE = 8192; -typedef std::vector> complex_vector; - -using namespace pls::dataflow; - -void divide(complex_vector::iterator data, int n) { - complex_vector tmp_odd_elements(n / 2); - for (int i = 0; i < n / 2; i++) { - tmp_odd_elements[i] = data[i * 2 + 1]; - } - for (int i = 0; i < n / 2; i++) { - data[i] = data[i * 2]; - } - for (int i = 0; i < n / 2; i++) { - data[i + n / 2] = tmp_odd_elements[i]; - } -} - -void combine(complex_vector::iterator data, int n) { - for (int i = 0; i < n / 2; i++) { - std::complex even = data[i]; - 
std::complex odd = data[i + n / 2]; - - // w is the "twiddle-factor". - // this could be cached, but we run the same 'data_structures' algorithm parallel/serial, - // so it won't impact the performance comparison. - std::complex w = exp(std::complex(0, -2. * M_PI * i / n)); - - data[i] = even + w * odd; - data[i + n / 2] = even - w * odd; - } -} - -void fft(complex_vector::iterator data, int n) { - if (n < 2) { - return; - } - - divide(data, n); - fft(data, n / 2); - fft(data + n / 2, n / 2); - combine(data, n); -} - -complex_vector prepare_input(int input_size) { - std::vector known_frequencies{2, 11, 52, 88, 256}; - complex_vector data(input_size); - - // Set our input data to match a time series of the known_frequencies. - // When applying fft to this time-series we should find these frequencies. - for (int i = 0; i < input_size; i++) { - data[i] = std::complex(0.0, 0.0); - for (auto frequencie : known_frequencies) { - data[i] += sin(2 * M_PI * frequencie * i / input_size); - } - } - - return data; -} - -int main() { - PROFILE_ENABLE - pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u}; - pls::scheduler scheduler{&my_scheduler_memory, 4}; - - graph, outputs> graph; - std::atomic count{0}; - auto lambda = [&](const int &in, int &out) { - PROFILE_WORK_BLOCK("Work Lambda") - auto tmp = in; - out = tmp; - complex_vector input = prepare_input(INPUT_SIZE); - fft(input.begin(), input.size()); - count++; - }; - function_node, outputs, decltype(lambda)> step_1{lambda}; - function_node, outputs, decltype(lambda)> step_2{lambda}; - function_node, outputs, decltype(lambda)> step_3{lambda}; - function_node, outputs, decltype(lambda)> step_4{lambda}; - - graph >> step_1 >> step_2 >> step_3 >> step_4 >> graph; - graph.build(); - - const int num_elements = 10; - std::vector> results(num_elements); - - pls::internal::helpers::run_mini_benchmark([&] { - PROFILE_WORK_BLOCK("Top Level") - for (int j = 0; j < num_elements; j++) { - graph.run(std::tuple{j}, &results[j]); 
- } - pls::scheduler::wait_for_all(); - }, 8, 1000); - - PROFILE_SAVE("test_profile.prof") -} - -//int main() { -// PROFILE_ENABLE -// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u}; -// pls::scheduler scheduler{&my_scheduler_memory, 4}; -// -// graph, outputs> graph; -// std::atomic count{0}; -// auto lambda = [&](const int &in, int &out) { -// PROFILE_WORK_BLOCK("Work Lambda") -// out = in; -// complex_vector input = prepare_input(INPUT_SIZE); -// fft(input.begin(), input.size()); -// count++; -// }; -// function_node, outputs, decltype(lambda)> step_1{lambda}; -// function_node, outputs, decltype(lambda)> step_2{lambda}; -// function_node, outputs, decltype(lambda)> step_3{lambda}; -// function_node, outputs, decltype(lambda)> step_4{lambda}; -// -// graph >> step_1 >> step_2 >> step_3 >> step_4 >> graph; -// graph.build(); -// -// const int num_elements = 10; -// std::vector> results(num_elements); -// -// scheduler.perform_work([&] { -// PROFILE_MAIN_THREAD -// for (int i = 0; i < 10; i++) { -// PROFILE_WORK_BLOCK("Top Level") -// for (int j = 0; j < num_elements; j++) { -// graph.run(std::tuple{j}, &results[j]); -// } -// pls::scheduler::wait_for_all(); -// } -// }); -// -// std::cout << count << std::endl; -// -// PROFILE_SAVE("test_profile.prof") -//} diff --git a/app/benchmark_prefix/CMakeLists.txt b/app/benchmark_prefix/CMakeLists.txt deleted file mode 100644 index f4f705b..0000000 --- a/app/benchmark_prefix/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_executable(benchmark_prefix main.cpp) -target_link_libraries(benchmark_prefix pls) -if (EASY_PROFILER) - target_link_libraries(benchmark_prefix easy_profiler) -endif () diff --git a/app/benchmark_prefix/main.cpp b/app/benchmark_prefix/main.cpp deleted file mode 100644 index a7cd7be..0000000 --- a/app/benchmark_prefix/main.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -static constexpr int INPUT_SIZE = 10e7; - -int main() { - 
PROFILE_ENABLE - std::vector vec(INPUT_SIZE, 1); - std::vector out(INPUT_SIZE); - - for (int i = 0; i < INPUT_SIZE; i++) { - vec[i] = i; - } - - pls::internal::helpers::run_mini_benchmark([&] { - pls::scan(vec.begin(), vec.end(), out.begin(), std::plus(), 0.0); - }, 8, 1000); - - PROFILE_SAVE("test_profile.prof") -} - -//int main() { -// PROFILE_ENABLE -// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18}; -// pls::scheduler scheduler{&my_scheduler_memory, 8}; -// -// std::vector vec(INPUT_SIZE, 1); -// std::vector out(INPUT_SIZE); -// -// for (int i = 0; i < INPUT_SIZE; i++) { -// vec[i] = 1; -// } -// -// scheduler.perform_work([&] { -// PROFILE_MAIN_THREAD -// for (int i = 0; i < 100; i++) { -// pls::scan(vec.begin(), vec.end(), out.begin(), std::plus(), 0.0); -// } -// }); -// -// PROFILE_SAVE("test_profile.prof") -//} diff --git a/app/benchmark_unbalanced/CMakeLists.txt b/app/benchmark_unbalanced/CMakeLists.txt index d935ada..9c86805 100644 --- a/app/benchmark_unbalanced/CMakeLists.txt +++ b/app/benchmark_unbalanced/CMakeLists.txt @@ -1,5 +1,5 @@ -add_executable(benchmark_unbalanced main.cpp node.h function_node.cpp picosha2.h) -target_link_libraries(benchmark_unbalanced pls) +add_executable(benchmark_unbalanced_pls_v3 main.cpp) +target_link_libraries(benchmark_unbalanced_pls_v3 benchmark_runner benchmark_base pls) if (EASY_PROFILER) - target_link_libraries(benchmark_unbalanced easy_profiler) + target_link_libraries(benchmark_unbalanced_pls_v3 easy_profiler) endif () diff --git a/app/benchmark_unbalanced/main.cpp b/app/benchmark_unbalanced/main.cpp index 75f5daa..e9c312e 100644 --- a/app/benchmark_unbalanced/main.cpp +++ b/app/benchmark_unbalanced/main.cpp @@ -1,62 +1,60 @@ -#include -#include -#include +#include "pls/internal/scheduling/scheduler.h" -#include "node.h" +using namespace pls::internal::scheduling; -const int SEED = 42; -const int ROOT_CHILDREN = 140; -const double Q = 0.124875; -const int NORMAL_CHILDREN = 8; +#include 
"benchmark_runner.h" +#include "benchmark_base/unbalanced.h" -const int NUM_NODES = 71069; +using namespace comparison_benchmarks::base; -int count_child_nodes(uts::node &node) { - int child_count = 1; - std::vector children = node.spawn_child_nodes(); +#include - if (children.empty()) { - return child_count; +int count_child_nodes(unbalanced::node &node) { + if (node.get_num_children() < 1) { + return 1; } - std::vector results(children.size()); - for (size_t i = 0; i < children.size(); i++) { - size_t index = i; - auto lambda = [&, index] { - results[index] = count_child_nodes(children[index]); - }; - using child_type = pls::lambda_task_by_value; - pls::scheduler::spawn_child(lambda); - } - pls::scheduler::wait_for_all(); - for (auto result : results) { - child_count += result; + std::atomic count{1}; + for (int i = 0; i < node.get_num_children(); i++) { + scheduler::spawn([i, &count, &node] { + unbalanced::node child_node = node.spawn_child_node(i); + count.fetch_add(count_child_nodes(child_node)); + }); } + scheduler::sync(); - return child_count; + return count; } int unbalanced_tree_search(int seed, int root_children, double q, int normal_children) { - int result; + unbalanced::node root(seed, root_children, q, normal_children); + return count_child_nodes(root); +} - auto lambda = [&] { - uts::node root(seed, root_children, q, normal_children); - result = count_child_nodes(root); - }; - using child_type = pls::lambda_task_by_reference; - pls::scheduler::spawn_child(lambda); - pls::scheduler::wait_for_all(); +constexpr int MAX_NUM_TASKS = 256; +constexpr int MAX_STACK_SIZE = 1024 * 2; - return result; -} +int main(int argc, char **argv) { + int num_threads; + string directory; + benchmark_runner::read_args(argc, argv, num_threads, directory); + + string test_name = to_string(num_threads) + ".csv"; + string full_directory = directory + "/PLS_v3/"; + benchmark_runner runner{full_directory, test_name}; + + scheduler scheduler{(unsigned) num_threads, 
MAX_NUM_TASKS, MAX_STACK_SIZE}; -int main() { - PROFILE_ENABLE - pls::internal::helpers::run_mini_benchmark([&] { - unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN); - }, 8, 2000); + runner.run_iterations(unbalanced::NUM_ITERATIONS, [&]() { + scheduler.perform_work([&]() { + unbalanced_tree_search(unbalanced::SEED, + unbalanced::ROOT_CHILDREN, + unbalanced::Q, + unbalanced::NORMAL_CHILDREN); + }); + }, unbalanced::WARMUP_ITERATIONS); + runner.commit_results(true); - PROFILE_SAVE("test_profile.prof") } //int main() { diff --git a/app/context_switch/CMakeLists.txt b/app/context_switch/CMakeLists.txt new file mode 100644 index 0000000..059a778 --- /dev/null +++ b/app/context_switch/CMakeLists.txt @@ -0,0 +1,17 @@ +add_subdirectory(deboost.context) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + SET(SWITCH_ASSEMBLY "custom_stack_callback_x86_64.s") +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + SET(SWITCH_ASSEMBLY "custom_stack_callback_arm32.s") +else () + MESSAGE(FATAL_ERROR "Platform (${CMAKE_SYSTEM_PROCESSOR} on ${CMAKE_SYSTEM_NAME}) not supported! 
Please see Readme for instructions to port.") +endif () + + +add_executable(context_switch + main.cpp + ${SWITCH_ASSEMBLY}) + +# Example for adding the library to your app (as a cmake project dependency) +target_link_libraries(context_switch fcontext context_switcher) diff --git a/app/context_switch/custom_stack_callback_arm32.s b/app/context_switch/custom_stack_callback_arm32.s new file mode 100644 index 0000000..dd81eeb --- /dev/null +++ b/app/context_switch/custom_stack_callback_arm32.s @@ -0,0 +1,15 @@ + .arm + .text + .global custom_stack_callback + .type custom_stack_callback, %function + +.align 4 +custom_stack_callback: + /* r0 new stack adress (passed as parameter) */ + /* r4 temporary for restoring old stack (callee saved, so we get the correct value in case of a return) */ + push {r4, lr} /* store the callee saved register as required and the return address */ + mov r4, sp /* store current stack pointer */ + mov sp, r0 /* update stack pointer to new user level stack */ + bl callback /* enter next tasks (will not return if continuation is stolen) */ + mov sp, r4 /* restore to the old stack pointer */ + pop {r4, pc} /* restore the callee saved register as required and returns */ diff --git a/app/context_switch/custom_stack_callback_x86_64.s b/app/context_switch/custom_stack_callback_x86_64.s new file mode 100644 index 0000000..5a6d71e --- /dev/null +++ b/app/context_switch/custom_stack_callback_x86_64.s @@ -0,0 +1,16 @@ + .file "custom_stack_callback_x86_64.s" + .text + .global custom_stack_callback + .type custom_stack_callback, @function + +.align 16 +custom_stack_callback: + # rdi = new stack adress (passed as parameter) + # r12 temporary for restoring old stack (callee saved, so we get the correct value in case of a return) + push %r12 # store the callee saved register as required + movq %rsp, %r12 # store current stack pointer + movq %rdi, %rsp # update stack pointer to new user level stack + call callback # enter next tasks (will not return if 
continuation is stolen) + movq %r12, %rsp # restore to the old stack pointer + pop %r12 # restore the callee saved register as required + ret diff --git a/app/context_switch/deboost.context/.gitignore b/app/context_switch/deboost.context/.gitignore new file mode 100644 index 0000000..1d8e736 --- /dev/null +++ b/app/context_switch/deboost.context/.gitignore @@ -0,0 +1,237 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +bld/ +[Bb]in/ +[Oo]bj/ +.build + +# Visual Studio 2015 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# DNX +project.lock.json +artifacts/ + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + 
+# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# TODO: Comment the next line if you want to checkin your web deploy settings +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/packages/* +# except build/, which is used as an MSBuild target. +!**/packages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/packages/repositories.config +# NuGet v3's project.json files produces more ignoreable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Microsoft Azure ApplicationInsights config file +ApplicationInsights.config + +# Windows Store app package directory +AppPackages/ +BundleArtifacts/ + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.pfx +*.publishsettings +node_modules/ +orleans.codegen.cs + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. 
Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe + +# FAKE - F# Make +.fake/ diff --git a/app/context_switch/deboost.context/CMakeLists.txt b/app/context_switch/deboost.context/CMakeLists.txt new file mode 100644 index 0000000..3a87e38 --- /dev/null +++ b/app/context_switch/deboost.context/CMakeLists.txt @@ -0,0 +1,79 @@ +# PROJECT: fcontext +cmake_minimum_required(VERSION 3.0) +project(fcontext C) + +if (NOT CMAKE_MODULE_PATH) + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +endif () + +if (MSVC) + enable_language(CXX ASM_MASM) +else () + enable_language(CXX ASM) +endif () + +if (MSVC) + add_definitions(-D_ITERATOR_DEBUG_LEVEL=0) + add_definitions(-D_HAS_EXCEPTIONS=0) +endif () +add_definitions(-DBOOST_CONTEXT_EXPORT=) + +set(HEADER "include/fcontext/fcontext.h") +set(SOURCES "source/stack.c") + +# OS +if (APPLE) + set(CPU_ARCH "combined") + set(ASM_EXT "all_macho_gas.S") +elseif (ANDROID) + # Android + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") + set(CPU_ARCH "arm") + set(ASM_EXT "aapcs_elf_gas.S") + elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + set(CPU_ARCH "arm64") + set(ASM_EXT "aapcs_elf_gas.S") + elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "i686") + set(CPU_ARCH "i386") + set(ASM_EXT "sysv_elf_gas.S") + elseif 
(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64") + set(CPU_ARCH "x86_64") + set(ASM_EXT "sysv_elf_gas.S") + endif () +elseif (UNIX) + # PC (x86/x64) + if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") + set(CPU_ARCH "arm") + set(ASM_EXT "aapcs_elf_gas.S") # Untested, but should work for linux/unix + elseif (CMAKE_SIZEOF_VOID_P EQUAL 8) + set(CPU_ARCH "x86_64") + set(ASM_EXT "sysv_elf_gas.S") # Linux/Unix + else () + set(CPU_ARCH "i386") + set(ASM_EXT "sysv_elf_gas.S") # Linux/Unix + endif () +elseif (WIN32) + # Windows PC + if (CMAKE_SIZEOF_VOID_P EQUAL 8) + set(CPU_ARCH "x86_64") + else () + set(CPU_ARCH "i386") + endif () + set(ASM_EXT "ms_pe_masm.asm") +endif () + +set(ASM_SOURCES "asm/make_${CPU_ARCH}_${ASM_EXT}" + "asm/jump_${CPU_ARCH}_${ASM_EXT}" + "asm/ontop_${CPU_ARCH}_${ASM_EXT}") + +add_library(fcontext STATIC ${SOURCES} ${ASM_SOURCES}) +target_include_directories(fcontext + PRIVATE include/fcontext + INTERFACE include) + +set_property(TARGET fcontext PROPERTY INTERPROCEDURAL_OPTIMIZATION FALSE) +set_target_properties(fcontext PROPERTIES FOLDER Deps ${IOS_GENERAL_PROPERTIES}) + +install(TARGETS fcontext DESTINATION lib) +install(FILES ${HEADER} DESTINATION include/fcontext) + diff --git a/app/context_switch/deboost.context/LICENSE b/app/context_switch/deboost.context/LICENSE new file mode 100644 index 0000000..de1d6f2 --- /dev/null +++ b/app/context_switch/deboost.context/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Sepehr Taghdisian + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be 
included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/app/context_switch/deboost.context/README.md b/app/context_switch/deboost.context/README.md new file mode 100644 index 0000000..6cd1113 --- /dev/null +++ b/app/context_switch/deboost.context/README.md @@ -0,0 +1,33 @@ +## deboost.context +"Deboostified" version of boost.context (coroutines), Plain and simple C API for context switching. Easy build on multiple platforms. + +### Build +#### Currently supported platforms +- Windows (x86_64, Win32) +- Linux (x86_64/x86) +- OSX (x86_64/x86) +- Android (ARM/x86/ARM64/x86_64) +- iOS (Arm64, Arm7, x86_64, i386) + +#### iOS +I've made an extra xcode project files for iOS ```projects/xcode/fcontext``` because I didn't know how to set different ASM files for each ARM architecture in cmake. So If you know how to do it, I'd be happy if you tell me. +So, you can use the included toolchain file or use your own, just define _IOS_ to include the xcode project instead of generating it with cmake. +``` +cd deboost.context +mkdir .build +cd .build +cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/ios.toolchain.cmake -G Xcode +``` + + +### Usage +Link your program with fcontext.lib/libfcontext.a and include the file _fcontext.h_. +See _include/fcontext/fcontext.h_ for API usage. 
+More info is available at: [boost.context](http://www.boost.org/doc/libs/1_60_0/libs/context/doc/html/index.html) + +### Credits +- Boost.context: This library uses the code from boost.context [github](https://github.com/boostorg/context) + +### Thanks +- Ali Salehi [github](https://github.com/lordhippo) + diff --git a/app/context_switch/deboost.context/asm/jump_arm64_aapcs_elf_gas.S b/app/context_switch/deboost.context/asm/jump_arm64_aapcs_elf_gas.S new file mode 100644 index 0000000..7f012e6 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_arm64_aapcs_elf_gas.S @@ -0,0 +1,88 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * 
+ * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.cpu generic+fp+simd +.text +.align 2 +.global jump_fcontext +.type jump_fcontext, %function +jump_fcontext: + # prepare stack for GP + FPU + sub sp, sp, #0x70 + + # save x19-x30 + stp x19, x20, [sp, #0x00] + stp x21, x22, [sp, #0x10] + stp x23, x24, [sp, #0x20] + stp x25, x26, [sp, #0x30] + stp x27, x28, [sp, #0x40] + stp x29, x30, [sp, #0x50] + + # save LR as PC + str x30, [sp, #0x60] + + # store RSP (pointing to context-data) in X0 + mov x4, sp + + # restore RSP (pointing to context-data) from X1 + mov sp, x0 + + # load x19-x30 + ldp x19, x20, [sp, #0x00] + ldp x21, x22, [sp, #0x10] + ldp x23, x24, [sp, #0x20] + ldp x25, x26, [sp, #0x30] + ldp x27, x28, [sp, #0x40] + ldp x29, x30, [sp, #0x50] + + # return transfer_t from jump + # pass transfer_t as first arg in context function + # X0 == FCTX, X1 == DATA + mov x0, x4 + + # load pc + ldr x4, [sp, #0x60] + + # restore stack from GP + FPU + add sp, sp, #0x70 + + ret x4 +.size jump_fcontext,.-jump_fcontext +# Mark that we don't need executable stack. 
+.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/jump_arm64_aapcs_macho_gas.S b/app/context_switch/deboost.context/asm/jump_arm64_aapcs_macho_gas.S new file mode 100644 index 0000000..61512a4 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_arm64_aapcs_macho_gas.S @@ -0,0 +1,77 @@ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _jump_fcontext +.balign 16 +_jump_fcontext: + ; reserve 0x70 bytes of stack for the GP context (x19-x30 and PC), no FPU registers are saved here + sub sp, sp,
#0x70 + + ; save x19-x30 + stp x19, x20, [sp, #0x00] + stp x21, x22, [sp, #0x10] + stp x23, x24, [sp, #0x20] + stp x25, x26, [sp, #0x30] + stp x27, x28, [sp, #0x40] + stp fp, lr, [sp, #0x50] + + ; save LR as PC + str lr, [sp, #0x60] + + ; store SP (pointing to context-data) in X4 + mov x4, sp + + ; restore SP (pointing to context-data) from X0 + mov sp, x0 + + ; load x19-x30 + ldp x19, x20, [sp, #0x00] + ldp x21, x22, [sp, #0x10] + ldp x23, x24, [sp, #0x20] + ldp x25, x26, [sp, #0x30] + ldp x27, x28, [sp, #0x40] + ldp fp, lr, [sp, #0x50] + + ; return transfer_t from jump + ; pass transfer_t as first arg in context function + ; X0 == FCTX, X1 == DATA + mov x0, x4 + + ; load pc + ldr x4, [sp, #0x60] + + ; release the 0x70-byte context frame + add sp, sp, #0x70 + + ret x4 diff --git a/app/context_switch/deboost.context/asm/jump_arm_aapcs_elf_gas.S b/app/context_switch/deboost.context/asm/jump_arm_aapcs_elf_gas.S new file mode 100644 index 0000000..db4636c --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_arm_aapcs_elf_gas.S @@ -0,0 +1,58 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * |hiddn| v1 | v2 | v3 | v4 | v5 | v6 | v7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl jump_fcontext +.align 2 +.type jump_fcontext,%function +jump_fcontext: + @ save LR as PC + push {lr} + @ save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + @ store RSP (pointing to context-data) in A1 + mov a1, sp + + @ restore RSP (pointing to context-data) from A2 + mov sp, a2 + + @ restore hidden (into A4),V1-V8,LR + pop {a4,v1-v8,lr} + + @ return transfer_t from jump + str a1, [a4, #0] + str a3, [a4, #4] + @ pass transfer_t as first arg in context function + @ A1 == FCTX, A2 == DATA + mov a2, a3 + + @ restore PC + pop {pc} +.size jump_fcontext,.-jump_fcontext + +@ Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/jump_arm_aapcs_macho_gas.S b/app/context_switch/deboost.context/asm/jump_arm_aapcs_macho_gas.S new file mode 100644 index 0000000..4868171 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_arm_aapcs_macho_gas.S @@ -0,0 +1,67 @@ +/* + Copyright Oliver Kowalke 2009.
+ Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | sjlj|hiddn| v1 | v2 | v3 | v4 | v5 | v6 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | v7 | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _jump_fcontext +.align 2 +_jump_fcontext: + @ save LR as PC + push {lr} + @ save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + @ locate TLS to save/restore SjLj handler + mrc p15, 0, v2, c13, c0, #3 + bic v2, v2, #3 + + @ load TLS[__PTK_LIBC_DYLD_Unwind_SjLj_Key] + ldr v1, [v2, #8] + @ save SjLj handler + push {v1} + + @ store RSP (pointing to context-data) in A1 + mov a1, sp + + @ restore RSP (pointing to context-data) from A2 + mov sp, a2 + + @ restore SjLj handler + pop {v1} + @ store SjLj handler in TLS + str v1, [v2, #8] + + @ restore hidden (into A4),V1-V8,LR + pop {a4,v1-v8,lr} + + @ return transfer_t from jump + str a1, [a4, #0] + str a3, [a4, #4] + @ pass transfer_t as first arg in context function + @ A1 == FCTX, A2 == DATA + mov a2, a3 + + @ restore PC + pop {pc} diff --git a/app/context_switch/deboost.context/asm/jump_arm_aapcs_pe_armasm.asm b/app/context_switch/deboost.context/asm/jump_arm_aapcs_pe_armasm.asm new file mode 100644 index
0000000..bca923c --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_arm_aapcs_pe_armasm.asm @@ -0,0 +1,81 @@ +;/* +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) +;*/ + +; ******************************************************* +; * * +; * ------------------------------------------------- * +; * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +; * ------------------------------------------------- * +; * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * +; * ------------------------------------------------- * +; * |deall|limit| base|hiddn| v1 | v2 | v3 | v4 | * +; * ------------------------------------------------- * +; * ------------------------------------------------- * +; * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +; * ------------------------------------------------- * +; * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * +; * ------------------------------------------------- * +; * | v5 | v6 | v7 | v8 | lr | pc | FCTX| DATA| * +; * ------------------------------------------------- * +; * * +; ******************************************************* + + AREA |.text|, CODE + ALIGN 4 + EXPORT jump_fcontext + +jump_fcontext PROC + ; save LR as PC + push {lr} + ; save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + ; load TIB to save/restore stack base, limit and deallocation stack.
+ ; we do not need to preserve CPU flags and can use its arg register + mrc p15, #0, v1, c13, c0, #2 + + ; save current stack base + ldr a5, [v1, #0x04] + push {a5} + ; save current stack limit + ldr a5, [v1, #0x08] + push {a5} + ; save current deallocation stack + ldr a5, [v1, #0xe0c] + push {a5} + + ; store RSP (pointing to context-data) in A1 + mov a1, sp + + ; restore RSP (pointing to context-data) from A2 + mov sp, a2 + + ; restore deallocation stack + pop {a5} + str a5, [v1, #0xe0c] + ; restore stack limit + pop {a5} + str a5, [v1, #0x08] + ; restore stack base + pop {a5} + str a5, [v1, #0x04] + + ; restore hidden (into A4),V1-V8,LR + pop {a4,v1-v8,lr} + + ; return transfer_t from jump + str a1, [a4, #0] + str a3, [a4, #4] + ; pass transfer_t as first arg in context function + ; A1 == FCTX, A2 == DATA + mov a2, a3 + + ; restore PC + pop {pc} + + ENDP + END diff --git a/app/context_switch/deboost.context/asm/jump_combined_all_macho_gas.S b/app/context_switch/deboost.context/asm/jump_combined_all_macho_gas.S new file mode 100644 index 0000000..e77d5eb --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_combined_all_macho_gas.S @@ -0,0 +1,28 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary: includes the jump_fcontext source matching the target architecture + +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) + #if defined(__aarch64__) + #include "jump_arm64_aapcs_macho_gas.S" + #else + #include "jump_arm_aapcs_macho_gas.S" + #endif +#else + #if defined(__i386__) + #include "jump_i386_sysv_macho_gas.S" + #elif defined(__x86_64__) + #include "jump_x86_64_sysv_macho_gas.S" + #elif defined(__ppc__) + #include "jump_ppc32_sysv_macho_gas.S" + #elif defined(__ppc64__) + #include "jump_ppc64_sysv_macho_gas.S" + #else + #error "No arch's" + #endif +#endif \ No newline at end of file diff --git a/app/context_switch/deboost.context/asm/jump_combined_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/jump_combined_sysv_macho_gas.S new file mode 100644 index 0000000..1d27afa --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_combined_sysv_macho_gas.S @@ -0,0 +1,20 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary: includes the jump_fcontext source matching the target architecture + +#if defined(__i386__) + #include "jump_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "jump_x86_64_sysv_macho_gas.S" +#elif defined(__ppc__) + #include "jump_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "jump_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/app/context_switch/deboost.context/asm/jump_i386_ms_pe_gas.asm b/app/context_switch/deboost.context/asm/jump_i386_ms_pe_gas.asm new file mode 100644 index 0000000..8c44a73 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_i386_ms_pe_gas.asm @@ -0,0 +1,105 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************* +* --------------------------------------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* --------------------------------------------------------------------------------- * +* | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | * +* --------------------------------------------------------------------------------- * +* | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX | * +* --------------------------------------------------------------------------------- * +* --------------------------------------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* --------------------------------------------------------------------------------- * +* | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | * +* --------------------------------------------------------------------------------- * +* | EBP | EIP | to | data | | EH NXT |SEH HNDLR| | * +* --------------------------------------------------------------------------------- * +*************************************************************************************/ + +.file "jump_i386_ms_pe_gas.asm" +.text +.p2align 4,,15 +.globl _jump_fcontext +.def _jump_fcontext; .scl 2; .type 32; .endef +_jump_fcontext: + pushl %ebp /* save EBP */ + pushl %ebx /* save EBX */ + pushl %esi /* save ESI */ + pushl %edi /* save EDI */ + + /* load NT_TIB into EDX */ + movl %fs:(0x18), %edx + + /* load current SEH exception list */ + movl (%edx), %eax + push %eax + + /* load current stack base */ + movl 0x04(%edx), %eax + push %eax + + /* load current stack limit */ + movl 0x08(%edx), %eax + push %eax + + /* load current deallocation stack */ + movl 0xe0c(%edx), %eax + push %eax + + /* load fiber local storage */ + movl 0x10(%edx), %eax + push %eax + + /* store ESP (pointing to
context-data) in EAX */ + movl %esp, %eax + + /* first arg of jump_fcontext() == fcontext to jump to */ + movl 0x28(%esp), %ecx + + /* restore ESP (pointing to context-data) from ECX */ + movl %ecx, %esp + + /* load NT_TIB into EDX */ + movl %fs:(0x18), %edx + + /* restore fiber local storage */ + popl %ecx + movl %ecx, 0x10(%edx) + + /* restore current deallocation stack */ + popl %ecx + movl %ecx, 0xe0c(%edx) + + /* restore current stack limit */ + popl %ecx + movl %ecx, 0x08(%edx) + + /* restore current stack base */ + popl %ecx + movl %ecx, 0x04(%edx) + + /* restore current SEH exception list */ + popl %ecx + movl %ecx, (%edx) + + popl %edi /* restore EDI */ + popl %esi /* restore ESI */ + popl %ebx /* restore EBX */ + popl %ebp /* restore EBP */ + + /* return transfer_t */ + /* FCTX == EAX, DATA == EDX */ + movl 0x2c(%eax), %edx + + /* jump to context */ + ret + +.section .drectve +.ascii " -export:\"jump_fcontext\"" diff --git a/app/context_switch/deboost.context/asm/jump_i386_ms_pe_masm.asm b/app/context_switch/deboost.context/asm/jump_i386_ms_pe_masm.asm new file mode 100644 index 0000000..7d0ebdf --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_i386_ms_pe_masm.asm @@ -0,0 +1,104 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0.
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; --------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; --------------------------------------------------------------------------------- +; | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | +; --------------------------------------------------------------------------------- +; | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX | +; --------------------------------------------------------------------------------- +; --------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; --------------------------------------------------------------------------------- +; | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | +; --------------------------------------------------------------------------------- +; | EBP | EIP | to | data | | EH NXT |SEH HNDLR| | +; --------------------------------------------------------------------------------- + +.386 +.XMM +.model flat, c +.code + +jump_fcontext PROC BOOST_CONTEXT_EXPORT + push ebp ; save EBP + push ebx ; save EBX + push esi ; save ESI + push edi ; save EDI + + assume fs:nothing + ; load NT_TIB into EDX + mov edx, fs:[018h] + assume fs:error + + ; load current SEH exception list + mov eax, [edx] + push eax + + ; load current stack base + mov eax, [edx+04h] + push eax + + ; load current stack limit + mov eax, [edx+08h] + push eax + + ; load current deallocation stack + mov eax, [edx+0e0ch] + push eax + + ; load fiber local storage + mov eax, [edx+010h] + push eax + + ; store ESP (pointing to context-data) in EAX + mov eax, esp + + ; first arg of jump_fcontext() == fcontext to jump to + mov ecx, [esp+028h] + + ; restore ESP (pointing to context-data) from ECX + mov esp, ecx + + assume fs:nothing + ; load NT_TIB into EDX + mov edx, fs:[018h] + assume fs:error + + ; restore fiber local
storage + pop ecx + mov [edx+010h], ecx + + ; restore current deallocation stack + pop ecx + mov [edx+0e0ch], ecx + + ; restore current stack limit + pop ecx + mov [edx+08h], ecx + + ; restore current stack base + pop ecx + mov [edx+04h], ecx + + ; restore current SEH exception list + pop ecx + mov [edx], ecx + + pop edi ; restore EDI + pop esi ; restore ESI + pop ebx ; restore EBX + pop ebp ; restore EBP + + ; return transfer_t + ; FCTX == EAX, DATA == EDX + mov edx, [eax+02ch] + + ; jump to context + ret +jump_fcontext ENDP +END diff --git a/app/context_switch/deboost.context/asm/jump_i386_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/jump_i386_sysv_elf_gas.S new file mode 100644 index 0000000..8f10504 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_i386_sysv_elf_gas.S @@ -0,0 +1,59 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/***************************************************************************************** + * * + * ----------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ----------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ----------------------------------------------------------------------------------- * + * | EDI | ESI | EBX | EBP | EIP | hidden | to | data | * + * ----------------------------------------------------------------------------------- * + * * + *****************************************************************************************/ + +.text +.globl jump_fcontext +.align 2 +.type jump_fcontext,@function +jump_fcontext: + pushl %ebp /* save EBP */ + pushl %ebx /* save EBX */ + pushl %esi /* save ESI */ + pushl %edi /* save EDI */ + + /* store fcontext_t in ECX */ + movl %esp, %ecx + + /* first arg of
jump_fcontext() == fcontext to jump to */ + movl 0x18(%esp), %eax + + /* second arg of jump_fcontext() == data to be transferred */ + movl 0x1c(%esp), %edx + + /* restore ESP (pointing to context-data) from EAX */ + movl %eax, %esp + + /* address of returned transfer_t */ + movl 0x14(%esp), %eax + /* return parent fcontext_t */ + movl %ecx, (%eax) + /* return data */ + movl %edx, 0x4(%eax) + + popl %edi /* restore EDI */ + popl %esi /* restore ESI */ + popl %ebx /* restore EBX */ + popl %ebp /* restore EBP */ + + /* jump to context */ + ret $4 +.size jump_fcontext,.-jump_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/jump_i386_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/jump_i386_sysv_macho_gas.S new file mode 100644 index 0000000..253b25b --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_i386_sysv_macho_gas.S @@ -0,0 +1,54 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/***************************************************************************************** + * * + * ----------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ----------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ----------------------------------------------------------------------------------- * + * | EDI | ESI | EBX | EBP | EIP | hidden | to | data | * + * ----------------------------------------------------------------------------------- * + * * + *****************************************************************************************/ + +.text +.globl _jump_fcontext +.align 2 +_jump_fcontext: + pushl %ebp /* save EBP */ + pushl %ebx /* save EBX */ + pushl %esi /* save ESI */ + pushl %edi /* save EDI */ + + /* store fcontext_t in ECX */ + movl %esp, %ecx + + /* first arg of jump_fcontext() == context jumping to */ + movl 0x18(%esp), %eax + + /* second arg of jump_fcontext() == data to be transferred */ + movl 0x1c(%esp), %edx + + /* restore ESP (pointing to context-data) from EAX */ + movl %eax, %esp + + /* address of returned transfer_t */ + movl 0x14(%esp), %eax + /* return parent fcontext_t */ + movl %ecx, (%eax) + /* return data */ + movl %edx, 0x4(%eax) + + popl %edi /* restore EDI */ + popl %esi /* restore ESI */ + popl %ebx /* restore EBX */ + popl %ebp /* restore EBP */ + + /* jump to context */ + ret $4 diff --git a/app/context_switch/deboost.context/asm/jump_i386_x86_64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/jump_i386_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..959ddac --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_i386_x86_64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013.
+ Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "jump_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "jump_x86_64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/app/context_switch/deboost.context/asm/jump_mips32_o32_elf_gas.S b/app/context_switch/deboost.context/asm/jump_mips32_o32_elf_gas.S new file mode 100644 index 0000000..ef12864 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_mips32_o32_elf_gas.S @@ -0,0 +1,86 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | FP |hiddn| RA | PC | GP | FCTX| DATA| | * + * ------------------------------------------------- * + * * + * *****************************************************/ + +.text +.globl jump_fcontext +.align 2 +.type jump_fcontext,@function +.ent jump_fcontext +jump_fcontext: + # reserve space on stack + addiu $sp, $sp, -112 + + sw $s0, ($sp) # save S0 + sw $s1, 4($sp) # save S1 + sw $s2, 8($sp) # save S2 + sw $s3, 12($sp) # save S3 + sw $s4, 16($sp) # save S4
+ sw $s5, 20($sp) # save S5 + sw $s6, 24($sp) # save S6 + sw $s7, 28($sp) # save S7 + sw $fp, 32($sp) # save FP + sw $a0, 36($sp) # save hidden, address of returned transfer_t + sw $ra, 40($sp) # save RA + sw $ra, 44($sp) # save RA as PC + + # store SP (pointing to context-data) in A0 + move $a0, $sp + + # restore SP (pointing to context-data) from A1 + move $sp, $a1 + + lw $s0, ($sp) # restore S0 + lw $s1, 4($sp) # restore S1 + lw $s2, 8($sp) # restore S2 + lw $s3, 12($sp) # restore S3 + lw $s4, 16($sp) # restore S4 + lw $s5, 20($sp) # restore S5 + lw $s6, 24($sp) # restore S6 + lw $s7, 28($sp) # restore S7 + lw $fp, 32($sp) # restore FP + lw $t0, 36($sp) # restore hidden, address of returned transfer_t + lw $ra, 40($sp) # restore RA + + # load PC + lw $t9, 44($sp) + + # adjust stack + addiu $sp, $sp, 112 + + # return transfer_t from jump + sw $a0, ($t0) # fctx of transfer_t + sw $a2, 4($t0) # data of transfer_t (the data argument is in A2, A1 holds the target context) + # pass transfer_t as first arg in context function + # A0 == fctx, A1 == data + move $a1, $a2 + + # jump to context + jr $t9 +.end jump_fcontext +.size jump_fcontext, .-jump_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/jump_ppc32_ppc64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/jump_ppc32_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..f175e31 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_ppc32_ppc64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__ppc__) + #include "jump_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "jump_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/app/context_switch/deboost.context/asm/jump_ppc32_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/jump_ppc32_sysv_elf_gas.S new file mode 100644 index 0000000..6c03053 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_ppc32_sysv_elf_gas.S @@ -0,0 +1,129 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 |hiddn| CR | LR | PC | FCTX| * + * ------------------------------------------------- * + *
------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | DATA| | | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl jump_fcontext +.align 2 +.type jump_fcontext,@function +jump_fcontext: + # reserve space on stack + subi %r1, %r1, 92 + + stw %r13, 0(%r1) # save R13 + stw %r14, 4(%r1) # save R14 + stw %r15, 8(%r1) # save R15 + stw %r16, 12(%r1) # save R16 + stw %r17, 16(%r1) # save R17 + stw %r18, 20(%r1) # save R18 + stw %r19, 24(%r1) # save R19 + stw %r20, 28(%r1) # save R20 + stw %r21, 32(%r1) # save R21 + stw %r22, 36(%r1) # save R22 + stw %r23, 40(%r1) # save R23 + stw %r24, 44(%r1) # save R24 + stw %r25, 48(%r1) # save R25 + stw %r26, 52(%r1) # save R26 + stw %r27, 56(%r1) # save R27 + stw %r28, 60(%r1) # save R28 + stw %r29, 64(%r1) # save R29 + stw %r30, 68(%r1) # save R30 + stw %r31, 72(%r1) # save R31 + stw %r3, 76(%r1) # save hidden + + # save CR + mfcr %r0 + stw %r0, 80(%r1) + # save LR + mflr %r0 + stw %r0, 84(%r1) + # save LR as PC + stw %r0, 88(%r1) + + # store RSP (pointing to context-data) in R6 + mr %r6, %r1 + + # restore RSP (pointing to context-data) from R4 + mr %r1, %r4 + + lwz %r13, 0(%r1) # restore R13 + lwz %r14, 4(%r1) # restore R14 + lwz %r15, 8(%r1) # restore R15 + lwz %r16, 12(%r1) # restore R16 + lwz %r17, 16(%r1) # restore R17 + lwz %r18, 20(%r1) # restore R18 + lwz %r19, 24(%r1) # restore R19 + lwz %r20, 28(%r1) # restore R20 + lwz %r21, 32(%r1) # restore R21 + lwz %r22, 36(%r1) # restore R22 + lwz %r23, 40(%r1) # restore R23 + lwz %r24, 44(%r1) # restore R24 + lwz %r25, 48(%r1) # restore R25 + lwz %r26, 52(%r1) # restore R26 + lwz %r27, 56(%r1) # restore R27 + lwz %r28, 60(%r1) # restore R28 + lwz %r29, 64(%r1) # restore R29 +
lwz %r30, 68(%r1) # restore R30 + lwz %r31, 72(%r1) # restore R31 + lwz %r3, 76(%r1) # restore hidden + + # restore CR + lwz %r0, 80(%r1) + mtcr %r0 + # restore LR + lwz %r0, 84(%r1) + mtlr %r0 + # load PC + lwz %r0, 88(%r1) + # move PC into CTR (branch target of bctr) + mtctr %r0 + + # adjust stack + addi %r1, %r1, 92 + + # return transfer_t + stw %r6, 0(%r3) + stw %r5, 4(%r3) + + # jump to context + bctr +.size jump_fcontext, .-jump_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/jump_ppc32_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/jump_ppc32_sysv_macho_gas.S new file mode 100644 index 0000000..ec1c3ae --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_ppc32_sysv_macho_gas.S @@ -0,0 +1,125 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84
| 88 | 92 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 |hiddn| CR | LR | PC | FCTX| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | DATA| | | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _jump_fcontext +.align 2 +_jump_fcontext: + ; reserve space on stack + subi r1, r1, 92 + + stw r13, 0(r1) # save R13 + stw r14, 4(r1) # save R14 + stw r15, 8(r1) # save R15 + stw r16, 12(r1) # save R16 + stw r17, 16(r1) # save R17 + stw r18, 20(r1) # save R18 + stw r19, 24(r1) # save R19 + stw r20, 28(r1) # save R20 + stw r21, 32(r1) # save R21 + stw r22, 36(r1) # save R22 + stw r23, 40(r1) # save R23 + stw r24, 44(r1) # save R24 + stw r25, 48(r1) # save R25 + stw r26, 52(r1) # save R26 + stw r27, 56(r1) # save R27 + stw r28, 60(r1) # save R28 + stw r29, 64(r1) # save R29 + stw r30, 68(r1) # save R30 + stw r31, 72(r1) # save R31 + stw r3, 76(r1) # save hidden + + # save CR + mfcr r0 + stw r0, 80(r1) + # save LR + mflr r0 + stw r0, 84(r1) + # save LR as PC + stw r0, 88(r1) + + # store RSP (pointing to context-data) in R6 + mr r6, r1 + + # restore RSP (pointing to context-data) from R4 + mr r1, r4 + + lwz r13, 0(r1) # restore R13 + lwz r14, 4(r1) # restore R14 + lwz r15, 8(r1) # restore R15 + lwz r16, 12(r1) # restore R16 + lwz r17, 16(r1) # restore R17 + lwz r18, 20(r1) # restore R18 + lwz r19, 24(r1) # restore R19 + lwz r20, 28(r1) # restore R20 + lwz r21, 32(r1) # restore R21 + lwz r22, 36(r1) # restore R22 + lwz r23, 40(r1) # restore R23 + lwz r24, 44(r1) # restore R24 + lwz r25, 48(r1) # restore R25 + lwz r26, 52(r1) # restore R26 + lwz r27, 56(r1) # restore R27 +
lwz r28, 60(r1) # restore R28 + lwz r29, 64(r1) # restore R29 + lwz r30, 68(r1) # restore R30 + lwz r31, 72(r1) # restore R31 + lwz r3, 76(r1) # restore hidden + + # restore CR + lwz r0, 80(r1) + mtcr r0 + # restore LR + lwz r0, 84(r1) + mtlr r0 + + # load PC + lwz r0, 88(r1) + # move PC into CTR (branch target of bctr) + mtctr r0 + + # adjust stack + addi r1, r1, 92 + + # return transfer_t + stw r6, 0(r3) + stw r5, 4(r3) + + # jump to context + bctr diff --git a/app/context_switch/deboost.context/asm/jump_ppc32_sysv_xcoff_gas.S b/app/context_switch/deboost.context/asm/jump_ppc32_sysv_xcoff_gas.S new file mode 100644 index 0000000..22bcb41 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_ppc32_sysv_xcoff_gas.S @@ -0,0 +1,88 @@ +.globl .jump_fcontext +.globl jump_fcontext[DS] +.align 2 +.csect jump_fcontext[DS] +jump_fcontext: + .long .jump_fcontext +.jump_fcontext: + # reserve space on stack + subi 1, 1, 92 + + stw 13, 0(1) # save R13 + stw 14, 4(1) # save R14 + stw 15, 8(1) # save R15 + stw 16, 12(1) # save R16 + stw 17, 16(1) # save R17 + stw 18, 20(1) # save R18 + stw 19, 24(1) # save R19 + stw 20, 28(1) # save R20 + stw 21, 32(1) # save R21 + stw 22, 36(1) # save R22 + stw 23, 40(1) # save R23 + stw 24, 44(1) # save R24 + stw 25, 48(1) # save R25 + stw 26, 52(1) # save R26 + stw 27, 56(1) # save R27 + stw 28, 60(1) # save R28 + stw 29, 64(1) # save R29 + stw 30, 68(1) # save R30 + stw 31, 72(1) # save R31 + stw 3, 76(1) # save hidden + + # save CR + mfcr 0 + stw 0, 80(1) + # save LR + mflr 0 + stw 0, 84(1) + # save LR as PC + stw 0, 88(1) + + # store RSP (pointing to context-data) in R6 + mr 6, 1 + + # restore RSP (pointing to context-data) from R4 + mr 1, 4 + + lwz 13, 0(1) # restore R13 + lwz 14, 4(1) # restore R14 + lwz 15, 8(1) # restore R15 + lwz 16, 12(1) # restore R16 + lwz 17, 16(1) # restore R17 + lwz 18, 20(1) # restore R18 + lwz 19, 24(1) # restore R19 + lwz 20, 28(1) # restore R20 + lwz 21, 32(1) # restore R21 + lwz 22, 36(1) # restore R22 + lwz 23, 40(1) #
restore R23 + lwz 24, 44(1) # restore R24 + lwz 25, 48(1) # restore R25 + lwz 26, 52(1) # restore R26 + lwz 27, 56(1) # restore R27 + lwz 28, 60(1) # restore R28 + lwz 29, 64(1) # restore R29 + lwz 30, 68(1) # restore R30 + lwz 31, 72(1) # restore R31 + lwz 3, 76(1) # restore hidden + + # restore CR + lwz 0, 80(1) + mtcr 0 + # restore LR + lwz 0, 84(1) + mtlr 0 + + # load PC + lwz 0, 88(1) + # move PC into CTR (branch target of bctr) + mtctr 0 + + # adjust stack + addi 1, 1, 92 + + # return transfer_t + stw 6, 0(3) + stw 5, 4(3) + + # jump to context + bctr diff --git a/app/context_switch/deboost.context/asm/jump_ppc64_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/jump_ppc64_sysv_elf_gas.S new file mode 100644 index 0000000..36d2736 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_ppc64_sysv_elf_gas.S @@ -0,0 +1,195 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + *
------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | CR | LR | PC | back-chain| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | cr saved | lr saved | compiler | linker | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | TOC saved | FCTX | DATA | | * + * ------------------------------------------------- * + * * + 
*******************************************************/ + +.globl jump_fcontext +#if _CALL_ELF == 2 + .text + .align 2 +jump_fcontext: + addis %r2, %r12, .TOC.-jump_fcontext@ha + addi %r2, %r2, .TOC.-jump_fcontext@l + .localentry jump_fcontext, . - jump_fcontext +#else + .section ".opd","aw" + .align 3 +jump_fcontext: +# ifdef _CALL_LINUX + .quad .L.jump_fcontext,.TOC.@tocbase,0 + .type jump_fcontext,@function + .text + .align 2 +.L.jump_fcontext: +# else + .hidden .jump_fcontext + .globl .jump_fcontext + .quad .jump_fcontext,.TOC.@tocbase,0 + .size jump_fcontext,24 + .type .jump_fcontext,@function + .text + .align 2 +.jump_fcontext: +# endif +#endif + # reserve 184 bytes on the stack for the context frame (offsets 0-176 below) + subi %r1, %r1, 184 + +#if _CALL_ELF != 2 + std %r2, 0(%r1) # save TOC +#endif + std %r14, 8(%r1) # save R14 + std %r15, 16(%r1) # save R15 + std %r16, 24(%r1) # save R16 + std %r17, 32(%r1) # save R17 + std %r18, 40(%r1) # save R18 + std %r19, 48(%r1) # save R19 + std %r20, 56(%r1) # save R20 + std %r21, 64(%r1) # save R21 + std %r22, 72(%r1) # save R22 + std %r23, 80(%r1) # save R23 + std %r24, 88(%r1) # save R24 + std %r25, 96(%r1) # save R25 + std %r26, 104(%r1) # save R26 + std %r27, 112(%r1) # save R27 + std %r28, 120(%r1) # save R28 (slot 120 is reloaded into R28 on resume) + std %r29, 128(%r1) # save R29 + std %r30, 136(%r1) # save R30 + std %r31, 144(%r1) # save R31 + std %r3, 152(%r1) # save hidden + + # save CR + mfcr %r0 + std %r0, 160(%r1) + # save LR + mflr %r0 + std %r0, 168(%r1) + # save LR as PC + std %r0, 176(%r1) + + # store RSP (pointing to context-data) in R6 + mr %r6, %r1 + + # restore RSP (pointing to context-data) from R4 + mr %r1, %r4 + +#if _CALL_ELF != 2 + ld %r2, 0(%r1) # restore TOC +#endif + ld %r14, 8(%r1) # restore R14 + ld %r15, 16(%r1) # restore R15 + ld %r16, 24(%r1) # restore R16 + ld %r17, 32(%r1) # restore R17 + ld %r18, 40(%r1) # restore R18 + ld %r19, 48(%r1) # restore R19 + ld %r20, 56(%r1) # restore R20 + ld %r21, 64(%r1) # restore R21 + ld %r22, 72(%r1) # restore R22 + ld %r23, 80(%r1) #
restore R23 + ld %r24, 88(%r1) # restore R24 + ld %r25, 96(%r1) # restore R25 + ld %r26, 104(%r1) # restore R26 + ld %r27, 112(%r1) # restore R27 + ld %r28, 120(%r1) # restore R28 + ld %r29, 128(%r1) # restore R29 + ld %r30, 136(%r1) # restore R30 + ld %r31, 144(%r1) # restore R31 + ld %r3, 152(%r1) # restore hidden + + # restore CR + ld %r0, 160(%r1) + mtcr %r0 + # restore LR + ld %r0, 168(%r1) + mtlr %r0 + + # load PC into R12 (the ELFv2 entry sequence above computes the TOC from R12) + ld %r12, 176(%r1) + # move PC into CTR for the branch below + mtctr %r12 + + # adjust stack + addi %r1, %r1, 184 + + # return transfer_t: write R6 (suspended context) and R5 through the hidden pointer in R3 + std %r6, 0(%r3) + std %r5, 8(%r3) + + # jump to context + bctr +#if _CALL_ELF == 2 + .size jump_fcontext, .-jump_fcontext +#else +# ifdef _CALL_LINUX + .size .jump_fcontext, .-.L.jump_fcontext +# else + .size .jump_fcontext, .-.jump_fcontext +# endif +#endif + + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/jump_ppc64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/jump_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..478f19a --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_ppc64_sysv_macho_gas.S @@ -0,0 +1,152 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 
42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | CR | LR | PC | back-chain| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | cr saved | lr saved | compiler | linker | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | TOC saved | FCTX | DATA | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.align 2 +.globl _jump_fcontext ; export the underscore-prefixed label defined below + +_jump_fcontext: + ; reserve 184 bytes on the stack for the context frame (offsets 8-176 below) + subi r1, r1, 184 + + std %r14, 8(%r1) ; save R14 + std %r15, 16(%r1) ; save R15 + std %r16, 24(%r1) ; save R16 + std %r17, 32(%r1) ; save R17 + std %r18, 40(%r1) ; save R18 + std %r19, 48(%r1) ; save R19 + std %r20, 56(%r1) ; save R20 + std %r21, 64(%r1) ; save R21 + std %r22, 72(%r1) ; save R22 + std %r23, 80(%r1) ; save R23 + std %r24, 88(%r1) ; save R24 + std %r25, 96(%r1) ; save R25 + std %r26, 104(%r1) ; save R26 + std %r27, 112(%r1) ; save R27 + std %r28, 120(%r1) ; save R28 (slot 120 is reloaded into R28 on resume) + std %r29, 128(%r1) ; save R29 + std %r30, 136(%r1) ; save R30 + std %r31, 144(%r1) ; save R31 + std %r3, 152(%r1) ; save hidden + + ; save CR + mfcr r0 + std r0, 160(r1) + ; save LR + mflr r0 + std r0, 168(r1) + ; save LR as PC + std r0, 176(r1) + + ; store RSP (pointing to context-data) in R6 + mr %r6, %r1 + + ; restore
RSP (pointing to context-data) from R4 + mr r1, r4 + + ld %r14, 8(%r1) ; restore R14 + ld %r15, 16(%r1) ; restore R15 + ld %r16, 24(%r1) ; restore R16 + ld %r17, 32(%r1) ; restore R17 + ld %r18, 40(%r1) ; restore R18 + ld %r19, 48(%r1) ; restore R19 + ld %r20, 56(%r1) ; restore R20 + ld %r21, 64(%r1) ; restore R21 + ld %r22, 72(%r1) ; restore R22 + ld %r23, 80(%r1) ; restore R23 + ld %r24, 88(%r1) ; restore R24 + ld %r25, 96(%r1) ; restore R25 + ld %r26, 104(%r1) ; restore R26 + ld %r27, 112(%r1) ; restore R27 + ld %r28, 120(%r1) ; restore R28 + ld %r29, 128(%r1) ; restore R29 + ld %r30, 136(%r1) ; restore R30 + ld %r31, 144(%r1) ; restore R31 + ld %r3, 152(%r1) ; restore hidden + + ; restore CR + ld r0, 160(r1) + mtcr r0 + ; restore LR + ld r0, 168(r1) + mtlr r0 + + ; load PC + ld r0, 176(r1) + ; move PC into CTR for the branch below + mtctr r0 + + ; adjust stack + addi r1, r1, 184 + + ; return transfer_t: write R6 (suspended context) and R5 through the hidden pointer in R3 + std %r6, 0(%r3) + std %r5, 8(%r3) + + ; jump to context + bctr diff --git a/app/context_switch/deboost.context/asm/jump_ppc64_sysv_xcoff_gas.S b/app/context_switch/deboost.context/asm/jump_ppc64_sysv_xcoff_gas.S new file mode 100644 index 0000000..013433f --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_ppc64_sysv_xcoff_gas.S @@ -0,0 +1,84 @@ +.align 2 +.globl .jump_fcontext +.jump_fcontext: + # reserve 184 bytes on the stack for the context frame (offsets 0-176 below) + subi 1, 1, 184 + + std 13, 0(1) # save R13 + std 14, 8(1) # save R14 + std 15, 16(1) # save R15 + std 16, 24(1) # save R16 + std 17, 32(1) # save R17 + std 18, 40(1) # save R18 + std 19, 48(1) # save R19 + std 20, 56(1) # save R20 + std 21, 64(1) # save R21 + std 22, 72(1) # save R22 + std 23, 80(1) # save R23 + std 24, 88(1) # save R24 + std 25, 96(1) # save R25 + std 26, 104(1) # save R26 + std 27, 112(1) # save R27 + std 28, 120(1) # save R28 (slot 120 is reloaded into R28 on resume) + std 29, 128(1) # save R29 + std 30, 136(1) # save R30 + std 31, 144(1) # save R31 + std 3, 152(1) # save hidden + + # save CR + mfcr 0 + std 0, 160(1) + # save LR + mflr 0 + std 0, 168(1) + # save LR as PC + std
0, 176(1) + + # store RSP (pointing to context-data) in R6 + mr 6, 1 + + # restore RSP (pointing to context-data) from R4 + mr 1, 4 + + ld 13, 0(1) # restore R13 + ld 14, 8(1) # restore R14 + ld 15, 16(1) # restore R15 + ld 16, 24(1) # restore R16 + ld 17, 32(1) # restore R17 + ld 18, 40(1) # restore R18 + ld 19, 48(1) # restore R19 + ld 20, 56(1) # restore R20 + ld 21, 64(1) # restore R21 + ld 22, 72(1) # restore R22 + ld 23, 80(1) # restore R23 + ld 24, 88(1) # restore R24 + ld 25, 96(1) # restore R25 + ld 26, 104(1) # restore R26 + ld 27, 112(1) # restore R27 + ld 28, 120(1) # restore R28 + ld 29, 128(1) # restore R29 + ld 30, 136(1) # restore R30 + ld 31, 144(1) # restore R31 + ld 3, 152(1) # restore hidden + + # restore CR + ld 0, 160(1) + mtcr 0 + # restore LR + ld 0, 168(1) + mtlr 0 + + # load PC + ld 0, 176(1) + # move PC into CTR for the branch below + mtctr 0 + + # adjust stack + addi 1, 1, 184 + + # return transfer_t: write R6 (suspended context) and R5 through the hidden pointer in R3 + std 6, 0(3) + std 5, 8(3) + + # jump to context + bctr diff --git a/app/context_switch/deboost.context/asm/jump_x86_64_ms_pe_gas.asm b/app/context_switch/deboost.context/asm/jump_x86_64_ms_pe_gas.asm new file mode 100644 index 0000000..d1ddccd --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_x86_64_ms_pe_gas.asm @@ -0,0 +1,140 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fbr_strg | fc_dealloc | limit | base | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | R12 | R13 | R14 | R15 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ---------------------------------------------------------------------------------- * + * | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | * + * ---------------------------------------------------------------------------------- * + * | RDI | RSI | RBX | RBP | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ---------------------------------------------------------------------------------- * + * | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | * + * 
---------------------------------------------------------------------------------- * + * | hidden | RIP | EXIT | parameter area | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ---------------------------------------------------------------------------------- * + * | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | * + * ---------------------------------------------------------------------------------- * + * | parameter area | FCTX | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ---------------------------------------------------------------------------------- * + * | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | * + * ---------------------------------------------------------------------------------- * + * | DATA | | | | * + * ---------------------------------------------------------------------------------- * + * * + * ***********************************************************************************/ + +.file "jump_x86_64_ms_pe_gas.asm" +.text +.p2align 4,,15 +.globl jump_fcontext +.def jump_fcontext; .scl 2; .type 32; .endef +.seh_proc jump_fcontext +jump_fcontext: +.seh_endprologue + + pushq %rcx /* save hidden address of transfer_t */ + + pushq %rbp /* save RBP */ + pushq %rbx /* save RBX */ + pushq %rsi /* save RSI */ + pushq %rdi /* save RDI */ + pushq %r15 /* save R15 */ + pushq %r14 /* save R14 */ + pushq %r13 /* save R13 */ + pushq %r12 /* save R12 */ + + /* load NT_TIB */ + movq %gs:(0x30), %r10 + /* save current stack base */ + movq 0x08(%r10), %rax + pushq %rax + /* save current stack limit */ + movq 0x10(%r10), %rax + pushq %rax + /* save current deallocation stack */
+ movq 0x1478(%r10), %rax + pushq %rax + /* save fiber local storage */ + movq 0x18(%r10), %rax + pushq %rax + + /* preserve RSP (pointing to context-data) in R9 */ + movq %rsp, %r9 + + /* restore RSP (pointing to context-data) from RDX */ + movq %rdx, %rsp + + /* load NT_TIB */ + movq %gs:(0x30), %r10 + /* restore fiber local storage */ + popq %rax + movq %rax, 0x18(%r10) + /* restore deallocation stack */ + popq %rax + movq %rax, 0x1478(%r10) + /* restore stack limit */ + popq %rax + movq %rax, 0x10(%r10) + /* restore stack base */ + popq %rax + movq %rax, 0x8(%r10) + + popq %r12 /* restore R12 */ + popq %r13 /* restore R13 */ + popq %r14 /* restore R14 */ + popq %r15 /* restore R15 */ + popq %rdi /* restore RDI */ + popq %rsi /* restore RSI */ + popq %rbx /* restore RBX */ + popq %rbp /* restore RBP */ + + popq %rax /* restore hidden address of transfer_t */ + + /* restore return-address into R10; it is the jump target below */ + popq %r10 + + /* transfer_t returned in RAX */ + /* return parent fcontext_t */ + movq %r9, (%rax) + /* return data */ + movq %r8, 0x8(%rax) + + /* transfer_t as 1.arg of context-function */ + movq %rax, %rcx + + /* indirect jump to context */ + jmp *%r10 +.seh_endproc + +.section .drectve +.ascii " -export:\"jump_fcontext\"" diff --git a/app/context_switch/deboost.context/asm/jump_x86_64_ms_pe_masm.asm b/app/context_switch/deboost.context/asm/jump_x86_64_ms_pe_masm.asm new file mode 100644 index 0000000..a623fed --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_x86_64_ms_pe_masm.asm @@ -0,0 +1,128 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. 
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; ---------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; ---------------------------------------------------------------------------------- +; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | +; ---------------------------------------------------------------------------------- +; | fbr_strg | fc_dealloc | limit | base | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; ---------------------------------------------------------------------------------- +; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | +; ---------------------------------------------------------------------------------- +; | R12 | R13 | R14 | R15 | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | +; ---------------------------------------------------------------------------------- +; | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | +; ---------------------------------------------------------------------------------- +; | RDI | RSI | RBX | RBP | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +; ---------------------------------------------------------------------------------- +; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | +; ---------------------------------------------------------------------------------- +; | hidden | RIP | EXIT | parameter area | +; 
---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | +; ---------------------------------------------------------------------------------- +; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | +; ---------------------------------------------------------------------------------- +; | parameter area | FCTX | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | +; ---------------------------------------------------------------------------------- +; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | +; ---------------------------------------------------------------------------------- +; | DATA | | | | +; ---------------------------------------------------------------------------------- + +.code + +jump_fcontext PROC BOOST_CONTEXT_EXPORT FRAME + .endprolog + + push rcx ; save hidden address of transfer_t + + push rbp ; save RBP + push rbx ; save RBX + push rsi ; save RSI + push rdi ; save RDI + push r15 ; save R15 + push r14 ; save R14 + push r13 ; save R13 + push r12 ; save R12 + + ; load NT_TIB + mov r10, gs:[030h] + ; save current stack base + mov rax, [r10+08h] + push rax + ; save current stack limit + mov rax, [r10+010h] + push rax + ; save current deallocation stack + mov rax, [r10+01478h] + push rax + ; save fiber local storage + mov rax, [r10+018h] + push rax + + ; preserve RSP (pointing to context-data) in R9 + mov r9, rsp + + ; restore RSP (pointing to context-data) from RDX + mov rsp, rdx + + ; load NT_TIB + mov r10, gs:[030h] + ; restore fiber local storage + pop rax + mov [r10+018h], rax + ; restore deallocation stack + pop rax + mov [r10+01478h], rax + ; restore stack limit + pop rax + mov [r10+010h], rax + ; restore stack
base + pop rax + mov [r10+08h], rax + + pop r12 ; restore R12 + pop r13 ; restore R13 + pop r14 ; restore R14 + pop r15 ; restore R15 + pop rdi ; restore RDI + pop rsi ; restore RSI + pop rbx ; restore RBX + pop rbp ; restore RBP + + pop rax ; restore hidden address of transfer_t + + ; restore return-address into R10; it is the jump target below + pop r10 + + ; transfer_t returned in RAX + ; return parent fcontext_t + mov [rax], r9 + ; return data + mov [rax+08h], r8 + + ; transfer_t as 1.arg of context-function + mov rcx, rax + + ; indirect jump to context + jmp r10 +jump_fcontext ENDP +END diff --git a/app/context_switch/deboost.context/asm/jump_x86_64_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/jump_x86_64_sysv_elf_gas.S new file mode 100644 index 0000000..8403c88 --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_x86_64_sysv_elf_gas.S @@ -0,0 +1,67 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | R12 | R13 | R14 | R15 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * 
---------------------------------------------------------------------------------- * + * | RBX | RBP | RIP | EXIT | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl jump_fcontext +.type jump_fcontext,@function +.align 16 +jump_fcontext: + pushq %rbp /* save RBP */ + pushq %rbx /* save RBX */ + pushq %r15 /* save R15 */ + pushq %r14 /* save R14 */ + pushq %r13 /* save R13 */ + pushq %r12 /* save R12 */ + + /* store RSP (pointing to context-data) in RAX */ + movq %rsp, %rax + + /* restore RSP (pointing to context-data) from RDI */ + movq %rdi, %rsp + + popq %r12 /* restore R12 */ + popq %r13 /* restore R13 */ + popq %r14 /* restore R14 */ + popq %r15 /* restore R15 */ + popq %rbx /* restore RBX */ + popq %rbp /* restore RBP */ + + /* restore return-address into R8; it is the jump target below */ + popq %r8 + + /* return transfer_t from jump */ + /* RAX == fctx, RDX == data */ + movq %rsi, %rdx + /* pass transfer_t as first arg in context function */ + /* RDI == fctx, RSI == data */ + movq %rax, %rdi + + /* indirect jump to context */ + jmp *%r8 +.size jump_fcontext,.-jump_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/jump_x86_64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/jump_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..16fd13c --- /dev/null +++ b/app/context_switch/deboost.context/asm/jump_x86_64_sysv_macho_gas.S @@ -0,0 +1,62 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | R12 | R13 | R14 | R15 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | RBX | RBP | RIP | EXIT | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl _jump_fcontext +.align 8 +_jump_fcontext: + pushq %rbp /* save RBP */ + pushq %rbx /* save RBX */ + pushq %r15 /* save R15 */ + pushq %r14 /* save R14 */ + pushq %r13 /* save R13 */ + pushq %r12 /* save R12 */ + + /* store RSP (pointing to context-data) in RAX */ + movq %rsp, %rax + + /* restore RSP (pointing to context-data) from RDI */ + movq %rdi, %rsp + + popq %r12 /* restore R12 */ + popq %r13 /* restore R13 */ + popq %r14 /* restore R14 */ + popq %r15 /* restore R15 */ + popq %rbx /* restore RBX */ + popq %rbp /* restore RBP */ + + /* restore return-address into R8; it is the jump target below */ + popq %r8 + + /* return transfer_t from jump */ + /* RAX == fctx, RDX == data */ + movq %rsi, %rdx + /* pass transfer_t as
first arg in context function */ + /* RDI == fctx, RSI == data */ + movq %rax, %rdi + + /* indirect jump to context */ + jmp *%r8 diff --git a/app/context_switch/deboost.context/asm/make_arm64_aapcs_elf_gas.S b/app/context_switch/deboost.context/asm/make_arm64_aapcs_elf_gas.S new file mode 100644 index 0000000..ba8984b --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_arm64_aapcs_elf_gas.S @@ -0,0 +1,71 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * 
------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.cpu generic+fp+simd +.text +.align 2 +.global make_fcontext +.type make_fcontext, %function +make_fcontext: + # shift address in x0 (allocated stack) to lower 16 byte boundary + and x0, x0, ~0xF + + # reserve 0x70 bytes for context-data on context-stack + sub x0, x0, #0x70 + + # third arg of make_fcontext() == address of context-function + # store address as a PC to jump in (PC slot at offset 0x60) + str x2, [x0, #0x60] + + # save address of finish as return-address for context-function + # will be entered after context-function returns (LR slot at offset 0x58) + adr x1, finish + str x1, [x0, #0x58] + + ret x30 // return pointer to context-data (x0) + +finish: + # exit code is zero + mov x0, #0 + # exit application + bl _exit + +.size make_fcontext,.-make_fcontext +# Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/make_arm64_aapcs_macho_gas.S b/app/context_switch/deboost.context/asm/make_arm64_aapcs_macho_gas.S new file mode 100644 index 0000000..66f35e0 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_arm64_aapcs_macho_gas.S @@ -0,0 +1,68 @@ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * 
------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _make_fcontext +.balign 16 + +_make_fcontext: + ; shift address in x0 (allocated stack) to lower 16 byte boundary + and x0, x0, ~0xF + + ; reserve space for context-data on context-stack + sub x0, x0, #0x70 + + ; third arg of make_fcontext() == address of context-function + ; store address as a PC to jump in + str x2, [x0, #0x60] + + ; compute abs address of label finish + ; 0x0c = 3 instructions * size (4) before label 'finish' + + ; TODO: Numeric offset since llvm still does not support labels in ADR. 
Fix: + ; http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20140407/212336.html + adr x1, 0x0c + + ; save address of finish as return-address for context-function + ; will be entered after context-function returns (LR register) + str x1, [x0, #0x58] + + ret lr ; return pointer to context-data (x0) + +finish: + ; exit code is zero + mov x0, #0 + ; exit application + bl __exit + + diff --git a/app/context_switch/deboost.context/asm/make_arm_aapcs_elf_gas.S b/app/context_switch/deboost.context/asm/make_arm_aapcs_elf_gas.S new file mode 100644 index 0000000..79b4612 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_arm_aapcs_elf_gas.S @@ -0,0 +1,62 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * |hiddn| v1 | v2 | v3 | v4 | v5 | v6 | v7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl make_fcontext +.align 2 +.type make_fcontext,%function +make_fcontext: + @ shift address in A1 to lower 16 byte boundary + bic a1, a1, #15 + + @ reserve space for context-data on context-stack + sub a1, a1, #60 + + @ third arg of make_fcontext() 
== address of context-function + str a3, [a1, #40] + + @ compute address of returned transfer_t + add a2, a1, #44 + mov a3, a2 + str a3, [a1, #0] + + @ compute abs address of label finish + adr a2, finish + @ save address of finish as return-address for context-function + @ will be entered after context-function returns + str a2, [a1, #36] + + bx lr @ return pointer to context-data + +finish: + @ exit code is zero + mov a1, #0 + @ exit application + bl _exit@PLT +.size make_fcontext,.-make_fcontext + +@ Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/make_arm_aapcs_macho_gas.S b/app/context_switch/deboost.context/asm/make_arm_aapcs_macho_gas.S new file mode 100644 index 0000000..d82d3a6 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_arm_aapcs_macho_gas.S @@ -0,0 +1,57 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | sjlj|hiddn| v1 | v2 | v3 | v4 | v5 | v6 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | v7 | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _make_fcontext +.align 2 
+_make_fcontext: + @ shift address in A1 to lower 16 byte boundary + bic a1, a1, #15 + + @ reserve space for context-data on context-stack + sub a1, a1, #64 + + @ third arg of make_fcontext() == address of context-function + str a3, [a1, #44] + + @ compute address of returned transfer_t + add a2, a1, #48 + mov a3, a2 + str a3, [a1, #4] + + @ compute abs address of label finish + adr a2, finish + @ save address of finish as return-address for context-function + @ will be entered after context-function returns + str a2, [a1, #40] + + bx lr @ return pointer to context-data + +finish: + @ exit code is zero + mov a1, #0 + @ exit application + bl __exit diff --git a/app/context_switch/deboost.context/asm/make_arm_aapcs_pe_armasm.asm b/app/context_switch/deboost.context/asm/make_arm_aapcs_pe_armasm.asm new file mode 100644 index 0000000..27cbfb0 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_arm_aapcs_pe_armasm.asm @@ -0,0 +1,77 @@ +;/* +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. 
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) +;*/ + +; ******************************************************* +; * * +; * ------------------------------------------------- * +; * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +; * ------------------------------------------------- * +; * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * +; * ------------------------------------------------- * +; * |deall|limit| base|hiddn| v1 | v2 | v3 | v4 | * +; * ------------------------------------------------- * +; * ------------------------------------------------- * +; * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +; * ------------------------------------------------- * +; * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * +; * ------------------------------------------------- * +; * | v5 | v6 | v7 | v8 | lr | pc | FCTX| DATA| * +; * ------------------------------------------------- * +; * * +; ******************************************************* + + + AREA |.text|, CODE + ALIGN 4 + EXPORT make_fcontext + IMPORT _exit + +make_fcontext PROC + ; first arg of make_fcontext() == top of context-stack + ; save top of context-stack (base) A4 + mov a4, a1 + + ; shift address in A1 to lower 16 byte boundary + bic a1, a1, #0x0f + + ; reserve space for context-data on context-stack + sub a1, a1, #0x48 + + ; save top address of context_stack as 'base' + str a4, [a1, #0x8] + ; second arg of make_fcontext() == size of context-stack + ; compute bottom address of context-stack (limit) + sub a4, a4, a2 + ; save bottom address of context-stack as 'limit' + str a4, [a1, #0x4] + ; save bottom address of context-stack as 'deallocation stack' + str a4, [a1, #0x0] + + ; third arg of make_fcontext() == address of context-function + str a3, [a1, #0x34] + + ; compute address of returned transfer_t + add a2, a1, #0x38 + mov a3, a2 + str a3, [a1, #0xc] + + ; compute abs address of label finish + adr a2, finish + ; save address of finish as return-address for
context-function + ; will be entered after context-function returns + str a2, [a1, #0x30] + + bx lr ; return pointer to context-data + +finish + ; exit code is zero + mov a1, #0 + ; exit application + bl _exit + + ENDP + END diff --git a/app/context_switch/deboost.context/asm/make_combined_all_macho_gas.S b/app/context_switch/deboost.context/asm/make_combined_all_macho_gas.S new file mode 100644 index 0000000..8619259 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_combined_all_macho_gas.S @@ -0,0 +1,28 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) + #if defined(__aarch64__) + #include "make_arm64_aapcs_macho_gas.S" + #else + #include "make_arm_aapcs_macho_gas.S" + #endif +#else + #if defined(__i386__) + #include "make_i386_sysv_macho_gas.S" + #elif defined(__x86_64__) + #include "make_x86_64_sysv_macho_gas.S" + #elif defined(__ppc__) + #include "make_ppc32_sysv_macho_gas.S" + #elif defined(__ppc64__) + #include "make_ppc64_sysv_macho_gas.S" + #else + #error "No arch's" + #endif +#endif diff --git a/app/context_switch/deboost.context/asm/make_combined_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/make_combined_sysv_macho_gas.S new file mode 100644 index 0000000..727e904 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_combined_sysv_macho_gas.S @@ -0,0 +1,20 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "make_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "make_x86_64_sysv_macho_gas.S" +#elif defined(__ppc__) + #include "make_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "make_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/app/context_switch/deboost.context/asm/make_i386_ms_pe_gas.asm b/app/context_switch/deboost.context/asm/make_i386_ms_pe_gas.asm new file mode 100644 index 0000000..5b4809a --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_i386_ms_pe_gas.asm @@ -0,0 +1,139 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************* +* --------------------------------------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* --------------------------------------------------------------------------------- * +* | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | * +* --------------------------------------------------------------------------------- * +* | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX | * +* --------------------------------------------------------------------------------- * +* --------------------------------------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* --------------------------------------------------------------------------------- * +* | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | * +* --------------------------------------------------------------------------------- * +* | EBP | EIP | to | data | | EH NXT |SEH HNDLR| | * +* 
--------------------------------------------------------------------------------- * +*************************************************************************************/ + +.file "make_i386_ms_pe_gas.asm" +.text +.p2align 4,,15 +.globl _make_fcontext +.def _make_fcontext; .scl 2; .type 32; .endef +_make_fcontext: + /* first arg of make_fcontext() == top of context-stack */ + movl 0x04(%esp), %eax + + /* reserve space for first argument of context-function */ + /* EAX might already point to a 16byte border */ + leal -0x08(%eax), %eax + + /* shift address in EAX to lower 16 byte boundary */ + andl $-16, %eax + + /* reserve space for context-data on context-stack */ + /* size for fc_mxcsr .. EIP + return-address for context-function */ + /* on context-function entry: (ESP -0x4) % 8 == 0 */ + /* additional space is required for SEH */ + leal -0x48(%eax), %eax + + /* first arg of make_fcontext() == top of context-stack */ + movl 0x04(%esp), %ecx + /* save top address of context stack as 'base' */ + movl %ecx, 0xc(%eax) + /* second arg of make_fcontext() == size of context-stack */ + movl 0x08(%esp), %edx + /* negate stack size for LEA instruction (== subtraction) */ + negl %edx + /* compute bottom address of context stack (limit) */ + leal (%ecx,%edx), %ecx + /* save bottom address of context-stack as 'limit' */ + movl %ecx, 0x8(%eax) + /* save bottom address of context-stack as 'deallocation stack' */ + movl %ecx, 0x4(%eax) + + /* third arg of make_fcontext() == address of context-function */ + /* stored in EBX */ + movl 0xc(%esp), %ecx + movl %ecx, 0x1c(%eax) + + /* compute abs address of label trampoline */ + movl $trampoline, %ecx + /* save address of trampoline as return-address for context-function */ + /* will be entered after calling jump_fcontext() first time */ + movl %ecx, 0x24(%eax) + + /* compute abs address of label finish */ + movl $finish, %ecx + /* save address of finish as return-address for context-function */ + /* will be entered after
context-function returns */ + movl %ecx, 0x20(%eax) + + /* traverse current seh chain to get the last exception handler installed by Windows */ + /* note that on Windows Server 2008 and 2008 R2, SEHOP is activated by default */ + /* the exception handler chain is tested for the presence of ntdll.dll!FinalExceptionHandler */ + /* at its end by RaiseException all seh-handlers are disregarded if not present and the */ + /* program is aborted */ + /* load NT_TIB into ECX */ + movl %fs:(0x0), %ecx + +walk: + /* load 'next' member of current SEH into EDX */ + movl (%ecx), %edx + /* test if 'next' of current SEH is last (== 0xffffffff) */ + incl %edx + jz found + decl %edx + /* exchange content; ECX contains address of next SEH */ + xchgl %ecx, %edx + /* inspect next SEH */ + jmp walk + +found: + /* load 'handler' member of SEH == address of last SEH handler installed by Windows */ + movl 0x04(%ecx), %ecx + /* save address in ECX as SEH handler for context */ + movl %ecx, 0x38(%eax) + /* set ECX to -1 */ + movl $0xffffffff, %ecx + /* save ECX as next SEH item */ + movl %ecx, 0x34(%eax) + /* load address of next SEH item */ + leal 0x34(%eax), %ecx + /* save next SEH */ + movl %ecx, 0x10(%eax) + + /* return pointer to context-data */ + ret + +trampoline: + /* move transport_t for entering context-function */ + /* FCTX == EAX, DATA == EDX */ + movl %eax, (%esp) + movl %edx, 0x4(%esp) + /* label finish as return-address */ + pushl %ebp + /* jump to context-function */ + jmp *%ebx + +finish: + /* ESP points to same address as ESP on entry of context function + 0x4 */ + xorl %eax, %eax + /* exit code is zero */ + movl %eax, (%esp) + /* exit application */ + call __exit + hlt + +.def __exit; .scl 2; .type 32; .endef /* standard C library function */ + +.section .drectve +.ascii " -export:\"make_fcontext\"" diff --git a/app/context_switch/deboost.context/asm/make_i386_ms_pe_masm.asm b/app/context_switch/deboost.context/asm/make_i386_ms_pe_masm.asm new file mode 100644 index
0000000..02ef50b --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_i386_ms_pe_masm.asm @@ -0,0 +1,132 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; --------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; --------------------------------------------------------------------------------- +; | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | +; --------------------------------------------------------------------------------- +; | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX | +; --------------------------------------------------------------------------------- +; --------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; --------------------------------------------------------------------------------- +; | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | +; --------------------------------------------------------------------------------- +; | EBP | EIP | to | data | | EH NXT |SEH HNDLR| | +; --------------------------------------------------------------------------------- + +.386 +.XMM +.model flat, c +; standard C library function +_exit PROTO, value:SDWORD +.code + +make_fcontext PROC BOOST_CONTEXT_EXPORT + ; first arg of make_fcontext() == top of context-stack + mov eax, [esp+04h] + + ; reserve space for first argument of context-function + ; EAX might already point to a 16byte border + lea eax, [eax-08h] + + ; shift address in EAX to lower 16 byte boundary + and eax, -16 + + ; reserve space for context-data on context-stack + ; on context-function entry: (ESP -0x4) % 8 == 0 + ; additional space is required for SEH + lea eax, [eax-048h] + + ; first arg of make_fcontext() == top of context-stack + mov ecx, [esp+04h] + ; save top address of context 
stack as 'base' + mov [eax+0ch], ecx + ; second arg of make_fcontext() == size of context-stack + mov edx, [esp+08h] + ; negate stack size for LEA instruction (== subtraction) + neg edx + ; compute bottom address of context stack (limit) + lea ecx, [ecx+edx] + ; save bottom address of context-stack as 'limit' + mov [eax+08h], ecx + ; save bottom address of context-stack as 'deallocation stack' + mov [eax+04h], ecx + + ; third arg of make_fcontext() == address of context-function + ; stored in EBX + mov ecx, [esp+0ch] + mov [eax+01ch], ecx + + ; compute abs address of label trampoline + mov ecx, trampoline + ; save address of trampoline as return-address for context-function + ; will be entered after calling jump_fcontext() first time + mov [eax+024h], ecx + + ; compute abs address of label finish + mov ecx, finish + ; save address of finish as return-address for context-function + ; will be entered after context-function returns + mov [eax+020h], ecx + + ; traverse current seh chain to get the last exception handler installed by Windows + ; note that on Windows Server 2008 and 2008 R2, SEHOP is activated by default + ; the exception handler chain is tested for the presence of ntdll.dll!FinalExceptionHandler + ; at its end by RaiseException all seh-handlers are disregarded if not present and the + ; program is aborted + assume fs:nothing + ; load NT_TIB into ECX + mov ecx, fs:[0h] + assume fs:error + +walk: + ; load 'next' member of current SEH into EDX + mov edx, [ecx] + ; test if 'next' of current SEH is last (== 0xffffffff) + inc edx + jz found + dec edx + ; exchange content; ECX contains address of next SEH + xchg edx, ecx + ; inspect next SEH + jmp walk + +found: + ; load 'handler' member of SEH == address of last SEH handler installed by Windows + mov ecx, [ecx+04h] + ; save address in ECX as SEH handler for context + mov [eax+038h], ecx + ; set ECX to -1 + mov ecx, 0ffffffffh + ; save ECX as next SEH item + mov [eax+034h], ecx + ; load address of next SEH
item + lea ecx, [eax+034h] + ; save next SEH + mov [eax+010h], ecx + + ret ; return pointer to context-data + +trampoline: + ; move transport_t for entering context-function + ; FCTX == EAX, DATA == EDX + mov [esp], eax + mov [esp+04h], edx + push ebp + ; jump to context-function + jmp ebx + +finish: + ; exit code is zero + xor eax, eax + mov [esp], eax + ; exit application + call _exit + hlt +make_fcontext ENDP +END diff --git a/app/context_switch/deboost.context/asm/make_i386_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/make_i386_sysv_elf_gas.S new file mode 100644 index 0000000..a03b047 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_i386_sysv_elf_gas.S @@ -0,0 +1,94 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/***************************************************************************************** + * * + * ----------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ----------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ----------------------------------------------------------------------------------- * + * | EDI | ESI | EBX | EBP | EIP | hidden | to | data | * + * ----------------------------------------------------------------------------------- * + * * + *****************************************************************************************/ + +.text +.globl make_fcontext +.align 2 +.type make_fcontext,@function +make_fcontext: + /* first arg of make_fcontext() == top of context-stack */ + movl 0x4(%esp), %eax + + /* reserve space for first argument of context-function + EAX might already point to a 16byte border */ + leal -0x8(%eax), %eax + + /* shift address in EAX to lower 16 byte boundary */ + andl $-16, %eax
+ + /* reserve space for context-data on context-stack */ + leal -0x28(%eax), %eax + + /* third arg of make_fcontext() == address of context-function */ + /* stored in EBX */ + movl 0xc(%esp), %ecx + movl %ecx, 0x8(%eax) + + /* return transport_t */ + /* FCTX == EDI, DATA == ESI */ + leal (%eax), %ecx + movl %ecx, 0x14(%eax) + + /* compute abs address of label trampoline */ + call 1f + /* address of label 1 */ +1: popl %ecx + /* compute abs address of label trampoline */ + addl $trampoline-1b, %ecx + /* save address of trampoline as return address */ + /* will be entered after calling jump_fcontext() first time */ + movl %ecx, 0x10(%eax) + + /* compute abs address of label finish */ + call 2f + /* address of label 2 */ +2: popl %ecx + /* compute abs address of label finish */ + addl $finish-2b, %ecx + /* save address of finish as return-address for context-function */ + /* will be entered after context-function returns */ + movl %ecx, 0xc(%eax) + + ret /* return pointer to context-data */ + +trampoline: + /* move transport_t for entering context-function */ + movl %edi, (%esp) + movl %esi, 0x4(%esp) + pushl %ebp + /* jump to context-function */ + jmp *%ebx + +finish: + call 3f + /* address of label 3 */ +3: popl %ebx + /* compute address of GOT and store it in EBX */ + addl $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx + + /* exit code is zero */ + xorl %eax, %eax + movl %eax, (%esp) + /* exit application */ + call _exit@PLT + hlt +.size make_fcontext,.-make_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/make_i386_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/make_i386_sysv_macho_gas.S new file mode 100644 index 0000000..ceb1a21 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_i386_sysv_macho_gas.S @@ -0,0 +1,83 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/***************************************************************************************** + * * + * ----------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ----------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ----------------------------------------------------------------------------------- * + * | EDI | ESI | EBX | EBP | EIP | hidden | to | data | * + * ----------------------------------------------------------------------------------- * + * * + *****************************************************************************************/ + +.text +.globl _make_fcontext +.align 2 +_make_fcontext: + /* first arg of make_fcontext() == top of context-stack */ + movl 0x4(%esp), %eax + + /* reserve space for first argument of context-function + EAX might already point to a 16byte border */ + leal -0x8(%eax), %eax + + /* shift address in EAX to lower 16 byte boundary */ + andl $-16, %eax + + /* reserve space for context-data on context-stack */ + leal -0x28(%eax), %eax + + /* third arg of make_fcontext() == address of context-function */ + /* stored in EBX */ + movl 0xc(%esp), %edx + movl %edx, 0x8(%eax) + + /* return transport_t */ + /* FCTX == EDI, DATA == ESI */ + leal (%eax), %ecx + movl %ecx, 0x14(%eax) + + /* compute abs address of label trampoline */ + call 1f + /* address of label 1 */ +1: popl %ecx + /* compute abs address of label trampoline */ + addl $trampoline-1b, %ecx + /* save address of trampoline as return address */ + /* will be entered after calling jump_fcontext() first time */ + movl %ecx, 0x10(%eax) + + /* compute abs address of label finish */ + call 2f + /* address of label 2 */ +2: popl %ecx + /* compute abs address of label finish */ + addl $finish-2b, %ecx + /* save address
of finish as return-address for context-function */ + /* will be entered after context-function returns */ + movl %ecx, 0xc(%eax) + + ret /* return pointer to context-data */ + +trampoline: + /* move transport_t for entering context-function */ + movl %edi, (%esp) + movl %esi, 0x4(%esp) + pushl %ebp + /* jump to context-function */ + jmp *%ebx + +finish: + /* exit code is zero */ + xorl %eax, %eax + movl %eax, (%esp) + /* exit application */ + call __exit + hlt diff --git a/app/context_switch/deboost.context/asm/make_i386_x86_64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/make_i386_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..e364b2d --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_i386_x86_64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "make_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "make_x86_64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/app/context_switch/deboost.context/asm/make_mips32_o32_elf_gas.S b/app/context_switch/deboost.context/asm/make_mips32_o32_elf_gas.S new file mode 100644 index 0000000..ef00245 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_mips32_o32_elf_gas.S @@ -0,0 +1,86 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | FP |hiddn| RA | PC | GP | FCTX| DATA| | * + * ------------------------------------------------- * + * * + * *****************************************************/ + +.text +.globl make_fcontext +.align 2 +.type make_fcontext,@function +.ent make_fcontext +make_fcontext: +#ifdef __PIC__ +.set noreorder +.cpload $t9 +.set reorder +#endif + # first arg of make_fcontext() == top address of context-stack + move $v0, $a0 + + # shift address in A0 to lower 16 byte boundary + move $v1, $v0 + li $v0, -16 # 0xfffffffffffffff0 + and $v0, $v1, $v0 + + # reserve space for context-data on context-stack + # including 48 byte of shadow space (sp % 16 == 0) + addiu $v0, $v0, -112 + + # third arg of make_fcontext() == address of context-function + sw $a2, 44($v0) + # save global pointer in context-data + sw $gp, 48($v0) + + # compute address of returned transfer_t + addiu $t0, $v0, 52 + sw $t0, 36($v0) + + # compute abs address of label finish + la $t9, finish + # save address of finish as return-address for context-function + # will be entered after context-function returns + sw $t9, 40($v0) + + jr $ra # return pointer to context-data + +finish: + lw $gp, 0($sp) + # allocate stack space (contains shadow space 
for subroutines) + addiu $sp, $sp, -32 + # save return address + sw $ra, 28($sp) + + # restore GP (global pointer) +# move $gp, $s1 + # exit code is zero + move $a0, $zero + # address of exit + lw $t9, %call16(_exit)($gp) + # exit application + jalr $t9 +.end make_fcontext +.size make_fcontext, .-make_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/make_ppc32_ppc64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/make_ppc32_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..52e7220 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_ppc32_ppc64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__ppc__) + #include "make_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "make_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/app/context_switch/deboost.context/asm/make_ppc32_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/make_ppc32_sysv_elf_gas.S new file mode 100644 index 0000000..b826e18 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_ppc32_sysv_elf_gas.S @@ -0,0 +1,104 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/****************************************************** + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 |hiddn| CR | LR | PC |bchai| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * |linkr| FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl make_fcontext +.align 2 +.type make_fcontext,@function +make_fcontext: + # save return address into R6 + mflr %r6 + + # first arg of make_fcontext() == top address of context-stack + # shift address in R3 to lower 16 byte boundary + clrrwi %r3, %r3, 4 + + # reserve space for
context-data on context-stack + # including 64 byte of linkage + parameter area (R1 % 16 == 0) + subi %r3, %r3, 172 + + # third arg of make_fcontext() == address of context-function + stw %r5, 88(%r3) + + # set back-chain to zero + li %r0, 0 + stw %r0, 92(%r3) + + # compute address of returned transfer_t + addi %r0, %r3, 100 + mr %r4, %r0 + stw %r4, 76(%r3) + + # load LR + mflr %r0 + # jump to label 1 + bl 1f +1: + # load LR into R4 + mflr %r4 + # compute abs address of label finish + addi %r4, %r4, finish - 1b + # restore LR + mtlr %r0 + # save address of finish as return-address for context-function + # will be entered after context-function returns + stw %r4, 84(%r3) + + # restore return address from R6 + mtlr %r6 + + blr # return pointer to context-data + +finish: + # save return address into R0 + mflr %r0 + # save return address on stack, set up stack frame + stw %r0, 4(%r1) + # allocate stack space, R1 % 16 == 0 + stwu %r1, -16(%r1) + + # exit code is zero + li %r3, 0 + # exit application + bl _exit@plt +.size make_fcontext, .-make_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/make_ppc32_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/make_ppc32_sysv_macho_gas.S new file mode 100644 index 0000000..d1deef5 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_ppc32_sysv_macho_gas.S @@ -0,0 +1,99 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/****************************************************** + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 |hiddn| CR | LR | PC |bchai| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * |linkr| FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _make_fcontext +.align 2 +_make_fcontext: + ; save return address into R6 + mflr r6 + + ; first arg of make_fcontext() == top address of context-function + ; shift address in R3 to lower 16 byte boundary + clrrwi r3, r3, 4 + + ; reserve space for context-data on context-stack + ; 
including 64 byte of linkage + parameter area (R1 % 16 == 0) + subi r3, r3, 172 + + ; third arg of make_fcontext() == address of context-function + stw r5, 88(r3) + + ; set back-chain to zero + li r0, 0 + stw r0, 92(r3) + + ; compute address of returned transfer_t + addi r0, r3, 100 + mr r4, r0 + stw r4, 76(r3) + + ; load LR + mflr r0 + ; jump to label 1 + bl l1 +l1: + ; load LR into R4 + mflr r4 + ; compute abs address of label finish + addi r4, r4, lo16((finish - .)+4) + # restore LR + mtlr r0 + ; save address of finish as return-address for context-function + ; will be entered after context-function returns + stw r4, 84(r3) + + ; restore return address from R6 + mtlr r6 + + blr ; return pointer to context-data + +finish: + ; save return address into R0 + mflr r0 + ; save return address on stack, set up stack frame + stw r0, 4(r1) + ; allocate stack space, R1 % 16 == 0 + stwu r1, -16(r1) + + ; exit code is zero + li r3, 0 + ; exit application + bl __exit diff --git a/app/context_switch/deboost.context/asm/make_ppc32_sysv_xcoff_gas.S b/app/context_switch/deboost.context/asm/make_ppc32_sysv_xcoff_gas.S new file mode 100644 index 0000000..62f8683 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_ppc32_sysv_xcoff_gas.S @@ -0,0 +1,64 @@ + .globl make_fcontext[DS] + .globl .make_fcontext[PR] + .align 2 + .csect make_fcontext[DS] +make_fcontext: + .long .make_fcontext[PR] + .csect .make_fcontext[PR], 3 +#.make_fcontext: + # save return address into R6 + mflr 6 + + # first arg of make_fcontext() == top address of context-function + # shift address in R3 to lower 16 byte boundary + clrrwi 3, 3, 4 + + # reserve space for context-data on context-stack + # including 64 byte of linkage + parameter area (R1 % 16 == 0) + subi 3, 3, 172 + + # third arg of make_fcontext() == address of context-function + stw 5, 88(3) + + # set back-chain to zero + li 0, 0 + stw 0, 92(3) + + # compute address of returned transfer_t + addi 0, 3, 100 + mr 4, 0 + stw 4, 76(3) + + # load 
LR + mflr 0 + # jump to label 1 + bl .Label +.Label: + # load LR into R4 + mflr 4 + # compute abs address of label .L_finish + addi 4, 4, .L_finish - .Label + # restore LR + mtlr 0 + # save address of finish as return-address for context-function + # will be entered after context-function returns + stw 4, 84(3) + + # restore return address from R6 + mtlr 6 + + blr # return pointer to context-data + +.L_finish: + # save return address into R0 + mflr 0 + # save return address on stack, set up stack frame + stw 0, 4(1) + # allocate stack space, R1 % 16 == 0 + stwu 1, -16(1) + + # exit code is zero + li 3, 0 + # exit application + bl ._exit + nop diff --git a/app/context_switch/deboost.context/asm/make_ppc64_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/make_ppc64_sysv_elf_gas.S new file mode 100644 index 0000000..71cb70d --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_ppc64_sysv_elf_gas.S @@ -0,0 +1,176 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 
42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | CR | LR | PC | back-chain| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | cr saved | lr saved | compiler | linker | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | TOC saved | FCTX | DATA | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.globl make_fcontext +#if _CALL_ELF == 2 + .text + .align 2 +make_fcontext: + addis %r2, %r12, .TOC.-make_fcontext@ha + addi %r2, %r2, .TOC.-make_fcontext@l + .localentry make_fcontext, . 
- make_fcontext +#else + .section ".opd","aw" + .align 3 +make_fcontext: +# ifdef _CALL_LINUX + .quad .L.make_fcontext,.TOC.@tocbase,0 + .type make_fcontext,@function + .text + .align 2 +.L.make_fcontext: +# else + .hidden .make_fcontext + .globl .make_fcontext + .quad .make_fcontext,.TOC.@tocbase,0 + .size make_fcontext,24 + .type .make_fcontext,@function + .text + .align 2 +.make_fcontext: +# endif +#endif + # save return address into R6 + mflr %r6 + + # first arg of make_fcontext() == top address of context-stack + # shift address in R3 to lower 16 byte boundary + clrrdi %r3, %r3, 4 + + # reserve space for context-data on context-stack + # including 64 byte of linkage + parameter area (R1 % 16 == 0) + subi %r3, %r3, 248 + + # third arg of make_fcontext() == address of context-function + # entry point (ELFv2) or descriptor (ELFv1) +#if _CALL_ELF == 2 + # save address of context-function entry point + std %r5, 176(%r3) +#else + # save address of context-function entry point + ld %r4, 0(%r5) + std %r4, 176(%r3) + # save TOC of context-function + ld %r4, 8(%r5) + std %r4, 0(%r3) +#endif + + # set back-chain to zero + li %r0, 0 + std %r0, 184(%r3) + + # compute address of returned transfer_t + addi %r0, %r3, 232 + mr %r4, %r0 + std %r4, 152(%r3) + + # load LR + mflr %r0 + # jump to label 1 + bl 1f +1: + # load LR into R4 + mflr %r4 + # compute abs address of label finish + addi %r4, %r4, finish - 1b + # restore LR + mtlr %r0 + # save address of finish as return-address for context-function + # will be entered after context-function returns + std %r4, 168(%r3) + + # restore return address from R6 + mtlr %r6 + + blr # return pointer to context-data + +finish: + # save return address into R0 + mflr %r0 + # save return address on stack, set up stack frame + std %r0, 8(%r1) + # allocate stack space, R1 % 16 == 0 + stdu %r1, -32(%r1) + + # exit code is zero + li %r3, 0 + # exit application + bl _exit + nop +#if _CALL_ELF == 2 + .size make_fcontext, .-make_fcontext +#else 
+# ifdef _CALL_LINUX + .size .make_fcontext, .-.L.make_fcontext +# else + .size .make_fcontext, .-.make_fcontext +# endif +#endif + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/make_ppc64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/make_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..7b947bb --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_ppc64_sysv_macho_gas.S @@ -0,0 +1,126 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 
104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | CR | LR | PC | back-chain| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | cr saved | lr saved | compiler | linker | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | TOC saved | FCTX | DATA | | * + * ------------------------------------------------- * + * * + *******************************************************/ +.text +.globl _make_fcontext +_make_fcontext: + ; save return address into R6 + mflr r6 + + ; first arg of make_fcontext() == top address of context-stack + ; shift address in R3 to lower 16 byte boundary (64-bit clear, keeps the upper word) + clrrdi r3, r3, 4 + + ; reserve space for context-data on context-stack + ; including 64 byte of linkage + parameter area (R1 % 16 ==
0) + subi r3, r3, 248 + + ; third arg of make_fcontext() == address of context-function (8 byte PC slot) + std r5, 176(r3) + + ; set back-chain to zero + li r0, 0 + std r0, 184(r3) + + ; compute address of returned transfer_t + addi r0, r3, 232 + mr r4, r0 + std r4, 152(r3) + + ; load LR + mflr r0 + ; jump to label 1 + bl l1 +l1: + ; load LR into R4 + mflr r4 + ; compute abs address of label finish + addi r4, r4, lo16((finish - .) + 4) + ; restore LR + mtlr r0 + ; save address of finish as return-address for context-function + ; will be entered after context-function returns + std r4, 168(r3) + + ; restore return address from R6 + mtlr r6 + + blr ; return pointer to context-data + +finish: + ; save return address into R0 + mflr r0 + ; save return address on stack, set up stack frame + std r0, 8(r1) + ; allocate stack space, R1 % 16 == 0 + stdu r1, -32(r1) + + ; exit code is zero + li r3, 0 + ; exit application + bl __exit + nop diff --git a/app/context_switch/deboost.context/asm/make_ppc64_sysv_xcoff_gas.S b/app/context_switch/deboost.context/asm/make_ppc64_sysv_xcoff_gas.S new file mode 100644 index 0000000..b229ab1 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_ppc64_sysv_xcoff_gas.S @@ -0,0 +1,62 @@ + .globl make_fcontext[DS] + .globl .make_fcontext[PR] + .align 2 + .csect .make_fcontext[PR], 3 + .globl _make_fcontext +#._make_fcontext: + # save return address into R6 + mflr 6 + + # first arg of make_fcontext() == top address of context-stack + # shift address in R3 to lower 16 byte boundary (64-bit clear, keeps the upper word) + clrrdi 3, 3, 4 + + # reserve space for context-data on context-stack + # including 64 byte of linkage + parameter area (R1 % 16 == 0) + subi 3, 3, 248 + + # third arg of make_fcontext() == address of context-function (8 byte PC slot) + std 5, 176(3) + + # set back-chain to zero + li 0, 0 + std 0, 184(3) + + # compute address of returned transfer_t + addi 0, 3, 232 + mr 4, 0 + std 4, 152(3) + + # load LR + mflr 0 + # jump to label 1 + bl .Label +.Label: + # load LR into R4
+ mflr 4 + # compute abs address of label .L_finish + addi 4, 4, .L_finish - .Label + # restore LR + mtlr 0 + # save address of finish as return-address for context-function + # will be entered after context-function returns + std 4, 168(3) + + # restore return address from R6 + mtlr 6 + + blr # return pointer to context-data + +.L_finish: + # save return address into R0 + mflr 0 + # save return address on stack, set up stack frame + std 0, 8(1) + # allocate stack space, R1 % 16 == 0 + stdu 1, -32(1) + + # exit code is zero + li 3, 0 + # exit application + bl ._exit + nop diff --git a/app/context_switch/deboost.context/asm/make_x86_64_ms_pe_gas.asm b/app/context_switch/deboost.context/asm/make_x86_64_ms_pe_gas.asm new file mode 100644 index 0000000..1387599 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_x86_64_ms_pe_gas.asm @@ -0,0 +1,120 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fbr_strg | fc_dealloc | limit | base | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + *
---------------------------------------------------------------------------------- * + * | R12 | R13 | R14 | R15 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ---------------------------------------------------------------------------------- * + * | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | * + * ---------------------------------------------------------------------------------- * + * | RDI | RSI | RBX | RBP | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ---------------------------------------------------------------------------------- * + * | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | * + * ---------------------------------------------------------------------------------- * + * | hidden | RIP | EXIT | parameter area | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ---------------------------------------------------------------------------------- * + * | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | * + * ---------------------------------------------------------------------------------- * + * | parameter area | FCTX | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ---------------------------------------------------------------------------------- * + * | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 |
0xb8 | 0xbc | * + * ---------------------------------------------------------------------------------- * + * | DATA | | | | * + * ---------------------------------------------------------------------------------- * + * * + * ***********************************************************************************/ + +.file "make_x86_64_ms_pe_gas.asm" +.text +.p2align 4,,15 +.globl make_fcontext +.def make_fcontext; .scl 2; .type 32; .endef +.seh_proc make_fcontext +make_fcontext: +.seh_endprologue + + /* first arg of make_fcontext() == top of context-stack */ + movq %rcx, %rax + + /* shift address in RAX to lower 16 byte boundary */ + /* == pointer to fcontext_t and address of context stack */ + andq $-16, %rax + + /* reserve space for context-data on context-stack */ + /* on context-function entry: (RSP -0x8) % 16 == 0 */ + leaq -0xb8(%rax), %rax + + /* third arg of make_fcontext() == address of context-function */ + movq %r8, 0x68(%rax) + + /* first arg of make_fcontext() == top of context-stack */ + /* save top address of context stack as 'base' */ + movq %rcx, 0x18(%rax) + /* second arg of make_fcontext() == size of context-stack */ + /* negate stack size for LEA instruction (== subtraction) */ + negq %rdx + /* compute bottom address of context stack (limit) */ + leaq (%rcx,%rdx), %rcx + /* save bottom address of context stack as 'limit' */ + movq %rcx, 0x10(%rax) + /* save address of context stack limit as 'deallocation stack' */ + movq %rcx, 0x8(%rax) + + /* compute address of transfer_t */ + leaq 0x98(%rax), %rcx + /* store address of transfer_t in hidden field */ + movq %rcx, 0x60(%rax) + + /* compute abs address of label finish */ + leaq finish(%rip), %rcx + /* save address of finish as return-address for context-function */ + /* will be entered after context-function returns */ + movq %rcx, 0x70(%rax) + + ret /* return pointer to context-data */ + +finish: + /* align RSP down to a 32 byte boundary before calling _exit() */ + andq $-32, %rsp + /* 32byte shadow-space for _exit() are */ +
/* already reserved by make_fcontext() */ + /* exit code is zero */ + xorq %rcx, %rcx + /* exit application */ + call _exit + hlt +.seh_endproc + +.def _exit; .scl 2; .type 32; .endef /* standard C library function */ + +.section .drectve +.ascii " -export:\"make_fcontext\"" diff --git a/app/context_switch/deboost.context/asm/make_x86_64_ms_pe_masm.asm b/app/context_switch/deboost.context/asm/make_x86_64_ms_pe_masm.asm new file mode 100644 index 0000000..8cfaf5b --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_x86_64_ms_pe_masm.asm @@ -0,0 +1,106 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; ---------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; ---------------------------------------------------------------------------------- +; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | +; ---------------------------------------------------------------------------------- +; | fbr_strg | fc_dealloc | limit | base | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; ---------------------------------------------------------------------------------- +; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | +; ---------------------------------------------------------------------------------- +; | R12 | R13 | R14 | R15 | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | +; ---------------------------------------------------------------------------------- +; | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 |
0x5c | +; ---------------------------------------------------------------------------------- +; | RDI | RSI | RBX | RBP | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +; ---------------------------------------------------------------------------------- +; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | +; ---------------------------------------------------------------------------------- +; | hidden | RIP | EXIT | parameter area | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | +; ---------------------------------------------------------------------------------- +; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | +; ---------------------------------------------------------------------------------- +; | parameter area | FCTX | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | +; ---------------------------------------------------------------------------------- +; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | +; ---------------------------------------------------------------------------------- +; | DATA | | | | +; ---------------------------------------------------------------------------------- + +; standard C library function +EXTERN _exit:PROC +.code + +; generate function table entry in .pdata and unwind information in +make_fcontext PROC BOOST_CONTEXT_EXPORT FRAME + ; .xdata for a function's structured exception handling unwind behavior + .endprolog + + ; first arg of make_fcontext() == top of context-stack + mov rax, rcx + + ; shift
address in RAX to lower 16 byte boundary + ; == pointer to fcontext_t and address of context stack + and rax, -16 + + ; reserve space for context-data on context-stack + ; on context-function entry: (RSP -0x8) % 16 == 0 + sub rax, 0b8h + + ; third arg of make_fcontext() == address of context-function + mov [rax+068h], r8 + + ; first arg of make_fcontext() == top of context-stack + ; save top address of context stack as 'base' + mov [rax+018h], rcx + ; second arg of make_fcontext() == size of context-stack + ; negate stack size for LEA instruction (== subtraction) + neg rdx + ; compute bottom address of context stack (limit) + lea rcx, [rcx+rdx] + ; save bottom address of context stack as 'limit' + mov [rax+010h], rcx + ; save address of context stack limit as 'deallocation stack' + mov [rax+08h], rcx + + ; compute address of transfer_t + lea rcx, [rax+098h] + ; store address of transfer_t in hidden field + mov [rax+060h], rcx + + ; compute abs address of label finish + lea rcx, finish + ; save address of finish as return-address for context-function + ; will be entered after context-function returns + mov [rax+070h], rcx + + ret ; return pointer to context-data + +finish: + ; exit code is zero + xor rcx, rcx + ; exit application + call _exit + hlt +make_fcontext ENDP +END diff --git a/app/context_switch/deboost.context/asm/make_x86_64_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/make_x86_64_sysv_elf_gas.S new file mode 100644 index 0000000..ca0da32 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_x86_64_sysv_elf_gas.S @@ -0,0 +1,62 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | R12 | R13 | R14 | R15 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | RBX | RBP | RIP | EXIT | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl make_fcontext +.type make_fcontext,@function +.align 16 +make_fcontext: + /* first arg of make_fcontext() == top of context-stack */ + movq %rdi, %rax + + /* shift address in RAX to lower 16 byte boundary */ + andq $-16, %rax + + /* reserve space for context-data on context-stack */ + /* on context-function entry: (RSP -0x8) % 16 == 0 */ + leaq -0x40(%rax), %rax + + /* third arg of make_fcontext() == address of context-function */ + movq %rdx, 0x30(%rax) + + /* compute abs address of label finish */ + leaq finish(%rip), %rcx + /* save address of finish as return-address for context-function */ + /* will be entered after context-function returns */ + movq %rcx, 0x38(%rax) + + ret /* return 
pointer to context-data */ + +finish: + /* exit code is zero */ + xorq %rdi, %rdi + /* exit application */ + call _exit@PLT + hlt +.size make_fcontext,.-make_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/make_x86_64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/make_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..8ccafc3 --- /dev/null +++ b/app/context_switch/deboost.context/asm/make_x86_64_sysv_macho_gas.S @@ -0,0 +1,58 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | R12 | R13 | R14 | R15 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | RBX | RBP | RIP | EXIT | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl _make_fcontext +.align 8 +_make_fcontext: + /* 
first arg of make_fcontext() == top of context-stack */ + movq %rdi, %rax + + /* shift address in RAX to lower 16 byte boundary */ + movabs $-16, %r8 + andq %r8, %rax + + /* reserve space for context-data on context-stack */ + /* on context-function entry: (RSP -0x8) % 16 == 0 */ + leaq -0x40(%rax), %rax + + /* third arg of make_fcontext() == address of context-function */ + movq %rdx, 0x30(%rax) + + /* compute abs address of label finish */ + leaq finish(%rip), %rcx + /* save address of finish as return-address for context-function */ + /* will be entered after context-function returns */ + movq %rcx, 0x38(%rax) + + ret /* return pointer to context-data */ + +finish: + /* exit code is zero */ + xorq %rdi, %rdi + /* exit application */ + call __exit + hlt diff --git a/app/context_switch/deboost.context/asm/ontop_arm64_aapcs_elf_gas.S b/app/context_switch/deboost.context/asm/ontop_arm64_aapcs_elf_gas.S new file mode 100644 index 0000000..f33599a --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_arm64_aapcs_elf_gas.S @@ -0,0 +1,87 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.cpu generic+fp+simd +.text +.align 2 +.global ontop_fcontext +.type ontop_fcontext, %function +ontop_fcontext: + # prepare stack for GP + FPU + sub sp, sp, #0x70 + + # save x19-x30 + stp x19, x20, [sp, #0x00] + stp x21, x22, [sp, #0x10] + stp x23, x24, [sp, #0x20] + stp x25, x26, [sp, #0x30] + stp x27, x28, [sp, #0x40] + stp x29, x30, [sp, #0x50] 
+ + # save LR as PC + str x30, [sp, #0x60] + + # store RSP (pointing to context-data) in X4 + mov x4, sp + + # restore RSP (pointing to context-data) from X0 + mov sp, x0 + + # load x19-x30 + ldp x19, x20, [sp, #0x00] + ldp x21, x22, [sp, #0x10] + ldp x23, x24, [sp, #0x20] + ldp x25, x26, [sp, #0x30] + ldp x27, x28, [sp, #0x40] + ldp x29, x30, [sp, #0x50] + + # return transfer_t from jump + # pass transfer_t as first arg in context function + # X0 == FCTX, X1 == DATA + mov x0, x4 + + # skip pc + # restore stack from GP + FPU + add sp, sp, #0x70 + + # jump to ontop-function + ret x2 +.size ontop_fcontext,.-ontop_fcontext +# Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/ontop_arm64_aapcs_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_arm64_aapcs_macho_gas.S new file mode 100644 index 0000000..d046427 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_arm64_aapcs_macho_gas.S @@ -0,0 +1,82 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.global _ontop_fcontext +.balign 16 +_ontop_fcontext: + # prepare stack for GP + FPU + sub sp, sp, #0x70 + + # save x19-x30 + stp x19, x20, [sp, #0x00] + stp x21, x22, [sp, #0x10] + stp x23, x24, [sp, #0x20] + stp x25, x26, [sp, #0x30] + stp x27, x28, [sp, #0x40] + stp x29, x30, [sp, #0x50] + + # save LR as PC + str x30, [sp, #0x60] + + # 
store RSP (pointing to context-data) in X4 + mov x4, sp + + # restore RSP (pointing to context-data) from X0 + mov sp, x0 + + # load x19-x30 + ldp x19, x20, [sp, #0x00] + ldp x21, x22, [sp, #0x10] + ldp x23, x24, [sp, #0x20] + ldp x25, x26, [sp, #0x30] + ldp x27, x28, [sp, #0x40] + ldp x29, x30, [sp, #0x50] + + # return transfer_t from jump + # pass transfer_t as first arg in context function + # X0 == FCTX, X1 == DATA + mov x0, x4 + + # skip pc + # restore stack from GP + FPU + add sp, sp, #0x70 + + # jump to ontop-function + ret x2 diff --git a/app/context_switch/deboost.context/asm/ontop_arm_aapcs_elf_gas.S b/app/context_switch/deboost.context/asm/ontop_arm_aapcs_elf_gas.S new file mode 100644 index 0000000..8c84570 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_arm_aapcs_elf_gas.S @@ -0,0 +1,63 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * |hiddn| v1 | v2 | v3 | v4 | v5 | v6 | v7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl ontop_fcontext +.align 2 +.type ontop_fcontext,%function +ontop_fcontext: + @ save LR as PC + 
push {lr} + @ save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + @ store RSP (pointing to context-data) in A1 + mov a1, sp + + @ restore RSP (pointing to context-data) from A2 + mov sp, a2 + + @ store parent context in A2 + mov a2, a1 + + @ restore hidden,V1-V8,LR + pop {a1,v1-v8,lr} + + @ return transfer_t from jump + str a2, [a1, #0] + str a3, [a1, #4] + @ pass transfer_t as first arg in context function + @ A1 == hidden, A2 == FCTX, A3 == DATA + + @ skip PC + add sp, sp, #4 + + @ jump to ontop-function + bx a4 +.size ontop_fcontext,.-ontop_fcontext + +@ Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/ontop_arm_aapcs_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_arm_aapcs_macho_gas.S new file mode 100644 index 0000000..9d246c3 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_arm_aapcs_macho_gas.S @@ -0,0 +1,72 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | sjlj|hiddn| v1 | v2 | v3 | v4 | v5 | v6 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | v7 | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _ontop_fcontext +.align 2 +_ontop_fcontext: + @ save LR as PC + push {lr} + @ save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + @ locate TLS to save/restore SjLj handler + mrc p15, 0, v2, c13, c0, #3 + bic v2, v2, #3 + + @ load TLS[__PTK_LIBC_DYLD_Unwind_SjLj_Key] + ldr v1, [v2, #8] + @ save SjLj handler + push {v1} + + @ store RSP (pointing to context-data) in A1 + mov a1, sp + + @ restore RSP (pointing to context-data) from A2 + mov sp, a2 + + @ restore SjLj handler + pop {v1} + @ store SjLj handler in TLS + str v1, [v2, #8] + + @ store parent context in A2 + mov a2, a1 + + @ restore hidden,V1-V8,LR + pop {a1,v1-v8,lr} + + @ return transfer_t from jump + str a2, [a1, #0] + str a3, [a1, #4] + @ pass transfer_t as first arg in context function + @ A1 == hidden, A2 == FCTX, A3 == DATA + + @ skip PC + add sp, sp, #4 + + @ jump to ontop-function + bx a4 diff --git a/app/context_switch/deboost.context/asm/ontop_arm_aapcs_pe_armasm.asm b/app/context_switch/deboost.context/asm/ontop_arm_aapcs_pe_armasm.asm new 
file mode 100644 index 0000000..f360a8f --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_arm_aapcs_pe_armasm.asm @@ -0,0 +1,86 @@ +;/* +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) +;*/ + +; ******************************************************* +; * * +; * ------------------------------------------------- * +; * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +; * ------------------------------------------------- * +; * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * +; * ------------------------------------------------- * +; * |deall|limit| base|hiddn| v1 | v2 | v3 | v4 | * +; * ------------------------------------------------- * +; * ------------------------------------------------- * +; * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +; * ------------------------------------------------- * +; * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * +; * ------------------------------------------------- * +; * | v5 | v6 | v7 | v8 | lr | pc | FCTX| DATA| * +; * ------------------------------------------------- * +; * * +; ******************************************************* + + AREA |.text|, CODE + ALIGN 4 + EXPORT ontop_fcontext + +ontop_fcontext PROC + ; save LR as PC + push {lr} + ; save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + ; load TIB to save/restore thread size and limit. 
+ ; we do not need preserve CPU flag and can use its arg register + mrc p15, #0, v1, c13, c0, #2 + + ; save current stack base + ldr a1, [v1, #0x04] + push {a1} + ; save current stack limit + ldr a1, [v1, #0x08] + push {a1} + ; save current deallocation stack + ldr a1, [v1, #0xe0c] + push {a1} + + ; store RSP (pointing to context-data) in A1 + mov a1, sp + + ; restore RSP (pointing to context-data) from A2 + mov sp, a2 + + ; restore deallocation stack (pushed last, so popped first; A2 is free as scratch, A1 must keep the parent context) + pop {a2} + str a2, [v1, #0xe0c] + ; restore stack limit + pop {a2} + str a2, [v1, #0x08] + ; restore stack base + pop {a2} + str a2, [v1, #0x04] + + ; store parent context in A2 + mov a2, a1 + + ; restore hidden,V1-V8,LR + pop {a1,v1-v8,lr} + + ; return transfer_t from jump + str a2, [a1, #0] + str a3, [a1, #4] + ; pass transfer_t as first arg in context function + ; A1 == hidden, A2 == FCTX, A3 == DATA + + ; skip PC + add sp, sp, #4 + + ; jump to ontop-function + bx a4 + + ENDP + END diff --git a/app/context_switch/deboost.context/asm/ontop_combined_all_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_combined_all_macho_gas.S new file mode 100644 index 0000000..e0d6e32 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_combined_all_macho_gas.S @@ -0,0 +1,28 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) + #if defined(__aarch64__) + #include "ontop_arm64_aapcs_macho_gas.S" + #else + #include "ontop_arm_aapcs_macho_gas.S" + #endif +#else + #if defined(__i386__) + #include "ontop_i386_sysv_macho_gas.S" + #elif defined(__x86_64__) + #include "ontop_x86_64_sysv_macho_gas.S" + #elif defined(__ppc__) + #include "ontop_ppc32_sysv_macho_gas.S" + #elif defined(__ppc64__) + #include "ontop_ppc64_sysv_macho_gas.S" + #else + #error "No arch's" + #endif +#endif diff --git a/app/context_switch/deboost.context/asm/ontop_combined_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_combined_sysv_macho_gas.S new file mode 100644 index 0000000..20cbeb9 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_combined_sysv_macho_gas.S @@ -0,0 +1,20 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "ontop_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "ontop_x86_64_sysv_macho_gas.S" +#elif defined(__ppc__) + #include "ontop_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "ontop_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/app/context_switch/deboost.context/asm/ontop_i386_ms_pe_gas.asm b/app/context_switch/deboost.context/asm/ontop_i386_ms_pe_gas.asm new file mode 100644 index 0000000..a40b86d --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_i386_ms_pe_gas.asm @@ -0,0 +1,113 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************* +* --------------------------------------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* --------------------------------------------------------------------------------- * +* | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | * +* --------------------------------------------------------------------------------- * +* | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX | * +* --------------------------------------------------------------------------------- * +* --------------------------------------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* --------------------------------------------------------------------------------- * +* | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | * +* --------------------------------------------------------------------------------- * +* | EBP | EIP | to | data | | EH NXT |SEH HNDLR| | * +* --------------------------------------------------------------------------------- * +*************************************************************************************/ + +.file "ontop_i386_ms_pe_gas.asm" +.text +.p2align 4,,15 +.globl _ontop_fcontext +.def _ontop_fcontext; .scl 2; .type 32; .endef +_ontop_fcontext: + pushl %ebp /* save EBP */ + pushl %ebx /* save EBX */ + pushl %esi /* save ESI */ + pushl %edi /* save EDI */ + + /* load NT_TIB */ + movl %fs:(0x18), %edx + + /* load current SEH exception list */ + movl (%edx), %eax + push %eax + + /* load current stack base */ + movl 0x04(%edx), %eax + push %eax + + /* load current stack limit */ + movl 0x08(%edx), %eax + push %eax + + /* load current deallocation stack */ + movl 0xe0c(%edx), %eax + push %eax + + /* load fiber local storage */ + movl 0x10(%edx), %eax + push %eax + + /* store ESP (pointing to 
context-data) in ECX */ + movl %esp, %ecx + + /* first arg of ontop_fcontext() == fcontext to jump to */ + movl 0x28(%esp), %eax + + /* pass parent fcontext_t */ + movl %ecx, 0x28(%eax) + + /* second arg of ontop_fcontext() == data to be transferred */ + movl 0x2c(%esp), %ecx + + /* pass data */ + movl %ecx, 0x2c(%eax) + + /* third arg of ontop_fcontext() == ontop-function */ + movl 0x30(%esp), %ecx + + /* restore ESP (pointing to context-data) from EAX */ + movl %eax, %esp + + /* load NT_TIB into EDX */ + movl %fs:(0x18), %edx + + /* restore fiber local storage */ + popl %eax + movl %eax, 0x10(%edx) + + /* restore current deallocation stack */ + popl %eax + movl %eax, 0xe0c(%edx) + + /* restore current stack limit */ + popl %eax + movl %eax, 0x08(%edx) + + /* restore current stack base */ + popl %eax + movl %eax, 0x04(%edx) + + /* restore current SEH exception list */ + popl %eax + movl %eax, (%edx) + + popl %edi /* restore EDI */ + popl %esi /* restore ESI */ + popl %ebx /* restore EBX */ + popl %ebp /* restore EBP */ + + /* jump to context */ + jmp *%ecx + +.section .drectve +.ascii " -export:\"ontop_fcontext\"" diff --git a/app/context_switch/deboost.context/asm/ontop_i386_ms_pe_masm.asm b/app/context_switch/deboost.context/asm/ontop_i386_ms_pe_masm.asm new file mode 100644 index 0000000..f60b98e --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_i386_ms_pe_masm.asm @@ -0,0 +1,112 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. 
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; --------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; --------------------------------------------------------------------------------- +; | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | +; --------------------------------------------------------------------------------- +; | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX | +; --------------------------------------------------------------------------------- +; --------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; --------------------------------------------------------------------------------- +; | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | +; --------------------------------------------------------------------------------- +; | EBP | EIP | to | data | | EH NXT |SEH HNDLR| | +; --------------------------------------------------------------------------------- + +.386 +.XMM +.model flat, c +.code + +ontop_fcontext PROC BOOST_CONTEXT_EXPORT + push ebp ; save EBP + push ebx ; save EBX + push esi ; save ESI + push edi ; save EDI + + assume fs:nothing + ; load NT_TIB into EDX + mov edx, fs:[018h] + assume fs:error + + ; load current SEH exception list + mov eax, [edx] + push eax + + ; load current stack base + mov eax, [edx+04h] + push eax + + ; load current stack limit + mov eax, [edx+08h] + push eax + + ; load current deallocation stack + mov eax, [edx+0e0ch] + push eax + + ; load fiber local storage + mov eax, [edx+010h] + push eax + + ; store ESP (pointing to context-data) in ECX + mov ecx, esp + + ; first arg of ontop_fcontext() == fcontext to jump to + mov eax, [esp+028h] + + ; pass parent fcontext_t + mov [eax+028h], ecx + + ; second arg of ontop_fcontext() == data to be transferred + mov ecx, [esp+02ch] + + ; pass data + mov [eax+02ch], ecx + + ; 
third arg of ontop_fcontext() == ontop-function + mov ecx, [esp+030h] + + ; restore ESP (pointing to context-data) from EAX + mov esp, eax + + assume fs:nothing + ; load NT_TIB into EDX + mov edx, fs:[018h] + assume fs:error + + ; restore fiber local storage + pop eax + mov [edx+010h], eax + + ; restore current deallocation stack + pop eax + mov [edx+0e0ch], eax + + ; restore current stack limit + pop eax + mov [edx+08h], eax + + ; restore current stack base + pop eax + mov [edx+04h], eax + + ; restore current SEH exception list + pop eax + mov [edx], eax + + pop edi ; restore EDI + pop esi ; restore ESI + pop ebx ; restore EBX + pop ebp ; restore EBP + + ; jump to context + jmp ecx +ontop_fcontext ENDP +END diff --git a/app/context_switch/deboost.context/asm/ontop_i386_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/ontop_i386_sysv_elf_gas.S new file mode 100644 index 0000000..64f2b5e --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_i386_sysv_elf_gas.S @@ -0,0 +1,61 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/***************************************************************************************** + * * + * ----------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ----------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ----------------------------------------------------------------------------------- * + * | EDI | ESI | EBX | EBP | EIP | hidden | to | data | * + * ----------------------------------------------------------------------------------- * + * * + *****************************************************************************************/ + +.text +.globl ontop_fcontext +.align 2 +.type ontop_fcontext,@function +ontop_fcontext: + pushl %ebp /* save EBP */ + pushl %ebx /* save EBX */ + pushl %esi /* save ESI */ + pushl %edi /* save EDI */ + + /* store fcontext_t in ECX */ + movl %esp, %ecx + + /* first arg of ontop_fcontext() == fcontext to jump to */ + movl 0x18(%esp), %eax + + /* pass parent fcontext_t */ + movl %ecx, 0x18(%eax) + + /* second arg of ontop_fcontext() == data to be transferred */ + movl 0x1c(%esp), %ecx + + /* pass data */ + movl %ecx, 0x1c(%eax) + + /* third arg of ontop_fcontext() == ontop-function */ + movl 0x20(%esp), %ecx + + /* restore ESP (pointing to context-data) from EAX */ + movl %eax, %esp + + popl %edi /* restore EDI */ + popl %esi /* restore ESI */ + popl %ebx /* restore EBX */ + popl %ebp /* restore EBP */ + + /* jump to context */ + jmp *%ecx +.size ontop_fcontext,.-ontop_fcontext + +/* Mark that we don't need executable stack. 
*/ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/ontop_i386_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_i386_sysv_macho_gas.S new file mode 100644 index 0000000..643849d --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_i386_sysv_macho_gas.S @@ -0,0 +1,56 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/***************************************************************************************** + * * + * ----------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ----------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ----------------------------------------------------------------------------------- * + * | EDI | ESI | EBX | EBP | EIP | hidden | to | data | * + * ----------------------------------------------------------------------------------- * + * * + *****************************************************************************************/ + +.text +.globl _ontop_fcontext +.align 2 +_ontop_fcontext: + pushl %ebp /* save EBP */ + pushl %ebx /* save EBX */ + pushl %esi /* save ESI */ + pushl %edi /* save EDI */ + + /* store fcontext_t in ECX */ + movl %esp, %ecx + + /* first arg of ontop_fcontext() == fcontext to jump to */ + movl 0x18(%esp), %eax + + /* pass parent fcontext_t */ + movl %ecx, 0x18(%eax) + + /* second arg of ontop_fcontext() == data to be transferred */ + movl 0x1c(%esp), %ecx + + /* pass data */ + movl %ecx, 0x1c(%eax) + + /* third arg of ontop_fcontext() == ontop-function */ + movl 0x20(%esp), %ecx + + /* restore ESP (pointing to context-data) from EAX */ + movl %eax, %esp + + popl %edi /* restore EDI */ + popl %esi /* restore ESI */ + popl %ebx /* restore 
EBX */ + popl %ebp /* restore EBP */ + + /* jump to context */ + jmp *%ecx diff --git a/app/context_switch/deboost.context/asm/ontop_i386_x86_64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_i386_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..393c5fe --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_i386_x86_64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "ontop_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "ontop_x86_64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/app/context_switch/deboost.context/asm/ontop_mips32_o32_elf_gas.S b/app/context_switch/deboost.context/asm/ontop_mips32_o32_elf_gas.S new file mode 100644 index 0000000..8ae09a2 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_mips32_o32_elf_gas.S @@ -0,0 +1,87 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | FP |hiddn| RA | PC | GP | FCTX| DATA| | * + * ------------------------------------------------- * + * * + * *****************************************************/ + +.text +.globl ontop_fcontext +.align 2 +.type ontop_fcontext,@function +.ent ontop_fcontext +ontop_fcontext: + # reserve space on stack + addiu $sp, $sp, -112 + + sw $s0, ($sp) # save S0 + sw $s1, 4($sp) # save S1 + sw $s2, 8($sp) # save S2 + sw $s3, 12($sp) # save S3 + sw $s4, 16($sp) # save S4 + sw $s5, 20($sp) # save S5 + sw $s6, 24($sp) # save S6 + sw $s7, 28($sp) # save S7 + sw $fp, 32($sp) # save FP + sw $a0, 36($sp) # save hidden, address of returned transfer_t + sw $ra, 40($sp) # save RA + sw $ra, 44($sp) # save RA as PC + + # store SP (pointing to context-data) in A0 + move $a0, $sp + + # restore SP (pointing to context-data) from A1 + move $sp, $a1 + + lw $s0, ($sp) # restore S0 + lw $s1, 4($sp) # restore S1 + lw $s2, 8($sp) # restore S2 + lw $s3, 12($sp) # restore S3 + lw $s4, 16($sp) # restore S4 + lw $s5, 20($sp) # restore S5 + lw $s6, 24($sp) # restore S6 + lw $s7, 28($sp) # restore S7 + lw $fp, 32($sp) # restore FP + lw $t0, 36($sp) # restore hidden, address of returned transfer_t + lw $ra, 40($sp) # restore RA + + 
# load address of ontop-function into T9 (o32 PIC callees expect T9 == their entry address) + move $t9, $a3 + + # adjust stack + addiu $sp, $sp, 112 + + # return transfer_t from jump + sw $a0, ($t0) # fctx of transfer_t + sw $a2, 4($t0) # data of transfer_t + # pass transfer_t as first arg in context function + # A0 == hidden, A1 == fctx, A2 == data + move $a1, $a0 + move $a0, $t0 + + # jump to ontop-function + jr $t9 +.end ontop_fcontext +.size ontop_fcontext, .-ontop_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/ontop_ppc32_ppc64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_ppc32_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..4632f4c --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_ppc32_ppc64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__ppc__) + #include "ontop_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "ontop_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/app/context_switch/deboost.context/asm/ontop_ppc32_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/ontop_ppc32_sysv_elf_gas.S new file mode 100644 index 0000000..a2a9ae2 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_ppc32_sysv_elf_gas.S @@ -0,0 +1,129 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 |hiddn| CR | LR | PC | FCTX| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | DATA| | | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl ontop_fcontext +.align 2 +.type ontop_fcontext,@function +ontop_fcontext: + # reserve 92 bytes on the stack for the context-data + subi %r1, %r1, 92 + + stw %r13, 0(%r1) # save R13 + stw %r14, 4(%r1) # save R14 + stw %r15, 8(%r1) # save R15 + stw %r16, 12(%r1) # save R16 + stw %r17, 16(%r1) # save R17 + stw %r18, 20(%r1) 
# save R18 + stw %r19, 24(%r1) # save R19 + stw %r20, 28(%r1) # save R20 + stw %r21, 32(%r1) # save R21 + stw %r22, 36(%r1) # save R22 + stw %r23, 40(%r1) # save R23 + stw %r24, 44(%r1) # save R24 + stw %r25, 48(%r1) # save R25 + stw %r26, 52(%r1) # save R26 + stw %r27, 56(%r1) # save R27 + stw %r28, 60(%r1) # save R28 + stw %r29, 64(%r1) # save R29 + stw %r30, 68(%r1) # save R30 + stw %r31, 72(%r1) # save R31 + stw %r3, 76(%r1) # save hidden (R3) + + # save CR + mfcr %r0 + stw %r0, 80(%r1) + # save LR + mflr %r0 + stw %r0, 84(%r1) + # save LR as PC + stw %r0, 88(%r1) + + # store stack pointer R1 (pointing to context-data) in R7 + mr %r7, %r1 + + # load stack pointer R1 (pointing to context-data) from R4 + mr %r1, %r4 + + lwz %r13, 0(%r1) # restore R13 + lwz %r14, 4(%r1) # restore R14 + lwz %r15, 8(%r1) # restore R15 + lwz %r16, 12(%r1) # restore R16 + lwz %r17, 16(%r1) # restore R17 + lwz %r18, 20(%r1) # restore R18 + lwz %r19, 24(%r1) # restore R19 + lwz %r20, 28(%r1) # restore R20 + lwz %r21, 32(%r1) # restore R21 + lwz %r22, 36(%r1) # restore R22 + lwz %r23, 40(%r1) # restore R23 + lwz %r24, 44(%r1) # restore R24 + lwz %r25, 48(%r1) # restore R25 + lwz %r26, 52(%r1) # restore R26 + lwz %r27, 56(%r1) # restore R27 + lwz %r28, 60(%r1) # restore R28 + lwz %r29, 64(%r1) # restore R29 + lwz %r30, 68(%r1) # restore R30 + lwz %r31, 72(%r1) # restore R31 + lwz %r4, 76(%r1) # restore hidden into R4 + + # restore CR + lwz %r0, 80(%r1) + mtcr %r0 + # restore LR + lwz %r0, 84(%r1) + mtlr %r0 + # ignore PC (the slot at offset 88 is not loaded) + + # release the 92 bytes of context-data + addi %r1, %r1, 92 + + # return transfer_t: R7 stored at offset 0, R5 at offset 4 of the address in R4 + stw %r7, 0(%r4) + stw %r5, 4(%r4) + + # load ontop-function address (R6) into CTR + mtctr %r6 + + # jump to ontop-function + bctr +.size ontop_fcontext, .-ontop_fcontext + +/* Mark that we don't need executable stack. 
*/ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/ontop_ppc32_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_ppc32_sysv_macho_gas.S new file mode 100644 index 0000000..bc38216 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_ppc32_sysv_macho_gas.S @@ -0,0 +1,124 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 |hiddn| CR | LR | PC | FCTX| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | DATA| | 
| | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _ontop_fcontext +.align 2 +_ontop_fcontext: + # reserve 92 bytes on the stack for the context-data + subi r1, r1, 92 + + stw r13, 0(r1) # save R13 + stw r14, 4(r1) # save R14 + stw r15, 8(r1) # save R15 + stw r16, 12(r1) # save R16 + stw r17, 16(r1) # save R17 + stw r18, 20(r1) # save R18 + stw r19, 24(r1) # save R19 + stw r20, 28(r1) # save R20 + stw r21, 32(r1) # save R21 + stw r22, 36(r1) # save R22 + stw r23, 40(r1) # save R23 + stw r24, 44(r1) # save R24 + stw r25, 48(r1) # save R25 + stw r26, 52(r1) # save R26 + stw r27, 56(r1) # save R27 + stw r28, 60(r1) # save R28 + stw r29, 64(r1) # save R29 + stw r30, 68(r1) # save R30 + stw r31, 72(r1) # save R31 + stw r3, 76(r1) # save hidden (R3) + + # save CR + mfcr r0 + stw r0, 80(r1) + # save LR + mflr r0 + stw r0, 84(r1) + # save LR as PC + stw r0, 88(r1) + + # store stack pointer R1 (pointing to context-data) in R7 + mr r7, r1 + + # load stack pointer R1 (pointing to context-data) from R4 + mr r1, r4 + + lwz r13, 0(r1) # restore R13 + lwz r14, 4(r1) # restore R14 + lwz r15, 8(r1) # restore R15 + lwz r16, 12(r1) # restore R16 + lwz r17, 16(r1) # restore R17 + lwz r18, 20(r1) # restore R18 + lwz r19, 24(r1) # restore R19 + lwz r20, 28(r1) # restore R20 + lwz r21, 32(r1) # restore R21 + lwz r22, 36(r1) # restore R22 + lwz r23, 40(r1) # restore R23 + lwz r24, 44(r1) # restore R24 + lwz r25, 48(r1) # restore R25 + lwz r26, 52(r1) # restore R26 + lwz r27, 56(r1) # restore R27 + lwz r28, 60(r1) # restore R28 + lwz r29, 64(r1) # restore R29 + lwz r30, 68(r1) # restore R30 + lwz r31, 72(r1) # restore R31 + lwz r4, 76(r1) # restore hidden into R4 + + # restore CR + lwz r0, 80(r1) + mtcr r0 + # restore LR + lwz r0, 84(r1) + mtlr r0 + # ignore PC (the slot at offset 88 is not loaded) + + # release the 92 bytes of context-data + addi r1, r1, 92 + + # return transfer_t: R7 stored at offset 0, R5 at offset 4 of the address in R4 + stw r7, 0(r4) + stw r5, 4(r4) + + # load ontop-function address (R6) into CTR + mtctr r6 + + # jump to ontop-function + bctr diff --git 
a/app/context_switch/deboost.context/asm/ontop_ppc32_sysv_xcoff_gas.S b/app/context_switch/deboost.context/asm/ontop_ppc32_sysv_xcoff_gas.S new file mode 100644 index 0000000..cd1428d --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_ppc32_sysv_xcoff_gas.S @@ -0,0 +1,87 @@ +.globl .ontop_fcontext +.globl ontop_fcontext[DS] +.align 2 +.csect ontop_fcontext[DS] +ontop_fcontext: + .long .ontop_fcontext +.ontop_fcontext: + # reserve 92 bytes on the stack for the context-data + subi 1, 1, 92 + + stw 13, 0(1) # save R13 + stw 14, 4(1) # save R14 + stw 15, 8(1) # save R15 + stw 16, 12(1) # save R16 + stw 17, 16(1) # save R17 + stw 18, 20(1) # save R18 + stw 19, 24(1) # save R19 + stw 20, 28(1) # save R20 + stw 21, 32(1) # save R21 + stw 22, 36(1) # save R22 + stw 23, 40(1) # save R23 + stw 24, 44(1) # save R24 + stw 25, 48(1) # save R25 + stw 26, 52(1) # save R26 + stw 27, 56(1) # save R27 + stw 28, 60(1) # save R28 + stw 29, 64(1) # save R29 + stw 30, 68(1) # save R30 + stw 31, 72(1) # save R31 + stw 3, 76(1) # save hidden (R3) + + # save CR + mfcr 0 + stw 0, 80(1) + # save LR + mflr 0 + stw 0, 84(1) + # save LR as PC + stw 0, 88(1) + + # store stack pointer R1 (pointing to context-data) in R7 + mr 7, 1 + + # load stack pointer R1 (pointing to context-data) from R4 + mr 1, 4 + + lwz 13, 0(1) # restore R13 + lwz 14, 4(1) # restore R14 + lwz 15, 8(1) # restore R15 + lwz 16, 12(1) # restore R16 + lwz 17, 16(1) # restore R17 + lwz 18, 20(1) # restore R18 + lwz 19, 24(1) # restore R19 + lwz 20, 28(1) # restore R20 + lwz 21, 32(1) # restore R21 + lwz 22, 36(1) # restore R22 + lwz 23, 40(1) # restore R23 + lwz 24, 44(1) # restore R24 + lwz 25, 48(1) # restore R25 + lwz 26, 52(1) # restore R26 + lwz 27, 56(1) # restore R27 + lwz 28, 60(1) # restore R28 + lwz 29, 64(1) # restore R29 + lwz 30, 68(1) # restore R30 + lwz 31, 72(1) # restore R31 + lwz 4, 76(1) # restore hidden into R4 + + # restore CR + lwz 0, 80(1) + mtcr 0 + # restore LR + lwz 0, 84(1) + mtlr 0 + # ignore PC (the slot at offset 88 is not loaded) + + # release the 92 bytes of context-data + addi 1, 1, 92 + + # return 
transfer_t (NOTE(review): written through R3 here, but through the restored hidden pointer R4 in the ppc32 ELF and Mach-O variants - confirm which is intended) + stw 7, 0(3) + stw 5, 4(3) + + # load R6 into CTR + mtctr 6 + + # jump to the address in CTR + bctr diff --git a/app/context_switch/deboost.context/asm/ontop_ppc64_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/ontop_ppc64_sysv_elf_gas.S new file mode 100644 index 0000000..437f957 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_ppc64_sysv_elf_gas.S @@ -0,0 +1,194 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + 
* ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | CR | LR | PC | back-chain| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | cr saved | lr saved | compiler | linker | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | TOC saved | FCTX | DATA | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.globl ontop_fcontext +#if _CALL_ELF == 2 + .text + .align 2 +ontop_fcontext: + addis %r2, %r12, .TOC.-ontop_fcontext@ha + addi %r2, %r2, .TOC.-ontop_fcontext@l + .localentry ontop_fcontext, . 
- ontop_fcontext +#else + .section ".opd","aw" + .align 3 +ontop_fcontext: +# ifdef _CALL_LINUX + .quad .L.ontop_fcontext,.TOC.@tocbase,0 + .type ontop_fcontext,@function + .text + .align 2 +.L.ontop_fcontext: +# else + .hidden .ontop_fcontext + .globl .ontop_fcontext + .quad .ontop_fcontext,.TOC.@tocbase,0 + .size ontop_fcontext,24 + .type .ontop_fcontext,@function + .text + .align 2 +.ontop_fcontext: +# endif +#endif + # reserve 184 bytes on the stack for the context-data + subi %r1, %r1, 184 + +#if _CALL_ELF != 2 + std %r2, 0(%r1) # save TOC +#endif + std %r14, 8(%r1) # save R14 + std %r15, 16(%r1) # save R15 + std %r16, 24(%r1) # save R16 + std %r17, 32(%r1) # save R17 + std %r18, 40(%r1) # save R18 + std %r19, 48(%r1) # save R19 + std %r20, 56(%r1) # save R20 + std %r21, 64(%r1) # save R21 + std %r22, 72(%r1) # save R22 + std %r23, 80(%r1) # save R23 + std %r24, 88(%r1) # save R24 + std %r25, 96(%r1) # save R25 + std %r26, 104(%r1) # save R26 + std %r27, 112(%r1) # save R27 + std %r28, 120(%r1) # save R28 + std %r29, 128(%r1) # save R29 + std %r30, 136(%r1) # save R30 + std %r31, 144(%r1) # save R31 + std %r3, 152(%r1) # save hidden (R3) + + # save CR + mfcr %r0 + std %r0, 160(%r1) + # save LR + mflr %r0 + std %r0, 168(%r1) + # save LR as PC + std %r0, 176(%r1) + + # store stack pointer R1 (pointing to context-data) in R7 + mr %r7, %r1 + + # load stack pointer R1 (pointing to context-data) from R4 + mr %r1, %r4 + +#if _CALL_ELF != 2 + ld %r2, 0(%r1) # restore TOC +#endif + ld %r14, 8(%r1) # restore R14 + ld %r15, 16(%r1) # restore R15 + ld %r16, 24(%r1) # restore R16 + ld %r17, 32(%r1) # restore R17 + ld %r18, 40(%r1) # restore R18 + ld %r19, 48(%r1) # restore R19 + ld %r20, 56(%r1) # restore R20 + ld %r21, 64(%r1) # restore R21 + ld %r22, 72(%r1) # restore R22 + ld %r23, 80(%r1) # restore R23 + ld %r24, 88(%r1) # restore R24 + ld %r25, 96(%r1) # restore R25 + ld %r26, 104(%r1) # restore R26 + ld %r27, 112(%r1) # restore R27 + ld %r28, 120(%r1) # restore R28 + ld %r29, 128(%r1) # restore R29 + ld %r30, 136(%r1) # 
restore R30 + ld %r31, 144(%r1) # restore R31 + ld %r4, 152(%r1) # restore hidden into R4 + + # restore CR + ld %r0, 160(%r1) + mtcr %r0 + # restore LR + ld %r0, 168(%r1) + mtlr %r0 + # ignore PC (the slot at offset 176 is not loaded) + + # release the 184 bytes of context-data + addi %r1, %r1, 184 + + # return transfer_t: R7 stored at offset 0, R5 at offset 8 of the address in R4 + std %r7, 0(%r4) + std %r5, 8(%r4) + + # load R6 into CTR + mtctr %r6 + + # jump to the address in CTR + bctr +#if _CALL_ELF == 2 + .size ontop_fcontext, .-ontop_fcontext +#else +# ifdef _CALL_LINUX + .size .ontop_fcontext, .-.L.ontop_fcontext +# else + .size .ontop_fcontext, .-.ontop_fcontext +# endif +#endif + + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/ontop_ppc64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..df9363e --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_ppc64_sysv_macho_gas.S @@ -0,0 +1,151 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 
42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | CR | LR | PC | back-chain| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | cr saved | lr saved | compiler | linker | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | TOC saved | FCTX | DATA | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.align 2 +.globl _ontop_fcontext + +_ontop_fcontext: + ; reserve 184 bytes on the stack for the context-data + subi r1, r1, 184 + + std %r14, 8(%r1) ; save R14 + std %r15, 16(%r1) ; save R15 + std %r16, 24(%r1) ; save R16 + std %r17, 32(%r1) ; save R17 + std %r18, 40(%r1) ; save R18 + std %r19, 48(%r1) ; save R19 + std %r20, 56(%r1) ; save R20 + std %r21, 64(%r1) ; save R21 + std %r22, 72(%r1) ; save R22 + std %r23, 80(%r1) ; save R23 + std %r24, 88(%r1) ; save R24 + std %r25, 96(%r1) ; save R25 + std %r26, 104(%r1) ; save R26 + std %r27, 112(%r1) ; save R27 + std %r28, 120(%r1) ; save R28 + std %r29, 128(%r1) ; save R29 + std %r30, 136(%r1) ; save R30 + std %r31, 144(%r1) ; save R31 + std %r3, 152(%r1) ; save hidden (R3) + + ; save CR + mfcr r0 + std r0, 160(r1) + ; save LR + mflr r0 + std r0, 168(r1) + ; save LR as PC + std r0, 176(r1) + + ; store stack pointer R1 (pointing to context-data) in R7 + mr %r7, %r1 + + ; restore 
RSP (pointing to context-data) from R4 + mr r1, r4 + + ld %r14, 8(%r1) ; restore R14 + ld %r15, 16(%r1) ; restore R15 + ld %r16, 24(%r1) ; restore R16 + ld %r17, 32(%r1) ; restore R17 + ld %r18, 40(%r1) ; restore R18 + ld %r19, 48(%r1) ; restore R19 + ld %r20, 56(%r1) ; restore R20 + ld %r21, 64(%r1) ; restore R21 + ld %r22, 72(%r1) ; restore R22 + ld %r23, 80(%r1) ; restore R23 + ld %r24, 88(%r1) ; restore R24 + ld %r25, 96(%r1) ; restore R25 + ld %r26, 104(%r1) ; restore R26 + ld %r27, 112(%r1) ; restore R27 + ld %r28, 120(%r1) ; restore R28 + ld %r29, 128(%r1) ; restore R29 + ld %r30, 136(%r1) ; restore R30 + ld %r31, 144(%r1) ; restore R31 + ld %r4, 152(%r1) ; restore hidden + + ; restore CR + ld r0, 160(r1) + mtcr r0 + ; restore LR + ld r0, 168(r1) + mtlr r0 + ; ignore PC + + ; adjust stack + addi r1, r1, 184 + + ; return transfer_t + std %r7, 0(%r4) + std %r5, 8(%r4) + + ; restore CTR + mtctr r6 + + ; jump to context + bctr diff --git a/app/context_switch/deboost.context/asm/ontop_ppc64_sysv_xcoff_gas.S b/app/context_switch/deboost.context/asm/ontop_ppc64_sysv_xcoff_gas.S new file mode 100644 index 0000000..4213bca --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_ppc64_sysv_xcoff_gas.S @@ -0,0 +1,83 @@ +.align 2 +.globl .ontop_fcontext +.ontop_fcontext: + # reserve 184 bytes on the stack for the context-data + subi 1, 1, 184 + + std 13, 0(1) # save R13 + std 14, 8(1) # save R14 + std 15, 16(1) # save R15 + std 16, 24(1) # save R16 + std 17, 32(1) # save R17 + std 18, 40(1) # save R18 + std 19, 48(1) # save R19 + std 20, 56(1) # save R20 + std 21, 64(1) # save R21 + std 22, 72(1) # save R22 + std 23, 80(1) # save R23 + std 24, 88(1) # save R24 + std 25, 96(1) # save R25 + std 26, 104(1) # save R26 + std 27, 112(1) # save R27 + std 28, 120(1) # save R28 + std 29, 128(1) # save R29 + std 30, 136(1) # save R30 + std 31, 144(1) # save R31 + std 3, 152(1) # save hidden (R3) + + # save CR + mfcr 0 + std 0, 160(1) + # save LR + mflr 0 + std 0, 168(1) + # save LR as PC + std 0, 176(1) + 
+ # store stack pointer R1 (pointing to context-data) in R7 + mr 7, 1 + + # load stack pointer R1 (pointing to context-data) from R4 + mr 1, 4 + + ld 13, 0(1) # restore R13 + ld 14, 8(1) # restore R14 + ld 15, 16(1) # restore R15 + ld 16, 24(1) # restore R16 + ld 17, 32(1) # restore R17 + ld 18, 40(1) # restore R18 + ld 19, 48(1) # restore R19 + ld 20, 56(1) # restore R20 + ld 21, 64(1) # restore R21 + ld 22, 72(1) # restore R22 + ld 23, 80(1) # restore R23 + ld 24, 88(1) # restore R24 + ld 25, 96(1) # restore R25 + ld 26, 104(1) # restore R26 + ld 27, 112(1) # restore R27 + ld 28, 120(1) # restore R28 + ld 29, 128(1) # restore R29 + ld 30, 136(1) # restore R30 + ld 31, 144(1) # restore R31 + ld 4, 152(1) # restore hidden into R4 + + # restore CR + ld 0, 160(1) + mtcr 0 + # restore LR + ld 0, 168(1) + mtlr 0 + # ignore PC (the slot at offset 176 is not loaded) + + # release the 184 bytes of context-data + addi 1, 1, 184 + + # return transfer_t: R7 stored at offset 0, R5 at offset 8 of the address in R4 + std 7, 0(4) + std 5, 8(4) + + # load R6 into CTR + mtctr 6 + + # jump to the address in CTR + bctr diff --git a/app/context_switch/deboost.context/asm/ontop_x86_64_ms_pe_gas.asm b/app/context_switch/deboost.context/asm/ontop_x86_64_ms_pe_gas.asm new file mode 100644 index 0000000..4fda24d --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_x86_64_ms_pe_gas.asm @@ -0,0 +1,142 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fbr_strg | fc_dealloc | limit | base | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | R12 | R13 | R14 | R15 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ---------------------------------------------------------------------------------- * + * | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | * + * ---------------------------------------------------------------------------------- * + * | RDI | RSI | RBX | RBP | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ---------------------------------------------------------------------------------- * + * | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | * + * 
---------------------------------------------------------------------------------- * + * | hidden | RIP | EXIT | parameter area | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ---------------------------------------------------------------------------------- * + * | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | * + * ---------------------------------------------------------------------------------- * + * | parameter area | FCTX | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ---------------------------------------------------------------------------------- * + * | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | * + * ---------------------------------------------------------------------------------- * + * | DATA | | | | * + * ---------------------------------------------------------------------------------- * + * * + * ***********************************************************************************/ + +.file "ontop_x86_64_ms_pe_gas.asm" +.text +.p2align 4,,15 +.globl ontop_fcontext +.def ontop_fcontext; .scl 2; .type 32; .endef +.seh_proc ontop_fcontext +ontop_fcontext: +.seh_endprologue + + pushq %rcx /* save hidden address of transfer_t */ + + pushq %rbp /* save RBP */ + pushq %rbx /* save RBX */ + pushq %rsi /* save RSI */ + pushq %rdi /* save RDI */ + pushq %r15 /* save R15 */ + pushq %r14 /* save R14 */ + pushq %r13 /* save R13 */ + pushq %r12 /* save R12 */ + + /* load NT_TIB */ + movq %gs:(0x30), %r10 + /* save current stack base */ + movq 0x08(%r10), %rax + pushq %rax + /* save current stack limit */ + movq 0x10(%r10), %rax + pushq %rax + /* save current deallocation 
stack */ + movq 0x1478(%r10), %rax + pushq %rax + /* save fiber local storage */ + movq 0x18(%r10), %rax + pushq %rax + + /* preserve RSP (pointing to context-data) in RCX */ + movq %rsp, %rcx + + /* restore RSP (pointing to context-data) from RDX */ + movq %rdx, %rsp + + /* load NT_TIB */ + movq %gs:(0x30), %r10 + /* restore fiber local storage */ + popq %rax + movq %rax, 0x18(%r10) + /* restore deallocation stack */ + popq %rax + movq %rax, 0x1478(%r10) + /* restore stack limit */ + popq %rax + movq %rax, 0x10(%r10) + /* restore stack base */ + popq %rax + movq %rax, 0x8(%r10) + + popq %r12 /* restore R12 */ + popq %r13 /* restore R13 */ + popq %r14 /* restore R14 */ + popq %r15 /* restore R15 */ + popq %rdi /* restore RDI */ + popq %rsi /* restore RSI */ + popq %rbx /* restore RBX */ + popq %rbp /* restore RBP */ + + popq %rax /* restore hidden address of transfer_t */ + + /* keep return-address on stack */ + + /* transfer_t returned in RAX */ + /* return parent fcontext_t */ + movq %rcx, (%rax) + /* return data */ + movq %r8, 0x8(%rax) + + /* transfer_t as 1.arg of context-function */ + /* RCX contains address of returned (hidden) transfer_t */ + movq %rax, %rcx + /* RDX contains address of passed transfer_t */ + movq %rax, %rdx + + /* indirect jump to the address in R9 */ + jmp *%r9 +.seh_endproc + +.section .drectve +.ascii " -export:\"ontop_fcontext\"" diff --git a/app/context_switch/deboost.context/asm/ontop_x86_64_ms_pe_masm.asm b/app/context_switch/deboost.context/asm/ontop_x86_64_ms_pe_masm.asm new file mode 100644 index 0000000..aec6991 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_x86_64_ms_pe_masm.asm @@ -0,0 +1,130 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. 
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; ---------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; ---------------------------------------------------------------------------------- +; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | +; ---------------------------------------------------------------------------------- +; | fbr_strg | fc_dealloc | limit | base | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; ---------------------------------------------------------------------------------- +; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | +; ---------------------------------------------------------------------------------- +; | R12 | R13 | R14 | R15 | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | +; ---------------------------------------------------------------------------------- +; | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | +; ---------------------------------------------------------------------------------- +; | RDI | RSI | RBX | RBP | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +; ---------------------------------------------------------------------------------- +; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | +; ---------------------------------------------------------------------------------- +; | hidden | RIP | EXIT | parameter area | +; 
---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | +; ---------------------------------------------------------------------------------- +; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | +; ---------------------------------------------------------------------------------- +; | parameter area | FCTX | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | +; ---------------------------------------------------------------------------------- +; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | +; ---------------------------------------------------------------------------------- +; | DATA | | | | +; ---------------------------------------------------------------------------------- + +.code + +ontop_fcontext PROC BOOST_CONTEXT_EXPORT FRAME + .endprolog + + push rcx ; save hidden address of transfer_t + + push rbp ; save RBP + push rbx ; save RBX + push rsi ; save RSI + push rdi ; save RDI + push r15 ; save R15 + push r14 ; save R14 + push r13 ; save R13 + push r12 ; save R12 + + ; load NT_TIB + mov r10, gs:[030h] + ; save current stack base + mov rax, [r10+08h] + push rax + ; save current stack limit + mov rax, [r10+010h] + push rax + ; save current deallocation stack + mov rax, [r10+01478h] + push rax + ; save fiber local storage + mov rax, [r10+018h] + push rax + + ; preserve RSP (pointing to context-data) in RCX + mov rcx, rsp + + ; restore RSP (pointing to context-data) from RDX + mov rsp, rdx + + ; load NT_TIB + mov r10, gs:[030h] + ; restore fiber local storage + pop rax + mov [r10+018h], rax + ; restore deallocation stack + pop rax + mov [r10+01478h], rax + ; restore stack limit + pop rax + mov [r10+010h], rax + ; restore 
stack base + pop rax + mov [r10+08h], rax + + pop r12 ; restore R12 + pop r13 ; restore R13 + pop r14 ; restore R14 + pop r15 ; restore R15 + pop rdi ; restore RDI + pop rsi ; restore RSI + pop rbx ; restore RBX + pop rbp ; restore RBP + + pop rax ; restore hidden address of transfer_t + + ; keep return-address on stack + + ; transfer_t returned in RAX + ; return parent fcontext_t + mov [rax], rcx + ; return data + mov [rax+08h], r8 + + ; transfer_t as 1.arg of context-function + ; RCX contains address of returned (hidden) transfer_t + mov rcx, rax + ; RDX contains address of passed transfer_t + mov rdx, rax + + ; indirect jump to the address in R9 + jmp r9 +ontop_fcontext ENDP +END diff --git a/app/context_switch/deboost.context/asm/ontop_x86_64_sysv_elf_gas.S b/app/context_switch/deboost.context/asm/ontop_x86_64_sysv_elf_gas.S new file mode 100644 index 0000000..cf189e9 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_x86_64_sysv_elf_gas.S @@ -0,0 +1,69 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | R12 | R13 | R14 | R15 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | RBX | RBP | RIP | EXIT | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl ontop_fcontext +.type ontop_fcontext,@function +.align 16 +/* ontop_fcontext: pushes RBP, RBX and R12-R15 onto the current stack, keeps the resulting RSP in RAX as the suspended context, switches RSP to the context passed in RDI, pops that context's saved registers and then jumps to the function passed in RDX, handing it the suspended context in RDI and the data pointer from RSI */ +ontop_fcontext: + pushq %rbp /* save RBP */ + pushq %rbx /* save RBX */ + pushq %r15 /* save R15 */ + pushq %r14 /* save R14 */ + pushq %r13 /* save R13 */ + pushq %r12 /* save R12 */ + + /* store RSP (pointing to context-data) in RAX */ + movq %rsp, %rax + + /* restore RSP (pointing to context-data) from RDI */ + movq %rdi, %rsp + + popq %r12 /* restore R12 */ + popq %r13 /* restore R13 */ + popq %r14 /* restore R14 */ + popq %r15 /* restore R15 */ + popq %rbx /* restore RBX */ + popq %rbp /* restore RBP */ + + /* preserve ontop-function in R8 */ + movq %rdx, %r8 + + /* return transfer_t from jump */ + /* RAX == fctx, RDX == 
data */ + movq %rsi, %rdx + /* pass transfer_t as first arg in context function */ + /* RDI == fctx, RSI == data */ + movq %rax, %rdi + + /* keep return-address on stack */ + + /* indirect jump to context */ + jmp *%r8 +.size ontop_fcontext,.-ontop_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/app/context_switch/deboost.context/asm/ontop_x86_64_sysv_macho_gas.S b/app/context_switch/deboost.context/asm/ontop_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..0135435 --- /dev/null +++ b/app/context_switch/deboost.context/asm/ontop_x86_64_sysv_macho_gas.S @@ -0,0 +1,64 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | R12 | R13 | R14 | R15 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | RBX | RBP | RIP | EXIT | * + * ---------------------------------------------------------------------------------- * + * * + 
****************************************************************************************/ + +.text +.globl _ontop_fcontext +.align 8 +/* _ontop_fcontext: pushes RBP, RBX and R12-R15 onto the current stack, keeps the resulting RSP in RAX as the suspended context, switches RSP to the context passed in RDI, pops that context's saved registers and then jumps to the function passed in RDX, handing it the suspended context in RDI and the data pointer from RSI */ +_ontop_fcontext: + pushq %rbp /* save RBP */ + pushq %rbx /* save RBX */ + pushq %r15 /* save R15 */ + pushq %r14 /* save R14 */ + pushq %r13 /* save R13 */ + pushq %r12 /* save R12 */ + + /* store RSP (pointing to context-data) in RAX */ + movq %rsp, %rax + + /* restore RSP (pointing to context-data) from RDI */ + movq %rdi, %rsp + + popq %r12 /* restore R12 */ + popq %r13 /* restore R13 */ + popq %r14 /* restore R14 */ + popq %r15 /* restore R15 */ + popq %rbx /* restore RBX */ + popq %rbp /* restore RBP */ + + /* preserve ontop-function in R8 */ + movq %rdx, %r8 + + /* return transfer_t from jump */ + /* RAX == fctx, RDX == data */ + movq %rsi, %rdx + /* pass transfer_t as first arg in context function */ + /* RDI == fctx, RSI == data */ + movq %rax, %rdi + + /* keep return-address on stack */ + + /* indirect jump to context */ + jmp *%r8 diff --git a/app/context_switch/deboost.context/cmake/ios.toolchain.cmake b/app/context_switch/deboost.context/cmake/ios.toolchain.cmake new file mode 100644 index 0000000..a56ea82 --- /dev/null +++ b/app/context_switch/deboost.context/cmake/ios.toolchain.cmake @@ -0,0 +1,429 @@ +# This file is part of the ios-cmake project. It was retrieved from +# https://github.com/cristeab/ios-cmake.git, which is a fork of +# https://code.google.com/p/ios-cmake/. Which in turn is based off of +# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which +# are included with CMake 2.8.4 +# +# The ios-cmake project is licensed under the new BSD license. +# +# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software, +# Kitware, Inc., Insight Software Consortium. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# This file is based off of the Platform/Darwin.cmake and +# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 +# It has been altered for iOS development. 
+# +# Updated by Alex Stewart (alexs.mac@gmail.com) +# +# ***************************************************************************** +# Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com) +# under the BSD-3-Clause license +# ***************************************************************************** +# +# INFORMATION / HELP +# +# The following variables control the behaviour of this toolchain: +# +# IOS_PLATFORM: OS (default) or SIMULATOR or SIMULATOR64 or TVOS or SIMULATOR_TVOS +# OS = Build for iPhoneOS. +# SIMULATOR = Build for x86 i386 iPhone Simulator. +# SIMULATOR64 = Build for x86_64 iPhone Simulator. +# TVOS = Build for AppleTVOS. +# SIMULATOR_TVOS = Build for x86_64 AppleTV Simulator. +# CMAKE_OSX_SYSROOT: Path to the iOS SDK to use. By default this is +# automatically determined from IOS_PLATFORM and xcodebuild, but +# can also be manually specified (although this should not be required). +# CMAKE_IOS_DEVELOPER_ROOT: Path to the Developer directory for the iOS platform +# being compiled for. By default this is automatically determined from +# CMAKE_OSX_SYSROOT, but can also be manually specified (although this should +# not be required). +# ENABLE_BITCODE: (1|0) Enables or disables bitcode support. Default 1 (true) +# ENABLE_ARC: (1|0) Enables or disables ARC support. Default 1 (true, ARC enabled by default) +# ENABLE_VISIBILITY: (1|0) Enables or disables symbol visibility support. Default 0 (false, visibility hidden by default) +# IOS_ARCH: (armv7 armv7s arm64 i386 x86_64) If specified, will override the default architectures for the given IOS_PLATFORM +# OS = armv7 armv7s arm64 +# SIMULATOR = i386 +# SIMULATOR64 = x86_64 +# TVOS = arm64 +# SIMULATOR_TVOS = x86_64 +# +# This toolchain defines the following variables for use externally: +# +# XCODE_VERSION: Version number (not including Build version) of Xcode detected. +# IOS_SDK_VERSION: Version of iOS SDK being used. 
+# CMAKE_OSX_ARCHITECTURES: Architectures being compiled for (generated from +# IOS_PLATFORM). +# +# This toolchain defines the following macros for use externally: +# +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT) +# A convenience macro for setting xcode specific properties on targets. +# Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all"). +# +# find_host_package (PROGRAM ARGS) +# A macro used to find executable programs on the host system, not within the +# iOS environment. Thanks to the android-cmake project for providing the +# command. + +# Fix for PThread library not in path +set(CMAKE_THREAD_LIBS_INIT "-lpthread") +set(CMAKE_HAVE_THREADS_LIBRARY 1) +set(CMAKE_USE_WIN32_THREADS_INIT 0) +set(CMAKE_USE_PTHREADS_INIT 1) + +# Get the Xcode version being used. +execute_process(COMMAND xcodebuild -version + OUTPUT_VARIABLE XCODE_VERSION + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION "${XCODE_VERSION}") +string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION "${XCODE_VERSION}") +message(STATUS "Building with Xcode version: ${XCODE_VERSION}") +# Default to building for iPhoneOS if not specified otherwise, and we cannot +# determine the platform from the CMAKE_OSX_ARCHITECTURES variable. The use +# of CMAKE_OSX_ARCHITECTURES is such that try_compile() projects can correctly +# determine the value of IOS_PLATFORM from the root project, as +# CMAKE_OSX_ARCHITECTURES is propagated to them by CMake. 
+if (NOT DEFINED IOS_PLATFORM) + if (CMAKE_OSX_ARCHITECTURES) + if (CMAKE_OSX_ARCHITECTURES MATCHES ".*arm.*") + set(IOS_PLATFORM "OS") + elseif (CMAKE_OSX_ARCHITECTURES MATCHES "i386") + set(IOS_PLATFORM "SIMULATOR") + elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") + set(IOS_PLATFORM "SIMULATOR64") + endif() + endif() + if (NOT IOS_PLATFORM) + set(IOS_PLATFORM "OS") + endif() +endif() +set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING + "Type of iOS platform for which to build.") +# Determine the platform name and architectures for use in xcodebuild commands +# from the specified IOS_PLATFORM name. +if (IOS_PLATFORM STREQUAL "OS") + set(XCODE_IOS_PLATFORM iphoneos) + if(NOT IOS_ARCH) + set(IOS_ARCH armv7 armv7s arm64) + endif() +elseif (IOS_PLATFORM STREQUAL "SIMULATOR") + set(XCODE_IOS_PLATFORM iphonesimulator) + if(NOT IOS_ARCH) + set(IOS_ARCH i386) + endif() +elseif(IOS_PLATFORM STREQUAL "SIMULATOR64") + set(XCODE_IOS_PLATFORM iphonesimulator) + if(NOT IOS_ARCH) + set(IOS_ARCH x86_64) + endif() +elseif (IOS_PLATFORM STREQUAL "TVOS") + set(XCODE_IOS_PLATFORM appletvos) + if(NOT IOS_ARCH) + set(IOS_ARCH arm64) + endif() +elseif (IOS_PLATFORM STREQUAL "SIMULATOR_TVOS") + set(XCODE_IOS_PLATFORM appletvsimulator) + if(NOT IOS_ARCH) + set(IOS_ARCH x86_64) + endif() +else() + message(FATAL_ERROR "Invalid IOS_PLATFORM: ${IOS_PLATFORM}") +endif() +message(STATUS "Configuring iOS build for platform: ${IOS_PLATFORM}, " + "architecture(s): ${IOS_ARCH}") +# If user did not specify the SDK root to use, then query xcodebuild for it. 
+if (NOT CMAKE_OSX_SYSROOT) + execute_process(COMMAND xcodebuild -version -sdk ${XCODE_IOS_PLATFORM} Path + OUTPUT_VARIABLE CMAKE_OSX_SYSROOT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT} for platform: ${IOS_PLATFORM}") +endif() +if (NOT EXISTS ${CMAKE_OSX_SYSROOT}) + message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} " + "does not exist.") +endif() +# Specify minimum version of deployment target. +if (NOT DEFINED IOS_DEPLOYMENT_TARGET) + # Unless specified, SDK version 8.0 is used by default as minimum target version. + set(IOS_DEPLOYMENT_TARGET "8.0" + CACHE STRING "Minimum iOS version to build for." ) + message(STATUS "Using the default min-version since IOS_DEPLOYMENT_TARGET not provided!") +endif() +# Use bitcode or not +if (NOT DEFINED ENABLE_BITCODE AND NOT IOS_ARCH MATCHES "((^|, )(i386|x86_64))+") + # Unless specified, enable bitcode support by default + set(ENABLE_BITCODE TRUE CACHE BOOL "Whether or not to enable bitcode") + message(STATUS "Enabling bitcode support by default. ENABLE_BITCODE not provided!") +endif() +if (NOT DEFINED ENABLE_BITCODE) + message(STATUS "Disabling bitcode support by default on simulators. ENABLE_BITCODE not provided for override!") +endif() +# Use ARC or not +if (NOT DEFINED ENABLE_ARC) + # Unless specified, enable ARC support by default + set(ENABLE_ARC TRUE CACHE BOOL "Whether or not to enable ARC") + message(STATUS "Enabling ARC support by default. ENABLE_ARC not provided!") +endif() +# Use hidden visibility or not +if (NOT DEFINED ENABLE_VISIBILITY) + # Unless specified, disable symbols visibility by default + set(ENABLE_VISIBILITY FALSE CACHE BOOL "Whether or not to hide symbols (-fvisibility=hidden)") + message(STATUS "Hiding symbols visibility by default. ENABLE_VISIBILITY not provided!") +endif() +# Get the SDK version information. 
+execute_process(COMMAND xcodebuild -sdk ${CMAKE_OSX_SYSROOT} -version SDKVersion + OUTPUT_VARIABLE IOS_SDK_VERSION + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +# Find the Developer root for the specific iOS platform being compiled for +# from CMAKE_OSX_SYSROOT. Should be ../../ from SDK specified in +# CMAKE_OSX_SYSROOT. There does not appear to be a direct way to obtain +# this information from xcrun or xcodebuild. +if (NOT CMAKE_IOS_DEVELOPER_ROOT) + get_filename_component(IOS_PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT} PATH) + get_filename_component(CMAKE_IOS_DEVELOPER_ROOT ${IOS_PLATFORM_SDK_DIR} PATH) +endif() +if (NOT EXISTS ${CMAKE_IOS_DEVELOPER_ROOT}) + message(FATAL_ERROR "Invalid CMAKE_IOS_DEVELOPER_ROOT: " + "${CMAKE_IOS_DEVELOPER_ROOT} does not exist.") +endif() +# Find the C & C++ compilers for the specified SDK. +if (NOT CMAKE_C_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang + OUTPUT_VARIABLE CMAKE_C_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}") +endif() +if (NOT CMAKE_CXX_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang++ + OUTPUT_VARIABLE CMAKE_CXX_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}") +endif() +# Find (Apple's) libtool. +execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find libtool + OUTPUT_VARIABLE IOS_LIBTOOL + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +message(STATUS "Using libtool: ${IOS_LIBTOOL}") +# Configure libtool to be used instead of ar + ranlib to build static libraries. +# This is required on Xcode 7+, but should also work on previous versions of +# Xcode. +set(CMAKE_C_CREATE_STATIC_LIBRARY + "${IOS_LIBTOOL} -static -o ") +set(CMAKE_CXX_CREATE_STATIC_LIBRARY + "${IOS_LIBTOOL} -static -o ") +# Get the version of Darwin (OS X) of the host. 
+execute_process(COMMAND uname -r + OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +# Standard settings. +set(CMAKE_SYSTEM_NAME Darwin CACHE INTERNAL "") +set(CMAKE_SYSTEM_VERSION ${IOS_SDK_VERSION} CACHE INTERNAL "") +set(UNIX TRUE CACHE BOOL "") +set(APPLE TRUE CACHE BOOL "") +set(IOS TRUE CACHE BOOL "") +set(CMAKE_AR ar CACHE FILEPATH "" FORCE) +set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) +# Force unset of OS X-specific deployment target (otherwise autopopulated), +# required as of cmake 2.8.10. +set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING + "Must be empty for iOS builds." FORCE) +# Set the architectures for which to build. +set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE STRING "Build architecture for iOS") +# Skip the platform compiler checks for cross compiling. +set(CMAKE_CXX_COMPILER_FORCED TRUE) +set(CMAKE_CXX_COMPILER_WORKS TRUE) +set(CMAKE_C_COMPILER_FORCED TRUE) +set(CMAKE_C_COMPILER_WORKS TRUE) +# All iOS/Darwin specific settings - some may be redundant. 
+set(CMAKE_SHARED_LIBRARY_PREFIX "lib") +set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") +set(CMAKE_SHARED_MODULE_PREFIX "lib") +set(CMAKE_SHARED_MODULE_SUFFIX ".so") +set(CMAKE_C_COMPILER_ABI ELF) +set(CMAKE_CXX_COMPILER_ABI ELF) +set(CMAKE_C_HAS_ISYSROOT 1) +set(CMAKE_CXX_HAS_ISYSROOT 1) +set(CMAKE_MODULE_EXISTS 1) +set(CMAKE_DL_LIBS "") +set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +if(IOS_ARCH MATCHES "((^|, )(arm64|x86_64))+") + set(CMAKE_C_SIZEOF_DATA_PTR 8) + set(CMAKE_CXX_SIZEOF_DATA_PTR 8) + message(STATUS "Using a data_ptr size of 8") +else() + set(CMAKE_C_SIZEOF_DATA_PTR 4) + set(CMAKE_CXX_SIZEOF_DATA_PTR 4) + message(STATUS "Using a data_ptr size of 4") +endif() + +message(STATUS "Building for minimum iOS version: ${IOS_DEPLOYMENT_TARGET}" + " (SDK version: ${IOS_SDK_VERSION})") +# Note that only Xcode 7+ supports the newer more specific: +# -m${XCODE_IOS_PLATFORM}-version-min flags, older versions of Xcode use: +# -m(ios/ios-simulator)-version-min instead. +if (IOS_PLATFORM STREQUAL "OS") + if (XCODE_VERSION VERSION_LESS 7.0) + set(XCODE_IOS_PLATFORM_VERSION_FLAGS + "-mios-version-min=${IOS_DEPLOYMENT_TARGET}") + else() + # Xcode 7.0+ uses flags we can build directly from XCODE_IOS_PLATFORM. + set(XCODE_IOS_PLATFORM_VERSION_FLAGS + "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}") + endif() +elseif (IOS_PLATFORM STREQUAL "TVOS") + set(XCODE_IOS_PLATFORM_VERSION_FLAGS + "-mtvos-version-min=${IOS_DEPLOYMENT_TARGET}") +elseif (IOS_PLATFORM STREQUAL "SIMULATOR_TVOS") + set(XCODE_IOS_PLATFORM_VERSION_FLAGS + "-mtvos-simulator-version-min=${IOS_DEPLOYMENT_TARGET}") +else() + # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min. 
+ set(XCODE_IOS_PLATFORM_VERSION_FLAGS + "-mios-simulator-version-min=${IOS_DEPLOYMENT_TARGET}") +endif() +message(STATUS "Version flags set to: ${XCODE_IOS_PLATFORM_VERSION_FLAGS}") + +if (ENABLE_BITCODE) + set(BITCODE "-fembed-bitcode") + set(HEADER_PAD "") + message(STATUS "Enabling bitcode support.") +else() + set(BITCODE "") + set(HEADER_PAD "-headerpad_max_install_names") + message(STATUS "Disabling bitcode support.") +endif() + +if (ENABLE_ARC) + set(FOBJC_ARC "-fobjc-arc") + message(STATUS "Enabling ARC support.") +else() + set(FOBJC_ARC "-fno-objc-arc") + message(STATUS "Disabling ARC support.") +endif() + +if (NOT ENABLE_VISIBILITY) + set(VISIBILITY "-fvisibility=hidden") + message(STATUS "Hiding symbols (-fvisibility=hidden).") +else() + set(VISIBILITY "") +endif() + +set(CMAKE_C_FLAGS +"${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${BITCODE} -fobjc-abi-version=2 ${FOBJC_ARC} ${C_FLAGS}") +# Hidden visibilty is required for C++ on iOS. +set(CMAKE_CXX_FLAGS +"${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} -fvisibility-inlines-hidden -fobjc-abi-version=2 ${FOBJC_ARC} ${CXX_FLAGS}") +set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -DNDEBUG -Os -fomit-frame-pointer -ffast-math ${BITCODE} ${CXX_FLAGS_MINSIZEREL}") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -DNDEBUG -O2 -g -fomit-frame-pointer -ffast-math ${BITCODE} ${CXX_FLAGS_RELWITHDEBINFO}") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DNDEBUG -O3 -fomit-frame-pointer -ffast-math ${BITCODE} ${CXX_FLAGS_RELEASE}") +set(CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${C_LINK_FLAGS}") +set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CXX_LINK_FLAGS}") + +# In order to ensure that the updated compiler flags are used in try_compile() +# tests, we have to forcibly set them in the CMake cache, not merely set them +# in the local scope. 
+list(APPEND VARS_TO_FORCE_IN_CACHE + CMAKE_C_FLAGS + CMAKE_CXX_FLAGS + CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS_RELEASE + CMAKE_C_LINK_FLAGS + CMAKE_CXX_LINK_FLAGS) +foreach(VAR_TO_FORCE ${VARS_TO_FORCE_IN_CACHE}) + set(${VAR_TO_FORCE} "${${VAR_TO_FORCE}}" CACHE STRING "" FORCE) +endforeach() + +set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) +set (CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks") +set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib ${HEADER_PAD}") +set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle ${HEADER_PAD}") +set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") + +# Hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old +# build tree (where install_name_tool was hardcoded) and where +# CMAKE_INSTALL_NAME_TOOL isn't in the cache and still cmake didn't fail in +# CMakeFindBinUtils.cmake (because it isn't rerun) hardcode +# CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did +# before, Alex. +if (NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool) +endif (NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + +# Set the find root to the iOS developer roots and to user defined paths. +set(CMAKE_FIND_ROOT_PATH ${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_OSX_SYSROOT} + ${CMAKE_PREFIX_PATH} CACHE string "iOS find search path root" FORCE) +# Default to searching for frameworks first. +set(CMAKE_FIND_FRAMEWORK FIRST) +# Set up the default search directories for frameworks. +set(CMAKE_SYSTEM_FRAMEWORK_PATH + ${CMAKE_OSX_SYSROOT}/System/Library/Frameworks + ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks + ${CMAKE_OSX_SYSROOT}/Developer/Library/Frameworks) +# Only search the specified iOS SDK, not the remainder of the host filesystem. 
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +# This little macro lets you set any XCode specific property. +macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION) + set(XCODE_RELVERSION_I "${XCODE_RELVERSION}") + if (XCODE_RELVERSION_I STREQUAL "All") + set_property(TARGET ${TARGET} PROPERTY + XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}") + else() + set_property(TARGET ${TARGET} PROPERTY + XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}") + endif() +endmacro(set_xcode_property) +# This macro lets you find executable programs on the host system. +macro(find_host_package) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) + set(IOS FALSE) + find_package(${ARGN}) + set(IOS TRUE) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +endmacro(find_host_package) diff --git a/app/context_switch/deboost.context/include/fcontext/fcontext.h b/app/context_switch/deboost.context/include/fcontext/fcontext.h new file mode 100644 index 0000000..5c8365c --- /dev/null +++ b/app/context_switch/deboost.context/include/fcontext/fcontext.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + typedef void* fcontext_t; + + typedef struct + { + fcontext_t ctx; + void* data; + } fcontext_transfer_t; + + typedef struct + { + void* sptr; + size_t ssize; + } fcontext_stack_t; + + /** + * Callback definition for context (coroutine) + */ + typedef void (*pfn_fcontext)(fcontext_transfer_t); + + /** + * Switches to another context + * @param to Target context to switch to + * @param vp Custom user pointer to pass to new context + */ + fcontext_transfer_t jump_fcontext(fcontext_t const to, void * vp); + + /** + * Make a new context 
+ * @param sp Pointer to allocated stack memory + * @param size Stack memory size + * @param corofn Callback function for context (coroutine) + */ + fcontext_t make_fcontext(void * sp, size_t size, pfn_fcontext corofn); + + fcontext_transfer_t ontop_fcontext(fcontext_t const to, void * vp, fcontext_transfer_t(*fn)(fcontext_transfer_t)); + + fcontext_stack_t create_fcontext_stack(size_t size); + void destroy_fcontext_stack(fcontext_stack_t* s); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/app/context_switch/deboost.context/source/stack.c b/app/context_switch/deboost.context/source/stack.c new file mode 100644 index 0000000..2477c08 --- /dev/null +++ b/app/context_switch/deboost.context/source/stack.c @@ -0,0 +1,175 @@ +#include +#include +#include + +#include "fcontext.h" + +// Detect posix +#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) +/* UNIX-style OS. ------------------------------------------- */ +# include +# define _HAVE_POSIX 1 +#endif + +#ifdef _WIN32 +/* WIN32_LEAN_AND_MEAN has to be defined before the Windows header is included so that rarely used APIs are left out */ +# define WIN32_LEAN_AND_MEAN +# include +/* x86_64 + * test x86_64 before i386 because icc might + * define __i686__ for x86_64 too */ +#if defined(__x86_64__) || defined(__x86_64) \ + || defined(__amd64__) || defined(__amd64) \ + || defined(_M_X64) || defined(_M_AMD64) +/* Windows seems not to provide a constant or function + * telling the minimal stacksize */ +# define MINSIGSTKSZ 8192 +#else +# define MINSIGSTKSZ 4096 +#endif + +/* Page size reported by GetSystemInfo */ +static size_t getPageSize() +{ + SYSTEM_INFO si; + GetSystemInfo(&si); + return (size_t)si.dwPageSize; +} + +/* Smallest stack size accepted (MINSIGSTKSZ as defined above) */ +static size_t getMinSize() +{ + return MINSIGSTKSZ; +} + +/* Largest stack size accepted on Windows */ +static size_t getMaxSize() +{ + return 1 * 1024 * 1024 * 1024; /* 1GB */ +} + +/* Stack size used when the caller asks for size 0 */ +static size_t getDefaultSize() +{ + return 131072; // 128kb +} + +#elif defined(_HAVE_POSIX) +#include +#include +#include +#include +#include +#include + +#if !defined (SIGSTKSZ) +# define SIGSTKSZ 131072 // 128kb recommended +# define UDEF_SIGSTKSZ 
+#endif + +#if !defined (MINSIGSTKSZ) +# define MINSIGSTKSZ 32768 // 32kb minimum +# define UDEF_MINSIGSTKSZ +#endif + +static size_t getPageSize() +{ + /* conform to POSIX.1-2001 */ + return (size_t)sysconf(_SC_PAGESIZE); +} + +static size_t getMinSize() +{ + return MINSIGSTKSZ; +} + +static size_t getMaxSize() +{ + struct rlimit limit; + getrlimit(RLIMIT_STACK, &limit); + + return (size_t)limit.rlim_max; +} + +static size_t getDefaultSize() +{ + return SIGSTKSZ; +} +#endif + +/* + * Stack allocation and protection. + * + * size is the requested stack size in bytes; 0 selects getDefaultSize(). + * The request is clamped to [getMinSize(), getMaxSize()] and rounded down + * to a whole number of pages. On Windows and POSIX the lowest page of the + * allocation is turned into a guard page. + * + * Returns a fcontext_stack_t whose sptr points one past the highest byte + * of the allocation and whose ssize is the allocated size; if the + * allocation fails, sptr is NULL and ssize is 0. + */ +fcontext_stack_t create_fcontext_stack(size_t size) +{ + size_t pagesz; + size_t pages; + size_t size_; + void* vp; + fcontext_stack_t s; + s.sptr = NULL; + s.ssize = 0; + + /* fix size */ + if (size == 0) + size = getDefaultSize(); + size_t minsz = getMinSize(); + size_t maxsz = getMaxSize(); + if (size < minsz) + size = minsz; + if (size > maxsz) + size = maxsz; + + /* number of whole pages that fit into size; integer division is used because + * converting size to float loses precision for large sizes and can round the + * page count up, which would make size_ exceed size */ + pagesz = getPageSize(); + pages = size / pagesz; + assert(pages >= 2); /* at least two pages must fit into stack (one page is guard-page) */ + + size_ = pages * pagesz; + assert(size_ != 0 && size != 0); + assert(size_ <= size); + +#ifdef _WIN32 + vp = VirtualAlloc(0, size_, MEM_COMMIT, PAGE_READWRITE); + if (!vp) + return s; + + /* lowest page becomes the guard page */ + DWORD old_options; + VirtualProtect(vp, pagesz, PAGE_READWRITE | PAGE_GUARD, &old_options); +#elif defined(_HAVE_POSIX) +# if defined(MAP_ANON) + vp = mmap(0, size_, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); +# else + vp = mmap(0, size_, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +# endif + if (vp == MAP_FAILED) + return s; + /* lowest page becomes the guard page */ + mprotect(vp, pagesz, PROT_NONE); +#else + vp = malloc(size_); + if (!vp) + return s; +#endif + + /* sptr is the high end of the allocation */ + s.sptr = (char*)vp + size_; + s.ssize = size_; + return s; +} + +void destroy_fcontext_stack(fcontext_stack_t* s) +{ + void* vp; + + assert(s->ssize >= getMinSize()); + assert(s->ssize <= getMaxSize()); + + vp = (char*)s->sptr - s->ssize; + +#ifdef _WIN32 + VirtualFree(vp, 0, MEM_RELEASE); +#elif 
defined(_HAVE_POSIX) + munmap(vp, s->ssize); +#else + free(vp); +#endif + + memset(s, 0x00, sizeof(fcontext_stack_t)); +} + +#ifdef UDEF_SIGSTKSZ +# undef SIGSTKSZ +#endif + +#ifdef UDEF_MINSIGSTKSZ +# undef MINSIGSTKSZ +#endif diff --git a/app/context_switch/deboost.context/test/test_fcontext.c b/app/context_switch/deboost.context/test/test_fcontext.c new file mode 100644 index 0000000..c6d08e0 --- /dev/null +++ b/app/context_switch/deboost.context/test/test_fcontext.c @@ -0,0 +1,56 @@ +#ifdef _WIN32 +# define WIN32_LEAN_AND_MEAN +# include +#else +# include +#endif + +#include +#include + +fcontext_t ctx; +fcontext_t ctx2; + +static inline void fsleep(uint32_t _ms) +{ +#ifdef _WIN32 + Sleep(_ms); +#else + struct timespec req = { (time_t)_ms / 1000, (long)((_ms % 1000) * 1000000) }; + struct timespec rem = { 0, 0 }; + nanosleep(&req, &rem); +#endif +} + +static void doo(fcontext_transfer_t t) +{ + puts("DOO"); + fsleep(1000); + jump_fcontext(t.ctx, NULL); +} + +static void foo(fcontext_transfer_t t) +{ + puts("FOO"); + fsleep(1000); + jump_fcontext(ctx2, NULL); + puts("FOO 2"); + fsleep(1000); + jump_fcontext(t.ctx, NULL); +} + +int main() +{ + fcontext_stack_t s = create_fcontext_stack(16 * 1024); + fcontext_stack_t s2 = create_fcontext_stack(0); + + ctx = make_fcontext(s.sptr, s.ssize, foo); + ctx2 = make_fcontext(s2.sptr, s2.ssize, doo); + + jump_fcontext(ctx, NULL); + puts("END"); + + destroy_fcontext_stack(&s); + destroy_fcontext_stack(&s2); + return 0; +} \ No newline at end of file diff --git a/app/context_switch/main.cpp b/app/context_switch/main.cpp new file mode 100644 index 0000000..97e72bd --- /dev/null +++ b/app/context_switch/main.cpp @@ -0,0 +1,179 @@ +#include +#include +#include +#include +#include + +#include "fcontext/fcontext.h" + +#include "context_switcher/context_switcher.h" +#include "context_switcher/continuation.h" + +using namespace std; + +// Settings for stack and benchmark +const size_t NUM_RUNS = 1000000; +const size_t STACK_SIZE = 
512 * 1; +const char MAGIC_NUMBER = (unsigned char) 0xAB; + +// Memory for custom stack and continuation semantics +char custom_stack_1[STACK_SIZE] = {0}; +jmp_buf buffer; + +// Example callback function and declaration of our assembly stack switching routine +extern "C" { +void custom_stack_callback(void *); + +void __attribute__ ((noinline)) callback() { + static volatile int tmp; + tmp = 0; // Force at least a single memory write +} +} + +// Baseline: times NUM_RUNS iterations of a loop whose body is a single volatile write. +// Returns the elapsed steady_clock time as a tick count. +long measure_loop() { + auto start_time = chrono::steady_clock::now(); + volatile int tmp; + for (unsigned int i = 0; i < NUM_RUNS; i++) { + tmp = 0; + } + auto end_time = chrono::steady_clock::now(); + return chrono::duration_cast(end_time - start_time).count(); +} + +// Times NUM_RUNS direct calls to the non-inlined callback(). +// Returns the elapsed steady_clock time as a tick count. +long measure_function_call() { + auto start_time = chrono::steady_clock::now(); + for (unsigned int i = 0; i < NUM_RUNS; i++) { + callback(); + } + auto end_time = chrono::steady_clock::now(); + return chrono::duration_cast(end_time - start_time).count(); +} + +// Times NUM_RUNS calls to custom_stack_callback, each given the address one past the end of custom_stack_1. +// Returns the elapsed steady_clock time as a tick count. +long measure_stack_switch() { + auto start_time = chrono::steady_clock::now(); + for (unsigned int i = 0; i < NUM_RUNS; i++) { + custom_stack_callback(&custom_stack_1[STACK_SIZE]); + } + auto end_time = chrono::steady_clock::now(); + return chrono::duration_cast(end_time - start_time).count(); +} + +// Same as measure_stack_switch, but every iteration first records a setjmp point in buffer. +// Returns the elapsed steady_clock time as a tick count. +long measure_continuation() { + auto start_time = chrono::steady_clock::now(); + for (unsigned int i = 0; i < NUM_RUNS; i++) { + if (setjmp(buffer) == 0) { + custom_stack_callback(&custom_stack_1[STACK_SIZE]); + } + } + auto end_time = chrono::steady_clock::now(); + return chrono::duration_cast(end_time - start_time).count(); +} + +// Same as measure_continuation, but after the call it also longjmps back to the recorded setjmp point. +// Returns the elapsed steady_clock time as a tick count. +long measure_continuation_and_jump() { + auto start_time = chrono::steady_clock::now(); + for (unsigned int i = 0; i < NUM_RUNS; i++) { + if (setjmp(buffer) == 0) { + custom_stack_callback(&custom_stack_1[STACK_SIZE]); + longjmp(buffer, 1); + } + } + auto end_time = chrono::steady_clock::now(); + return chrono::duration_cast(end_time - start_time).count(); +} + +void 
fcontext_callback_fast(fcontext_transfer_t transfer) { + for (;;) { + callback(); + jump_fcontext(transfer.ctx, nullptr); + } +} + +long measure_fcontext_fast() { + fcontext_t context = make_fcontext(&custom_stack_1[STACK_SIZE], STACK_SIZE, &fcontext_callback_fast); + + auto start_time = chrono::steady_clock::now(); + for (unsigned int i = 0; i < NUM_RUNS; i++) { + context = jump_fcontext(context, nullptr).ctx; + } + auto end_time = chrono::steady_clock::now(); + return chrono::duration_cast(end_time - start_time).count(); +} + +void fcontext_callback_clean(fcontext_transfer_t transfer) { + callback(); + jump_fcontext(transfer.ctx, nullptr); +} + +long measure_fcontext_clean() { + auto start_time = chrono::steady_clock::now(); + for (unsigned int i = 0; i < NUM_RUNS; i++) { + fcontext_t context = make_fcontext(&custom_stack_1[STACK_SIZE], STACK_SIZE, &fcontext_callback_clean); + jump_fcontext(context, nullptr); + } + auto end_time = chrono::steady_clock::now(); + return chrono::duration_cast(end_time - start_time).count(); +} + +void fcontext_callcc(fcontext_transfer_t transfer) { + callback(); + jump_fcontext(jump_fcontext(transfer.ctx, nullptr).ctx, nullptr); +} + +long measure_fcontext_callcc() { + auto start_time = chrono::steady_clock::now(); + for (unsigned int i = 0; i < NUM_RUNS; i++) { + fcontext_t context = make_fcontext(&custom_stack_1[STACK_SIZE], STACK_SIZE, &fcontext_callcc); + jump_fcontext(jump_fcontext(context, nullptr).ctx, nullptr); + } + auto end_time = chrono::steady_clock::now(); + return chrono::duration_cast(end_time - start_time).count(); +} + +long measure_custom() { + auto start_time = chrono::steady_clock::now(); + for (unsigned int i = 0; i < NUM_RUNS; i++) { + context_switcher::enter_context(custom_stack_1, STACK_SIZE, [](context_switcher::continuation &&continuation) { + callback(); + return std::move(continuation); + }); + } + auto end_time = chrono::steady_clock::now(); + return chrono::duration_cast(end_time - start_time).count(); 
+} + +int main() { + memset(custom_stack_1, MAGIC_NUMBER, STACK_SIZE); + + auto time_cont_jump = measure_continuation_and_jump(); + auto time_cont = measure_continuation(); + auto time_stack = measure_stack_switch(); + auto time_func = measure_function_call(); + auto time_loop = measure_loop(); + auto time_fcontext_fast = measure_fcontext_fast(); + auto time_fcontext_clean = measure_fcontext_clean(); + auto time_fcontext_calcc = measure_fcontext_callcc(); + auto time_custom = measure_custom(); + + printf("Base\n"); + printf("Function Call : %10ld, %5.5f\n", time_func, ((float) time_func / NUM_RUNS)); + printf("Simple Loop : %10ld, %5.5f\n", time_loop, ((float) time_loop / NUM_RUNS)); + printf("Longjmp\n"); + printf("Stack Switching : %10ld, %5.5f\n", time_stack, ((float) time_stack / NUM_RUNS)); + printf("Full Continuation: %10ld, %5.5f\n", time_cont, ((float) time_cont / NUM_RUNS)); + printf("Jump Continuation: %10ld, %5.5f\n", time_cont_jump, ((float) time_cont_jump / NUM_RUNS)); + printf("Boost\n"); + printf("FContext Fast : %10ld, %5.5f\n", time_fcontext_fast, ((float) time_fcontext_fast / NUM_RUNS)); + printf("FContext Clean : %10ld, %5.5f\n", time_fcontext_clean, ((float) time_fcontext_clean / NUM_RUNS)); + printf("FContext CallCC : %10ld, %5.5f\n", time_fcontext_calcc, ((float) time_fcontext_calcc / NUM_RUNS)); + printf("Custom\n"); + printf("Custom Fast Call : %10ld, %5.5f\n", time_custom, ((float) time_custom / NUM_RUNS)); + + for (unsigned int i = 0; i < STACK_SIZE; i++) { + if (custom_stack_1[i] != MAGIC_NUMBER) { + printf("\n\nUsed stack size about %lu bytes.\n", (STACK_SIZE - i)); + break; + } + } + + return 0; +} diff --git a/app/context_switch/measurements/asm32.txt b/app/context_switch/measurements/asm32.txt new file mode 100644 index 0000000..3603100 --- /dev/null +++ b/app/context_switch/measurements/asm32.txt @@ -0,0 +1,19 @@ +Output on the banana pi + +Base +Function Call : 4975613, 4.97561 +Simple Loop : 1269267, 1.26927 +Longjmp +Stack 
Switching : 11255742, 11.25574 +Full Continuation: 54504045, 54.50404 +Jump Continuation: 128717262, 128.71727 +Boost +FContext Fast : 51930723, 51.93073 +FContext Clean : 57454099, 57.45410 +FContext CallCC : 101296455, 101.29646 +Custom +Custom Fast Call : 23745875, 23.74588 + + +Used stack size about 92 bytes. + diff --git a/app/context_switch/measurements/plot.py b/app/context_switch/measurements/plot.py new file mode 100644 index 0000000..1fa9b8d --- /dev/null +++ b/app/context_switch/measurements/plot.py @@ -0,0 +1,86 @@ +import numpy as np +from matplotlib import pyplot as plt +from matplotlib.patches import Patch + +labels = ['loop overhead', + 'function call', + 'stack switching', + 'setjmp and stack switching', + 'minimal fcontext overhead', + 'minimum callcc overhead', + 'custom assembly' + ] + +call_color = (0.3, 0.1, 0.4, 0.6) +setjmp_color = (0.3, 0.3, 0.4, 0.6) +fcontext_color = (0.3, 0.6, 0.4, 0.6) +custom_color = (0.3, 1.0, 0.4, 0.6) +colors = [ + call_color, + call_color, + setjmp_color, + setjmp_color, + fcontext_color, + fcontext_color, + custom_color +] + +dataX86 = np.array([ + 0.55, + 2.79, + 5.60, + 14.93, + 11.00, + 18.66, + 7.86, +]) +dataARM32 = np.array([ + 1.26, + 4.97, + 11.25, + 54.50, + 57.45, + 101.29, + 23.74, +]) + + +def plot_data(data, name): + plt.clf() + + # Make sure pgf plots have correct fonts + pgf_with_rc_fonts = { + "font.family": "serif", + "font.serif": [""], + "font.sans-serif": [""], + } + plt.rcParams.update(pgf_with_rc_fonts) + + xAxis = np.array(range(0, len(labels))) + + plt.bar(xAxis, data, color=colors) + plt.xticks(xAxis, labels, rotation=90) + plt.ylabel('runtime in ns') + plt.subplots_adjust(bottom=0.5, top=0.98) + + custom_lines = [Patch(facecolor=call_color, edgecolor='k'), + Patch(facecolor=setjmp_color, edgecolor='k'), + Patch(facecolor=fcontext_color, edgecolor='k'), + Patch(facecolor=custom_color, edgecolor='k')] + custom_labels = ['baseline', + 'setjmp', + 'fcontext', + 'custom'] + 
plt.legend(custom_lines, custom_labels) + + fig = plt.gcf() + fig.set_size_inches(5, 5) + fig.set_dpi(300) + # bounds = Bbox.from_bounds(-0.1, -0.1, 4.75, 4.6) + + fig.savefig(name + '.pgf') + fig.savefig(name + '.png') + + +plot_data(dataX86, "plots/context_switch_x86_64") +plot_data(dataARM32, "plots/context_switch_arm32") diff --git a/app/context_switch/measurements/plots/context_switch_arm32.pgf b/app/context_switch/measurements/plots/context_switch_arm32.pgf new file mode 100644 index 0000000..8661fbf --- /dev/null +++ b/app/context_switch/measurements/plots/context_switch_arm32.pgf @@ -0,0 +1,692 @@ +%% Creator: Matplotlib, PGF backend +%% +%% To include the figure in your LaTeX document, write +%% \input{.pgf} +%% +%% Make sure the required packages are loaded in your preamble +%% \usepackage{pgf} +%% +%% Figures using additional raster images can only be included by \input if +%% they are in the same directory as the main LaTeX file. For loading figures +%% from other directories you can use the `import` package +%% \usepackage{import} +%% and then include the figures with +%% \import{}{.pgf} +%% +%% Matplotlib used the following preamble +%% \usepackage{fontspec} +%% \setmainfont{DejaVuSans.ttf}[Path=/home/florian/.local/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/] +%% \setsansfont{DejaVuSans.ttf}[Path=/home/florian/.local/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/] +%% \setmonofont{DejaVuSansMono.ttf}[Path=/home/florian/.local/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/] +%% +\begingroup% +\makeatletter% +\begin{pgfpicture}% +\pgfpathrectangle{\pgfpointorigin}{\pgfqpoint{5.000000in}{5.000000in}}% +\pgfusepath{use as bounding box, clip}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{1.000000,1.000000,1.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{1.000000,1.000000,1.000000}% +\pgfsetstrokecolor{currentstroke}% 
+\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{5.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{5.000000in}{5.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{5.000000in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{1.000000,1.000000,1.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.625000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{4.900000in}}% +\pgfpathlineto{\pgfqpoint{0.625000in}{4.900000in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.100000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.801136in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{1.215575in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{1.215575in}{2.528433in}}% +\pgfpathlineto{\pgfqpoint{0.801136in}{2.528433in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.100000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% 
+\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{1.319184in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{1.733623in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{1.733623in}{2.612153in}}% +\pgfpathlineto{\pgfqpoint{1.319184in}{2.612153in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.300000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{1.837233in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{2.251671in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{2.251671in}{2.753868in}}% +\pgfpathlineto{\pgfqpoint{1.837233in}{2.753868in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.300000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{2.355281in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{2.769719in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{2.769719in}{3.729849in}}% +\pgfpathlineto{\pgfqpoint{2.355281in}{3.729849in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% 
+\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.600000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{2.873329in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{3.287767in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{3.287767in}{3.796419in}}% +\pgfpathlineto{\pgfqpoint{2.873329in}{3.796419in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.600000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{3.391377in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{3.805816in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{3.805816in}{4.785714in}}% +\pgfpathlineto{\pgfqpoint{3.391377in}{4.785714in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,1.000000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{3.909425in}{2.500000in}}% 
+\pgfpathlineto{\pgfqpoint{4.323864in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.323864in}{3.035718in}}% +\pgfpathlineto{\pgfqpoint{3.909425in}{3.035718in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{1.008356in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.046672in,y=1.407905in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont loop overhead}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{1.526404in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% 
+\pgfsetfillcolor{textcolor}% +\pgftext[x=1.564720in,y=1.552626in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont function call}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{2.044452in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=2.082769in,y=1.331746in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont stack switching}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{2.562500in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% 
+\pgfsetfillcolor{textcolor}% +\pgftext[x=2.600817in,y=0.507840in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont setjmp and stack switching}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{3.080548in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=3.118865in,y=0.528592in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont fcontext minimal overhead}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{3.598596in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% 
+\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=3.636913in,y=0.609972in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont minimum callcc overhead}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{4.116644in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=4.154961in,y=1.187636in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont custom assembly}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% 
+\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.439413in,y=2.447238in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 0}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{2.951321in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.351047in,y=2.898559in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 20}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{3.402642in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% 
+\pgftext[x=0.351047in,y=3.349880in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 40}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{3.853962in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.351047in,y=3.801201in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 60}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{4.305283in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% 
+\pgftext[x=0.351047in,y=4.252522in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 80}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{4.756604in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.262682in,y=4.703843in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 100}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.207126in,y=3.700000in,,bottom,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont runtime in ns}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetrectcap% +\pgfsetmiterjoin% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.625000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{0.625000in}{4.900000in}}% +\pgfusepath{stroke}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetrectcap% +\pgfsetmiterjoin% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% 
+\pgfpathmoveto{\pgfqpoint{4.500000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{4.900000in}}% +\pgfusepath{stroke}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetrectcap% +\pgfsetmiterjoin% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.625000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{2.500000in}}% +\pgfusepath{stroke}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetrectcap% +\pgfsetmiterjoin% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.625000in}{4.900000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{4.900000in}}% +\pgfusepath{stroke}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{1.000000,1.000000,1.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.800000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.800000,0.800000,0.800000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.800000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.722222in}{3.973460in}}% +\pgfpathlineto{\pgfqpoint{1.748400in}{3.973460in}}% +\pgfpathquadraticcurveto{\pgfqpoint{1.776177in}{3.973460in}}{\pgfqpoint{1.776177in}{4.001238in}}% +\pgfpathlineto{\pgfqpoint{1.776177in}{4.802778in}}% +\pgfpathquadraticcurveto{\pgfqpoint{1.776177in}{4.830556in}}{\pgfqpoint{1.748400in}{4.830556in}}% +\pgfpathlineto{\pgfqpoint{0.722222in}{4.830556in}}% +\pgfpathquadraticcurveto{\pgfqpoint{0.694444in}{4.830556in}}{\pgfqpoint{0.694444in}{4.802778in}}% +\pgfpathlineto{\pgfqpoint{0.694444in}{4.001238in}}% +\pgfpathquadraticcurveto{\pgfqpoint{0.694444in}{3.973460in}}{\pgfqpoint{0.722222in}{3.973460in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% 
+\definecolor{currentfill}{rgb}{0.300000,0.100000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.750000in}{4.669477in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.669477in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.766699in}}% +\pgfpathlineto{\pgfqpoint{0.750000in}{4.766699in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.138889in,y=4.669477in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont baseline}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.300000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.750000in}{4.465620in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.465620in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.562842in}}% +\pgfpathlineto{\pgfqpoint{0.750000in}{4.562842in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.138889in,y=4.465620in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont setjmp}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.600000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% 
+\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.750000in}{4.261762in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.261762in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.358985in}}% +\pgfpathlineto{\pgfqpoint{0.750000in}{4.358985in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.138889in,y=4.261762in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont fcontext}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,1.000000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.750000in}{4.057905in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.057905in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.155127in}}% +\pgfpathlineto{\pgfqpoint{0.750000in}{4.155127in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.138889in,y=4.057905in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont custom}% +\end{pgfscope}% +\end{pgfpicture}% +\makeatother% +\endgroup% diff --git a/app/context_switch/measurements/plots/context_switch_arm32.png b/app/context_switch/measurements/plots/context_switch_arm32.png new file mode 100644 index 0000000..9d12e21 Binary files /dev/null and b/app/context_switch/measurements/plots/context_switch_arm32.png differ diff --git a/app/context_switch/measurements/plots/context_switch_x86_64.pgf b/app/context_switch/measurements/plots/context_switch_x86_64.pgf new file mode 100644 index 
0000000..27971a2 --- /dev/null +++ b/app/context_switch/measurements/plots/context_switch_x86_64.pgf @@ -0,0 +1,742 @@ +%% Creator: Matplotlib, PGF backend +%% +%% To include the figure in your LaTeX document, write +%% \input{.pgf} +%% +%% Make sure the required packages are loaded in your preamble +%% \usepackage{pgf} +%% +%% Figures using additional raster images can only be included by \input if +%% they are in the same directory as the main LaTeX file. For loading figures +%% from other directories you can use the `import` package +%% \usepackage{import} +%% and then include the figures with +%% \import{}{.pgf} +%% +%% Matplotlib used the following preamble +%% \usepackage{fontspec} +%% \setmainfont{DejaVuSans.ttf}[Path=/home/florian/.local/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/] +%% \setsansfont{DejaVuSans.ttf}[Path=/home/florian/.local/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/] +%% \setmonofont{DejaVuSansMono.ttf}[Path=/home/florian/.local/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/] +%% +\begingroup% +\makeatletter% +\begin{pgfpicture}% +\pgfpathrectangle{\pgfpointorigin}{\pgfqpoint{5.000000in}{5.000000in}}% +\pgfusepath{use as bounding box, clip}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{1.000000,1.000000,1.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{1.000000,1.000000,1.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{5.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{5.000000in}{5.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{5.000000in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{1.000000,1.000000,1.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.000000pt}% 
+\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.625000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{4.900000in}}% +\pgfpathlineto{\pgfqpoint{0.625000in}{4.900000in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.100000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.801136in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{1.215575in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{1.215575in}{2.567371in}}% +\pgfpathlineto{\pgfqpoint{0.801136in}{2.567371in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.100000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{1.319184in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{1.733623in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{1.733623in}{2.841755in}}% +\pgfpathlineto{\pgfqpoint{1.319184in}{2.841755in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% 
+\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.300000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{1.837233in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{2.251671in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{2.251671in}{3.185959in}}% +\pgfpathlineto{\pgfqpoint{1.837233in}{3.185959in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.300000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{2.355281in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{2.769719in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{2.769719in}{4.328816in}}% +\pgfpathlineto{\pgfqpoint{2.355281in}{4.328816in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.600000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{2.873329in}{2.500000in}}% 
+\pgfpathlineto{\pgfqpoint{3.287767in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{3.287767in}{3.847420in}}% +\pgfpathlineto{\pgfqpoint{2.873329in}{3.847420in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.600000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{3.391377in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{3.805816in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{3.805816in}{4.785714in}}% +\pgfpathlineto{\pgfqpoint{3.391377in}{4.785714in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfpathrectangle{\pgfqpoint{0.625000in}{2.500000in}}{\pgfqpoint{3.875000in}{2.400000in}}% +\pgfusepath{clip}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,1.000000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{0.000000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.000000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{3.909425in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.323864in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.323864in}{3.462793in}}% +\pgfpathlineto{\pgfqpoint{3.909425in}{3.462793in}}% +\pgfpathclose% +\pgfusepath{fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% 
+\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{1.008356in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.046672in,y=1.407905in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont loop overhead}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{1.526404in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.564720in,y=1.552626in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont function call}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% 
+\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{2.044452in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=2.082769in,y=1.331746in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont stack switching}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{2.562500in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=2.600817in,y=0.507840in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont setjmp and stack switching}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% 
+\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{3.080548in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=3.118865in,y=0.528592in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont fcontext minimal overhead}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{3.598596in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=3.636913in,y=0.609972in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont minimum callcc overhead}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% 
+\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{0.000000in}{-0.048611in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{0.000000in}{-0.048611in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{4.116644in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=4.154961in,y=1.187636in,left,base,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont custom assembly}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{2.500000in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.306898in,y=2.447238in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 0.0}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% 
+\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{2.806232in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.306898in,y=2.753470in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 2.5}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{3.112464in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.306898in,y=3.059702in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 5.0}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% 
+\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{3.418695in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.306898in,y=3.365934in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 7.5}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{3.724927in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.218533in,y=3.672166in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 10.0}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% 
+\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{4.031159in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.218533in,y=3.978398in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 12.5}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{4.337391in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.218533in,y=4.284629in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 15.0}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetroundjoin% +\definecolor{currentfill}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% 
+\pgfsys@defobject{currentmarker}{\pgfqpoint{-0.048611in}{0.000000in}}{\pgfqpoint{0.000000in}{0.000000in}}{% +\pgfpathmoveto{\pgfqpoint{0.000000in}{0.000000in}}% +\pgfpathlineto{\pgfqpoint{-0.048611in}{0.000000in}}% +\pgfusepath{stroke,fill}% +}% +\begin{pgfscope}% +\pgfsys@transformshift{0.625000in}{4.643623in}% +\pgfsys@useobject{currentmarker}{}% +\end{pgfscope}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.218533in,y=4.590861in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont 17.5}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=0.162977in,y=3.700000in,,bottom,rotate=90.000000]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont runtime in ns}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetrectcap% +\pgfsetmiterjoin% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.625000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{0.625000in}{4.900000in}}% +\pgfusepath{stroke}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetrectcap% +\pgfsetmiterjoin% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{4.500000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{4.900000in}}% +\pgfusepath{stroke}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetrectcap% +\pgfsetmiterjoin% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.625000in}{2.500000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{2.500000in}}% 
+\pgfusepath{stroke}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetrectcap% +\pgfsetmiterjoin% +\pgfsetlinewidth{0.803000pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.625000in}{4.900000in}}% +\pgfpathlineto{\pgfqpoint{4.500000in}{4.900000in}}% +\pgfusepath{stroke}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{1.000000,1.000000,1.000000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.800000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.800000,0.800000,0.800000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetstrokeopacity{0.800000}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.722222in}{3.973460in}}% +\pgfpathlineto{\pgfqpoint{1.748400in}{3.973460in}}% +\pgfpathquadraticcurveto{\pgfqpoint{1.776177in}{3.973460in}}{\pgfqpoint{1.776177in}{4.001238in}}% +\pgfpathlineto{\pgfqpoint{1.776177in}{4.802778in}}% +\pgfpathquadraticcurveto{\pgfqpoint{1.776177in}{4.830556in}}{\pgfqpoint{1.748400in}{4.830556in}}% +\pgfpathlineto{\pgfqpoint{0.722222in}{4.830556in}}% +\pgfpathquadraticcurveto{\pgfqpoint{0.694444in}{4.830556in}}{\pgfqpoint{0.694444in}{4.802778in}}% +\pgfpathlineto{\pgfqpoint{0.694444in}{4.001238in}}% +\pgfpathquadraticcurveto{\pgfqpoint{0.694444in}{3.973460in}}{\pgfqpoint{0.722222in}{3.973460in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.100000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.750000in}{4.669477in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.669477in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.766699in}}% 
+\pgfpathlineto{\pgfqpoint{0.750000in}{4.766699in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.138889in,y=4.669477in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont baseline}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.300000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.750000in}{4.465620in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.465620in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.562842in}}% +\pgfpathlineto{\pgfqpoint{0.750000in}{4.562842in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.138889in,y=4.465620in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont setjmp}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,0.600000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.750000in}{4.261762in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.261762in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.358985in}}% +\pgfpathlineto{\pgfqpoint{0.750000in}{4.358985in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% 
+\pgfsetfillcolor{textcolor}% +\pgftext[x=1.138889in,y=4.261762in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont fcontext}% +\end{pgfscope}% +\begin{pgfscope}% +\pgfsetbuttcap% +\pgfsetmiterjoin% +\definecolor{currentfill}{rgb}{0.300000,1.000000,0.400000}% +\pgfsetfillcolor{currentfill}% +\pgfsetfillopacity{0.600000}% +\pgfsetlinewidth{1.003750pt}% +\definecolor{currentstroke}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{currentstroke}% +\pgfsetdash{}{0pt}% +\pgfpathmoveto{\pgfqpoint{0.750000in}{4.057905in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.057905in}}% +\pgfpathlineto{\pgfqpoint{1.027778in}{4.155127in}}% +\pgfpathlineto{\pgfqpoint{0.750000in}{4.155127in}}% +\pgfpathclose% +\pgfusepath{stroke,fill}% +\end{pgfscope}% +\begin{pgfscope}% +\definecolor{textcolor}{rgb}{0.000000,0.000000,0.000000}% +\pgfsetstrokecolor{textcolor}% +\pgfsetfillcolor{textcolor}% +\pgftext[x=1.138889in,y=4.057905in,left,base]{\color{textcolor}\rmfamily\fontsize{10.000000}{12.000000}\selectfont custom}% +\end{pgfscope}% +\end{pgfpicture}% +\makeatother% +\endgroup% diff --git a/app/context_switch/measurements/plots/context_switch_x86_64.png b/app/context_switch/measurements/plots/context_switch_x86_64.png new file mode 100644 index 0000000..4205ee1 Binary files /dev/null and b/app/context_switch/measurements/plots/context_switch_x86_64.png differ diff --git a/app/context_switch/measurements/readme.md b/app/context_switch/measurements/readme.md new file mode 100644 index 0000000..17bd876 --- /dev/null +++ b/app/context_switch/measurements/readme.md @@ -0,0 +1,12 @@ +# Context Switch Measurements + +The goal of these 'quick and dirty' measurements is to get a feeling for the +expected minimum runtime of different operations involved in context switching/ +stackful coroutines. 
+ +We compare a jmp_buf + stack pointer manipulation method, boost.context (represented by +the C wrapper around their assembly to avoid importing whole boost), a custom +fiber call written by us in assembly and finally some baseline for method call costs. + +We plot the results for both an x86_64 and arm32 system to decide what implementation +we choose for pls, as this operation will be performed for every single task creation. diff --git a/app/context_switch/measurements/x86_64.txt b/app/context_switch/measurements/x86_64.txt new file mode 100644 index 0000000..2374d21 --- /dev/null +++ b/app/context_switch/measurements/x86_64.txt @@ -0,0 +1,21 @@ +Output on my x86_64 laptop: +- Ubuntu 18.04.3 LTS +- Core i7-8550U CPU @1.80 GHz (turbo boost/frequency scaling disabled) +- Results of best run using nice -20 + +Base +Function Call : 2796349, 2.79635 +Simple Loop : 557062, 0.55706 +Longjmp +Stack Switching : 5602146, 5.60215 +Full Continuation: 14935945, 14.93594 +Jump Continuation: 33523215, 33.52322 +Boost +FContext Fast : 9517573, 9.51757 +FContext Clean : 11006061, 11.00606 +FContext CallCC : 18661906, 18.66191 +Custom +Custom Fast Call : 7860666, 7.86067 + + +Used stack size about 32 bytes. 
diff --git a/app/invoke_parallel/CMakeLists.txt b/app/invoke_parallel/CMakeLists.txt deleted file mode 100644 index 944f5ef..0000000 --- a/app/invoke_parallel/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_executable(invoke_parallel main.cpp) -target_link_libraries(invoke_parallel pls) -if(EASY_PROFILER) - target_link_libraries(invoke_parallel easy_profiler) -endif() diff --git a/app/invoke_parallel/main.cpp b/app/invoke_parallel/main.cpp deleted file mode 100644 index be158cb..0000000 --- a/app/invoke_parallel/main.cpp +++ /dev/null @@ -1,102 +0,0 @@ -#include -#include - -#include -#include -#include - -static constexpr int CUTOFF = 16; -static constexpr int INPUT_SIZE = 8192; -typedef std::vector> complex_vector; - -void divide(complex_vector::iterator data, int n) { - complex_vector tmp_odd_elements(n / 2); - for (int i = 0; i < n / 2; i++) { - tmp_odd_elements[i] = data[i * 2 + 1]; - } - for (int i = 0; i < n / 2; i++) { - data[i] = data[i * 2]; - } - for (int i = 0; i < n / 2; i++) { - data[i + n / 2] = tmp_odd_elements[i]; - } -} - -void combine(complex_vector::iterator data, int n) { - for (int i = 0; i < n / 2; i++) { - std::complex even = data[i]; - std::complex odd = data[i + n / 2]; - - // w is the "twiddle-factor". - // this could be cached, but we run the same 'data_structures' algorithm parallel/serial, - // so it won't impact the performance comparison. - std::complex w = exp(std::complex(0, -2. 
* M_PI * i / n)); - - data[i] = even + w * odd; - data[i + n / 2] = even - w * odd; - } -} - -void fft(complex_vector::iterator data, int n) { - if (n < 2) { - return; - } - - PROFILE_WORK_BLOCK("Divide") - divide(data, n); - PROFILE_END_BLOCK - PROFILE_WORK_BLOCK("Invoke Parallel") - if (n == CUTOFF) { - PROFILE_WORK_BLOCK("FFT Serial") - fft(data, n / 2); - fft(data + n / 2, n / 2); - } else if (n <= CUTOFF) { - fft(data, n / 2); - fft(data + n / 2, n / 2); - } else { - pls::invoke( - [n, &data] { fft(data, n / 2); }, - [n, &data] { fft(data + n / 2, n / 2); } - ); - } - PROFILE_END_BLOCK - PROFILE_WORK_BLOCK("Combine") - combine(data, n); - PROFILE_END_BLOCK -} - -complex_vector prepare_input(int input_size) { - std::vector known_frequencies{2, 11, 52, 88, 256}; - complex_vector data(input_size); - - // Set our input data to match a time series of the known_frequencies. - // When applying fft to this time-series we should find these frequencies. - for (int i = 0; i < input_size; i++) { - data[i] = std::complex(0.0, 0.0); - for (auto frequencie : known_frequencies) { - data[i] += sin(2 * M_PI * frequencie * i / input_size); - } - } - - return data; -} - -int main() { - PROFILE_ENABLE - pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 14}; - pls::scheduler scheduler{&my_scheduler_memory, 8}; - - complex_vector initial_input = prepare_input(INPUT_SIZE); - scheduler.perform_work([&] { - PROFILE_MAIN_THREAD - // Call looks just the same, only requirement is - // the enclosure in the perform_work lambda. 
- for (int i = 0; i < 10; i++) { - PROFILE_WORK_BLOCK("Top Level FFT") - complex_vector input = initial_input; - fft(input.begin(), input.size()); - } - }); - - PROFILE_SAVE("test_profile.prof") -} diff --git a/app/playground/CMakeLists.txt b/app/playground/CMakeLists.txt index c1b57ee..b825f68 100644 --- a/app/playground/CMakeLists.txt +++ b/app/playground/CMakeLists.txt @@ -1,4 +1,5 @@ -add_executable(playground main.cpp) +add_executable(playground + main.cpp) # Example for adding the library to your app (as a cmake project dependency) -target_link_libraries(playground pls) +target_link_libraries(playground pls Threads::Threads) diff --git a/app/playground/main.cpp b/app/playground/main.cpp index 03b3bbf..d6e3b6c 100644 --- a/app/playground/main.cpp +++ b/app/playground/main.cpp @@ -1,26 +1,4 @@ -// Headers are available because we added the pls target -const long NUM_THREADS = 8; -const long MEMORY_PER_THREAD = 2u << 12u; - -#include "pls/pls.h" - -pls::static_scheduler_memory memory; - int main() { - pls::scheduler scheduler{&memory, NUM_THREADS}; - - scheduler.perform_work([]() { - auto lambda = []() { - // Do work - }; - using lambda_task = pls::lambda_task_by_value; - - pls::scheduler::spawn_child(lambda); - pls::scheduler::spawn_child(lambda); - - pls::scheduler::wait_for_all(); - }); - scheduler.terminate(); return 0; } diff --git a/app/test_for_new/CMakeLists.txt b/app/test_for_new/CMakeLists.txt deleted file mode 100644 index 06a77bd..0000000 --- a/app/test_for_new/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_executable(test_for_new main.cpp) - -# Example for adding the library to your app (as a cmake project dependency) -target_link_libraries(test_for_new pls) diff --git a/app/test_for_new/main.cpp b/app/test_for_new/main.cpp deleted file mode 100644 index 18596e1..0000000 --- a/app/test_for_new/main.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include - -using namespace pls::internal::base; - -int global = 0; - -int main() { - // Try to use 
every feature, to trigger the prohibited use of new if found somewhere - thread t1{[]() {}}; - t1.join(); -} diff --git a/cmake/SetupAssemblyOutput.cmake b/cmake/SetupAssemblyOutput.cmake new file mode 100644 index 0000000..916806d --- /dev/null +++ b/cmake/SetupAssemblyOutput.cmake @@ -0,0 +1,6 @@ +option(ASSEMBLY_OUTPUT "Enable output of assembly files when building" OFF) +if (ASSEMBLY_OUTPUT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -save-temps -Wa,-ahldn=assembly.asm -fverbose-asm -g") +endif () + +message("-- Assembly Output: ${ASSEMBLY_OUTPUT}") diff --git a/cmake/SetupOptimizationLevel.cmake b/cmake/SetupOptimizationLevel.cmake index 1f4031a..5d22958 100644 --- a/cmake/SetupOptimizationLevel.cmake +++ b/cmake/SetupOptimizationLevel.cmake @@ -13,12 +13,12 @@ message("-- Using Build Type: " ${CMAKE_BUILD_TYPE}) # Enable optimizations in release builds if (CMAKE_BUILD_TYPE STREQUAL "Release") # Link time optimization - set(CMAKE_CXX_FLAGS "-Wall -Wextra") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") # -O2 is often seen as 'the most speed', # but inlining functions and SIMD/Vectorization is # only enabled by -O3, thus it's way faster in some # array calculations. 
- set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -march=native") set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) else () set(CMAKE_CXX_FLAGS_DEBUG "-g -O0") diff --git a/cmake/SetupThreadSanitizer.cmake b/cmake/SetupThreadSanitizer.cmake index 4a5debd..d1135b7 100644 --- a/cmake/SetupThreadSanitizer.cmake +++ b/cmake/SetupThreadSanitizer.cmake @@ -3,8 +3,10 @@ # Add optional sanitizer, off by default option(THREAD_SANITIZER "Add thread sanitizer" OFF) -if(THREAD_SANITIZER) - add_compile_options(-fsanitize=thread -g) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread") -endif() -message("-- Thread Sanitizer: ${THREAD_SANITIZER}") \ No newline at end of file +if (THREAD_SANITIZER) + add_compile_options(-fsanitize=thread -g -fno-omit-frame-pointer) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread -fno-omit-frame-pointer") + + add_compile_definitions(THREAD_SANITIZER) +endif () +message("-- Thread Sanitizer: ${THREAD_SANITIZER}") diff --git a/extern/benchmark_base/CMakeLists.txt b/extern/benchmark_base/CMakeLists.txt new file mode 100644 index 0000000..890a006 --- /dev/null +++ b/extern/benchmark_base/CMakeLists.txt @@ -0,0 +1,21 @@ +# Configuration and common algorithm pieces for benchmarks +configure_file(src/sample_images.cpp.in sample_images.cpp) + +add_library(benchmark_base STATIC + ${CMAKE_CURRENT_BINARY_DIR}/sample_images.cpp + src/fft.cpp include/benchmark_base/fft.h + include/benchmark_base/heat.h + include/benchmark_base/matrix.h + include/benchmark_base/unbalanced.h src/unbalanced.cpp + include/benchmark_base/range.h + include/benchmark_base/fib.h) + +target_include_directories(benchmark_base + PUBLIC + $ + $ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src + ) + +target_link_libraries(benchmark_base picosha2) diff --git a/extern/benchmark_base/include/benchmark_base/.gitkeep b/extern/benchmark_base/include/benchmark_base/.gitkeep new file mode 100644 
index 0000000..e69de29 --- /dev/null +++ b/extern/benchmark_base/include/benchmark_base/.gitkeep diff --git a/extern/benchmark_base/include/benchmark_base/RANGE_LICENSE.txt b/extern/benchmark_base/include/benchmark_base/RANGE_LICENSE.txt new file mode 100644 index 0000000..36b7cd9 --- /dev/null +++ b/extern/benchmark_base/include/benchmark_base/RANGE_LICENSE.txt @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/extern/benchmark_base/include/benchmark_base/fft.h b/extern/benchmark_base/include/benchmark_base/fft.h new file mode 100644 index 0000000..d1c4c9d --- /dev/null +++ b/extern/benchmark_base/include/benchmark_base/fft.h @@ -0,0 +1,29 @@ +#ifndef COMPARISON_BENCHMARKS_BASE_FFT_H +#define COMPARISON_BENCHMARKS_BASE_FFT_H + +#include +#include +#include + +namespace comparison_benchmarks { +namespace base { +namespace fft { + +const int SIZE = 8192; +const int NUM_ITERATIONS = 1000; +const int NUM_WARMUP_ITERATIONS = 100; + +const int RECURSIVE_CUTOFF = 32; +typedef std::vector> complex_vector; + +void fill_input(fft::complex_vector &data); + +void divide(complex_vector::iterator data, complex_vector::iterator swap_array, int n); +void conquer(complex_vector::iterator data, complex_vector::iterator swap_array, int n); +void combine(complex_vector::iterator data, int n); + +} +} +} + +#endif //COMPARISON_BENCHMARKS_BASE_FFT_H diff --git a/extern/benchmark_base/include/benchmark_base/fib.h b/extern/benchmark_base/include/benchmark_base/fib.h new file mode 100644 index 0000000..b9a6a6e --- /dev/null +++ b/extern/benchmark_base/include/benchmark_base/fib.h @@ -0,0 +1,18 @@ + +#ifndef COMPARISON_BENCHMARKS_BASE_FIB_H_ +#define COMPARISON_BENCHMARKS_BASE_FIB_H_ + +namespace comparison_benchmarks { +namespace base { +namespace fib { + +const int INPUT_N = 18; + +const int NUM_ITERATIONS = 1000; +const int NUM_WARMUP_ITERATIONS = 100; + +} +} +} + +#endif //COMPARISON_BENCHMARKS_BASE_FIB_H_ diff --git a/extern/benchmark_base/include/benchmark_base/heat.h b/extern/benchmark_base/include/benchmark_base/heat.h new file mode 100644 index 0000000..007d643 --- /dev/null +++ b/extern/benchmark_base/include/benchmark_base/heat.h @@ -0,0 +1,117 @@ + +#ifndef COMPARISON_BENCHMARKS_BASE_HEAT_H +#define COMPARISON_BENCHMARKS_BASE_HEAT_H + +#include +#include +#include + +namespace comparison_benchmarks { +namespace base { +namespace heat { + +const int DIFFUSION_SIZE = 256; 
+const int DIFFUSION_STEPS = 256; + +const int NUM_ITERATIONS = 100; +const int WARMUP_ITERATIONS = 20; + +template +class heat_diffusion { + // Center portion is SIZExSIZE, borders are fixed temperature values + using matrix = std::array, SIZE + 2>; + + protected: + // Sane default values for the simulation (form paper). + // This is not about perfect simulation results but the speedup of the workload. + double c = 0.1; + double d_s = 1.0 / (SIZE + 1); + double d_t = (d_s * d_s) / (4 * c); + + public: + matrix *current_data; + matrix *next_data; + + explicit heat_diffusion() { + current_data = new matrix; + next_data = new matrix; + reset_data(); + } + + ~heat_diffusion() { + delete current_data; + delete next_data; + } + + virtual void run_simulation(int n) { + for (int i = 0; i < n; i++) { + for (int row = 1; row <= SIZE; row++) { + for (int column = 1; column <= SIZE; column++) { + update_element(row, column); + } + } + + // Synchronization point needed to coordinate the calculation! 
+ swap_data_arrays(); + } + } + + protected: + void update_element(int row, int column) { + (*next_data)[row][column] = (*current_data)[row][column] + ((c * d_t) / (d_s * d_s)) * + ((*current_data)[row + 1][column] + (*current_data)[row - 1][column] + - 4 * (*current_data)[row][column] + + (*current_data)[row][column + 1] + (*current_data)[row][column - 1]); + } + + void swap_data_arrays() { + matrix *tmp = current_data; + current_data = next_data; + next_data = tmp; + } + + void reset_data() { + for (int row = 0; row < SIZE + 2; row++) { + for (int column = 0; column < SIZE + 2; column++) { + (*current_data)[row][column] = 0.0; + (*next_data)[row][column] = 0.0; + + // Edges are a fixed, hot temperature + if (row == 0 || row == SIZE + 1) { + (*current_data)[row][column] = 1.0; + (*next_data)[row][column] = 1.0; + } + } + } + } +}; + +template +std::ostream &operator<<(std::ostream &strm, const heat_diffusion &simulation) { + for (int i = 0; i < SIZE + 2; i++) { + for (int j = 0; j < SIZE + 2; j++) { + // 'color' our output according to temperature + char out; + if (simulation.current_data[i][j] < 0.1) { + out = ' '; + } else if (simulation.current_data[i][j] < 0.2) { + out = '-'; + } else if (simulation.current_data[i][j] < 0.5) { + out = '='; + } else { + out = '#'; + } + + strm << out << "\t"; + } + strm << std::endl; + } + + return strm; +} + +} +} +} + +#endif //COMPARISON_BENCHMARKS_BASE_HEAT_H diff --git a/extern/benchmark_base/include/benchmark_base/matrix.h b/extern/benchmark_base/include/benchmark_base/matrix.h new file mode 100644 index 0000000..1d8af13 --- /dev/null +++ b/extern/benchmark_base/include/benchmark_base/matrix.h @@ -0,0 +1,65 @@ + +#ifndef COMPARISON_BENCHMARKS_BASE_MATRIX_H +#define COMPARISON_BENCHMARKS_BASE_MATRIX_H + +#include +#include + +namespace comparison_benchmarks { +namespace base { +namespace matrix { + +const int MATRIX_SIZE = 128; + +const int NUM_ITERATIONS = 5000; +const int WARMUP_ITERATIONS = 1000; + +template +class 
matrix { + public: + T data[SIZE][SIZE]; + + explicit matrix() { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + data[i][j] = i; + } + } + } + + virtual void multiply(const matrix &a, const matrix &b) { + for (int i = 0; i < SIZE; i++) { + multiply_column(i, a, b); + } + } + + protected: + void multiply_column(int i, const matrix &a, const matrix &b) { + for (int j = 0; j < SIZE; ++j) { + data[i][j] = 0; + } + for (int k = 0; k < SIZE; ++k) { + for (int j = 0; j < SIZE; ++j) { + data[i][j] += a.data[i][k] * b.data[k][j]; + } + } + } +}; + +template +std::ostream &operator<<(std::ostream &strm, const matrix &matrix) { + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + strm << matrix.data[i][j] << "\t"; + } + strm << std::endl; + } + + return strm; +} + +} +} +} + +#endif //COMPARISON_BENCHMARKS_BASE_MATRIX_H diff --git a/extern/benchmark_base/include/benchmark_base/range.h b/extern/benchmark_base/include/benchmark_base/range.h new file mode 100644 index 0000000..06bf65e --- /dev/null +++ b/extern/benchmark_base/include/benchmark_base/range.h @@ -0,0 +1,608 @@ +/* + Range + ===== + + Copyright (c) 2009-2011 Khaled Alshaya + + Distributed under the Boost Software License, version 1.0 + (See the license at: http://www.boost.org/license_1_0.txt). +*/ + +/* + Rationale + ========= + + In Python, there is a beautiful function called "range". + "range" allows the programmer to iterate over a range elegantly. + This concept is not as general as "for-loops" in C++, + but non the less, it expresses the intent of the programmer + clearer than the general "for-loops" in many cases. + + + Design + ====== + + Range is made to be STL-like library. In fact, it is + built on top of the concepts of STL. The library is designed to + work with STL algorithms as well. 
Range is more flexible + than the Python "range", because: + + Range is an "immutable ordered random access container" + + + Specifications + ============== + + Range satisfies the following requirements: + + * Immutable. + * Random Access Container. + * Random Access Iterator Interface. + * Constant Time Complexity Operations. + + + Range models an ordered sequence of elements, + where a range is defined by: + + [begin, end) + + * begin: the first element in the range. (Inclusive) + * end : the last element in the range. (Exclusive) + * step : the distance between two consecutive elements in a range. + + where each element in the range is defined by: + + element = begin + step * i + + * i: is the index of the element in range. + + The following precondition must be met for the sequence + to be a valid range: + + step != 0 + && + ( + begin <= end && step > 0 + || + begin >= end && step < 0 + ) + + + Portability + =========== + + Range Generator is written in standard C++ (C++98). It depends + -only- on the standard C++ library. +*/ + +// TODO: See if we should swap this out for our own implementation, for now this is fine, as it is self-contained. +/** + * Notes on Modification: + * The code was adapted to fit into our namespacing/naming scheme for simpler use. + * This includes ifdef's, namespace and code formatting style. 
+ */ + +#ifndef Range_h__ +#define Range_h__ + +#include +#include +#include +#include + +namespace comparison_benchmarks { +namespace base { +namespace range { + +template +struct basic_range { + struct const_iterator_impl { + typedef IntegerType value_type; + typedef std::size_t size_type; + typedef IntegerType difference_type; + typedef value_type *pointer; + typedef value_type &reference; + typedef + std::random_access_iterator_tag + iterator_category; + + const_iterator_impl() : r(0), index(0) {} + + const_iterator_impl(const const_iterator_impl &rhs) + : r(rhs.r), index(rhs.index) {} + + const_iterator_impl(basic_range const *p_range, size_type p_index) + : r(p_range), index(p_index) {} + + const_iterator_impl &operator=(const const_iterator_impl &rhs) { + r = rhs.r; + index = rhs.index; + return *this; + } + + bool operator==(const const_iterator_impl &rhs) const { + return *r == *(rhs.r) && index == rhs.index; + } + + bool operator!=(const const_iterator_impl &rhs) const { + return !(*this == rhs); + } + + bool operator<(const const_iterator_impl &rhs) const { + return index < rhs.index; + } + + bool operator>(const const_iterator_impl &rhs) const { + return index > rhs.index; + } + + bool operator<=(const const_iterator_impl &rhs) const { + return index <= rhs.index; + } + + bool operator>=(const const_iterator_impl &rhs) const { + return index >= rhs.index; + } + + value_type operator*() const { + return r->m_first_element + r->m_step * index; + } + + // operator-> + // is not implemented because the value_type is an integer type + // and primitive types in C++ don't define member functions. 
+ + const_iterator_impl &operator++() { + ++index; + return *this; + } + + const_iterator_impl operator++(int) { + const_iterator_impl temp = *this; + ++index; + return temp; + } + + const_iterator_impl &operator--() { + --index; + return *this; + } + + const_iterator_impl operator--(int) { + const_iterator_impl temp = *this; + --index; + return temp; + } + + const_iterator_impl &operator+=(difference_type increment) { + index += increment; + return *this; + } + + // operator+ + // is friend operator but operator- + // is not, because we want to allow the following for "+": + // iterator+5 + // 5+iterator + // For the "-" it is not correct to do so, because + // iterator-5 != 5-iterator + friend const_iterator_impl operator+ + (const const_iterator_impl &lhs, difference_type increment) { + const_iterator_impl sum; + sum.r = lhs.r; + sum.index = lhs.index + increment; + return sum; + } + + const_iterator_impl &operator-=(difference_type decrement) { + index -= decrement; + return *this; + } + + const_iterator_impl operator-(difference_type decrement) const { + const_iterator_impl shifted_iterator; + shifted_iterator.r = r; + shifted_iterator.index = index - decrement; + return shifted_iterator; + } + + difference_type operator-(const const_iterator_impl &rhs) const { + return index - rhs.index; + } + + value_type operator[](difference_type offset) const { + size_type new_index = index + offset; + return r->m_first_element + r->m_step * new_index; + } + + private: + basic_range const *r; + size_type index; + }; + + struct const_reverse_iterator_impl { + typedef IntegerType value_type; + typedef std::size_t size_type; + typedef IntegerType difference_type; + typedef value_type *pointer; + typedef value_type &reference; + typedef + std::random_access_iterator_tag + iterator_category; + + const_reverse_iterator_impl() : r(0), index(0) {} + + const_reverse_iterator_impl(const const_reverse_iterator_impl &rhs) + : r(rhs.r), index(rhs.index) {} + + 
const_reverse_iterator_impl(basic_range const *p_range, size_type p_index) + : r(p_range), index(p_index) {} + + const_reverse_iterator_impl &operator=(const const_reverse_iterator_impl &rhs) { + r = rhs.r; + index = rhs.index; + return *this; + } + + bool operator==(const const_reverse_iterator_impl &rhs) const { + return *r == *(rhs.r) && index == rhs.index; + } + + bool operator!=(const const_reverse_iterator_impl &rhs) const { + return !(*this == rhs); + } + + bool operator<(const const_reverse_iterator_impl &rhs) const { + return index < rhs.index; + } + + bool operator>(const const_reverse_iterator_impl &rhs) const { + return index > rhs.index; + } + + bool operator<=(const const_reverse_iterator_impl &rhs) const { + return index <= rhs.index; + } + + bool operator>=(const const_reverse_iterator_impl &rhs) const { + return index >= rhs.index; + } + + value_type operator*() const { + size_type reverse_index + = (r->m_element_count - 1) - index; + return r->m_first_element + r->m_step * reverse_index; + } + + // operator-> + // is not implemented because the value_type is integer type + // and primitive types in C++ don't define member functions. 
+ + const_reverse_iterator_impl &operator++() { + ++index; + return *this; + } + + const_reverse_iterator_impl operator++(int) { + const_reverse_iterator_impl temp = *this; + ++index; + return temp; + } + + const_reverse_iterator_impl &operator--() { + --index; + return *this; + } + + const_reverse_iterator_impl operator--(int) { + const_reverse_iterator_impl temp = *this; + --index; + return temp; + } + + const_reverse_iterator_impl &operator+=(difference_type increment) { + index += increment; + return *this; + } + + // operator+ + // is friend operator but operator- + // is not, because we want to allow the following for "+": + // iterator+5 + // 5+iterator + // For the "-" it is not correct to do so, because + // iterator-5 != 5-iterator + friend const_reverse_iterator_impl operator+ + (const const_reverse_iterator_impl &lhs, difference_type increment) { + const_reverse_iterator_impl sum; + sum.r = lhs.r; + sum.index = lhs.index + increment; + return sum; + } + + const_reverse_iterator_impl &operator-=(difference_type decrement) { + index -= decrement; + return *this; + } + + const_reverse_iterator_impl operator-(difference_type decrement) const { + const_reverse_iterator_impl shifted_iterator; + shifted_iterator.r = r; + shifted_iterator.index = index - decrement; + return shifted_iterator; + } + + difference_type operator-(const const_reverse_iterator_impl &rhs) const { + return index - rhs.index; + } + + value_type operator[](difference_type offset) const { + size_type new_reverse_index + = (r->m_element_count - 1) - (index + offset); + return r->m_first_element + r->m_step * new_reverse_index; + } + + private: + basic_range const *r; + size_type index; + }; + + typedef IntegerType value_type; + typedef const_iterator_impl iterator; + typedef const_iterator_impl const_iterator; + typedef const_reverse_iterator_impl reverse_iterator; + typedef const_reverse_iterator_impl const_reverse_iterator; + typedef value_type &reference; + typedef const value_type 
&const_reference; + typedef value_type *pointer; + typedef IntegerType difference_type; + typedef std::size_t size_type; + + // In the case of default construction, + // the range is considered as an empty range with no elements. + // step can be anything other than 0. 1 is + // an implementation convention, and it doesn't have + // a significance in this case because the range is empty. + basic_range() : m_first_element(0), m_element_count(0), m_step(1) {} + + // first_element: is begin in specifications. + // last_element: is end in specifications. + basic_range(value_type first_element, value_type last_element, value_type step) + : m_first_element(first_element), + m_step(step) { + // We need to count the number of elements. + // The only case where a range is invalid, + // when the step=0. It means that the range + // is infinite, because the number of elements + // in a range, is the length of that range + // divided by the difference between + // every two successive elements. + + if (step == 0) + throw std::out_of_range("Invalid Range: step can't be equal to zero!"); + if (first_element < last_element && step < 0) + throw std::out_of_range("Invalid Range: step can't be backward, while the range is forward!"); + if (first_element > last_element && step > 0) + throw std::out_of_range("Invalid Range: step can't be forward, while the range is backward!"); + + m_element_count = (last_element - first_element) / step; + if ((last_element - first_element) % step != 0) + ++m_element_count; + } + + // The following constructor, determines the step + // automatically. If the range is forward, then + // step will be one. If the range is backward, + // step will be minus one. If the begin is equal + // to end, then the step must not equal to zero + // and it is set to one as a convention. 
+ basic_range(value_type first_element, value_type last_element) + : m_first_element(first_element) { + if (last_element >= first_element) *this = basic_range(first_element, last_element, 1); + else *this = basic_range(first_element, last_element, -1); + + } + + // The following constructor is a shortcut + // if you want the first element as zero. + // the step is determined automatically, based + // on the last element. If the last element is + // positive, then step is one, but if it is negative + // then step is minus one. + basic_range(value_type last_element) + : m_first_element(0) { + if (last_element >= m_first_element) *this = basic_range(m_first_element, last_element, 1); + else *this = basic_range(m_first_element, last_element, -1); + } + + basic_range(const basic_range &r) + : m_first_element(r.m_first_element), + m_element_count(r.m_element_count), + m_step(r.m_step) {} + + basic_range &operator=(const basic_range &r) { + m_first_element = r.m_first_element; + m_element_count = r.m_element_count; + m_step = r.m_step; + + return *this; + } + + bool operator==(const basic_range &r) const { + return m_first_element == r.m_first_element + && + m_element_count == r.m_element_count + && + m_step == r.m_step; + } + + bool operator!=(const basic_range &r) const { + return !(*this == r); + } + + // The following four functions enable the user to compare + // ranges using ( <, >, <=, >=). + // The comparison between two ranges is a simple lexicographical + // comparison(element by element). By convention, if two ranges + // R1, R2 where R1 has a smaller number of elements. Then if + // R1 contains more elements but all R1 elements are found in R2 + // R1 is considered less than R2. + bool operator<(const basic_range &r) const { + // ********** This function needs refactoring. 
+ + if (m_element_count == 0 && r.m_element_count == 0) + return false; + if (m_element_count == 0 && r.m_element_count > 0) + return true; + if (m_element_count > 0 && r.m_element_count == 0) + return false; + + // At this point, both has at least one element. + if (m_first_element < r.m_first_element) + return true; + if (m_first_element > r.m_first_element) + return false; + + // At this point, the first element of both are equal. + if (m_element_count == 1 && r.m_element_count == 1) + return false; + if (m_element_count == 1 && r.m_element_count > 1) + return true; + if (m_element_count > 1 && r.m_element_count == 1) + return false; + + // At this point, both have at least two elements with + // a similar first element. Note than the final answer + // in this case depends on the second element only, because + // we don't need to compare the elements further. + // Note that the second element is at (index == 1), because + // the first element is at (index == 0). + if (m_first_element + m_step * 1 < r.m_first_element + r.m_step * 1) + return true; + if (m_first_element + m_step * 1 > r.m_first_element + r.m_step * 1) + return false; + + // if the first two elements of both ranges are equal, then + // they are co-linear ranges(because the step is constant). + // In that case, they comparison depends only on + // the size of the ranges by convention. + return m_element_count < r.m_element_count; + } + + bool operator>(const basic_range &r) const { + // ********** This function needs refactoring. + + if (m_element_count == 0 && r.m_element_count == 0) + return false; + if (m_element_count == 0 && r.m_element_count > 0) + return false; + if (m_element_count > 0 && r.m_element_count == 0) + return true; + + // At this point, both has at least one element. + if (m_first_element < r.m_first_element) + return false; + if (m_first_element > r.m_first_element) + return true; + + // At this point, the first element of both are equal. 
+ if (m_element_count == 1 && r.m_element_count == 1) + return false; + if (m_element_count == 1 && r.m_element_count > 1) + return false; + if (m_element_count > 1 && r.m_element_count == 1) + return true; + + // At this point, both have at least two elements with + // a similar first element. Note than the final answer + // in this case depends on the second element only, because + // we don't need to compare the elements further. + // Note that the second element is at (index == 1), because + // the first element is at (index == 0). + if (m_first_element + m_step * 1 < r.m_first_element + r.m_step * 1) + return false; + if (m_first_element + m_step * 1 > r.m_first_element + r.m_step * 1) + return true; + + // if the first two elements of both ranges are equal, then + // they are co-linear ranges(because the step is constant). + // In that case, they comparison depends only on + // the size of the ranges by convention. + return m_element_count > r.m_element_count; + } + + bool operator<=(const basic_range &r) const { + return !(*this > r); + } + + bool operator>=(const basic_range &r) const { + return !(*this < r); + } + + const_iterator begin() const { + return const_iterator(this, 0); + } + + const_iterator end() const { + return const_iterator(this, m_element_count); + } + + const_reverse_iterator rbegin() const { + return const_reverse_iterator(this, 0); + } + + const_reverse_iterator rend() const { + return const_reverse_iterator(this, m_element_count); + } + + size_type size() const { + return m_element_count; + } + + size_type max_size() const { + // Because this is an immutable container, + // max_size() == size() + return m_element_count; + } + + bool empty() const { + return m_element_count == 0; + } + + // exist() and find() are similar except that + // find() returns the index of the element. 
+ iterator find(value_type element) const { + value_type element_index = (element - m_first_element) / m_step; + bool in_range = element_index >= 0 && element_index < m_element_count && + (element - m_first_element) % m_step == 0; + if (in_range) + return begin() + element_index; + return end(); + } + + bool exist(value_type element) const { + return find(element) != end(); + } + + // In the standard, the operator[] + // should return a const reference. + // Because Range Generator doesn't store its elements + // internally, we return a copy of the value. + // In any case, this doesn't affect the semantics of the operator. + value_type operator[](size_type index) const { + return m_first_element + m_step * index; + } + + private: + // m_first_element: begin (see specifications). + // m_element_count: (end - begin) / step + value_type m_first_element, m_element_count, m_step; +}; + +// This is the default type of range! +typedef basic_range range; +} +} +} + +#endif // range_h__ diff --git a/app/benchmark_unbalanced/node.h b/extern/benchmark_base/include/benchmark_base/unbalanced.h similarity index 60% rename from app/benchmark_unbalanced/node.h rename to extern/benchmark_base/include/benchmark_base/unbalanced.h index 5111059..5396ce2 100644 --- a/app/benchmark_unbalanced/node.h +++ b/extern/benchmark_base/include/benchmark_base/unbalanced.h @@ -1,6 +1,6 @@ -#ifndef UTS_NODE_H -#define UTS_NODE_H +#ifndef COMPARISON_BENCHMARKS_BASE_UNBALANCED_H_ +#define COMPARISON_BENCHMARKS_BASE_UNBALANCED_H_ #include #include @@ -8,7 +8,20 @@ #include "picosha2.h" -namespace uts { +namespace comparison_benchmarks { +namespace base { +namespace unbalanced { + +const int SEED = 42; +const int ROOT_CHILDREN = 140; +const double Q = 0.124875; +const int NORMAL_CHILDREN = 8; + +const int NUM_NODES = 71069; + +const int NUM_ITERATIONS = 50; +const int WARMUP_ITERATIONS = 5; + using node_state = std::array; /** @@ -20,6 +33,9 @@ class node { // The state is used to allow a deterministic 
tree construction using sha256 hashes. node_state state_; + // Number of children for the current node + int num_children_; + // Set this to a positive number for the root node to start the tree with a specific size int root_children_; @@ -28,46 +44,54 @@ class node { int b_; // Private constructor for children - node(node_state state, double q, int b) : state_{state}, root_children_{-1}, q_{q}, b_{b} {} + node(node_state state, double q, int b) : state_{state}, + num_children_{0}, + root_children_{-1}, + q_{q}, + b_{b} { init_num_children(); } std::array generate_child_state(uint32_t index); double get_state_random(); + void init_num_children() { + double state_random = get_state_random(); + if (root_children_ > 0) { + num_children_ = root_children_; // Root always spawns children + } else if (state_random < q_) { + num_children_ = b_; + } else { + num_children_ = 0; + } + } public: - node(int seed, int root_children, double q, int b) : state_({{}}), root_children_{root_children}, q_{q}, b_{b} { + node(uint32_t seed, int root_children, double q, int b) + : state_({{}}), num_children_{0}, root_children_{root_children}, q_{q}, b_{b} { + for (int i = 0; i < 16; i++) { state_[i] = 0; } - state_[16] = static_cast(0xFF & (seed >> 24)); - state_[17] = static_cast(0xFF & (seed >> 16)); - state_[18] = static_cast(0xFF & (seed >> 8)); - state_[19] = static_cast(0xFF & (seed >> 0)); + state_[16] = static_cast(0xFFu & (seed >> 24u)); + state_[17] = static_cast(0xFFu & (seed >> 16u)); + state_[18] = static_cast(0xFFu & (seed >> 8u)); + state_[19] = static_cast(0xFFu & (seed >> 0u)); picosha2::hash256_one_by_one hasher; hasher.process(state_.begin(), state_.end()); hasher.finish(); hasher.get_hash_bytes(state_.begin(), state_.end()); - } - std::vector spawn_child_nodes() { - double state_random = get_state_random(); - int num_children; - if (root_children_ > 0) { - num_children = root_children_; // Root always spawns children - } else if (state_random < q_) { - num_children = 
b_; - } else { - num_children = 0; - } + init_num_children(); + } - std::vector result; - for (int i = 0; i < num_children; i++) { - result.push_back(node(generate_child_state(i), q_, b_)); - } + int get_num_children() const { return num_children_; } - return result; + node spawn_child_node(int index) { + return {generate_child_state(index), q_, b_}; } }; + +} +} } -#endif //UTS_NODE_H +#endif //COMPARISON_BENCHMARKS_BASE_UNBALANCED_H_ diff --git a/extern/benchmark_base/src/.gitkeep b/extern/benchmark_base/src/.gitkeep new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/extern/benchmark_base/src/.gitkeep diff --git a/extern/benchmark_base/src/fft.cpp b/extern/benchmark_base/src/fft.cpp new file mode 100644 index 0000000..d21d00c --- /dev/null +++ b/extern/benchmark_base/src/fft.cpp @@ -0,0 +1,53 @@ +#include "benchmark_base/fft.h" + +namespace comparison_benchmarks { +namespace base { +namespace fft { + +void fill_input(fft::complex_vector &data) { + for (size_t i = 0; i < data.size(); i++) { + data[i] = std::complex(sin(i), 0.0); + } +} + +void divide(complex_vector::iterator data, complex_vector::iterator tmp_odd_elements, int n) { + for (int i = 0; i < n / 2; i++) { + tmp_odd_elements[i] = data[i * 2 + 1]; + } + for (int i = 0; i < n / 2; i++) { + data[i] = data[i * 2]; + } + for (int i = 0; i < n / 2; i++) { + data[i + n / 2] = tmp_odd_elements[i]; + } +} + +void combine(complex_vector::iterator data, int n) { + for (int i = 0; i < n / 2; i++) { + std::complex even = data[i]; + std::complex odd = data[i + n / 2]; + + // w is the "twiddle-factor". + // this could be cached, but we run the same 'base' algorithm parallel/serial, + // so it won't impact the performance comparison. + std::complex w = exp(std::complex(0, -2. 
* M_PI * i / n)); + + data[i] = even + w * odd; + data[i + n / 2] = even - w * odd; + } +} + +void conquer(complex_vector::iterator data, complex_vector::iterator swap_array, int n) { + if (n < 2) { + return; + } + + divide(data, swap_array, n); + conquer(data, swap_array, n / 2); + conquer(data + n / 2, swap_array + n / 2, n / 2); + combine(data, n); +} + +} +} +} diff --git a/extern/benchmark_base/src/sample_images.cpp.in b/extern/benchmark_base/src/sample_images.cpp.in new file mode 100644 index 0000000..abef3b0 --- /dev/null +++ b/extern/benchmark_base/src/sample_images.cpp.in @@ -0,0 +1,23 @@ +#include +#include +#include +#include + +using namespace std; + +namespace comparison_benchmarks { + namespace base { + vector get_sample_image_paths() { + const int num_images = 19; + + vector result(num_images); + for (int i = 0; i < num_images; i++) { + ostringstream string_stream; + string_stream << "@CMAKE_CURRENT_SOURCE_DIR@/sample_images/" << i << ".jpg"; + result[i] = string_stream.str(); + } + + return result; + } + } +} diff --git a/app/benchmark_unbalanced/function_node.cpp b/extern/benchmark_base/src/unbalanced.cpp similarity index 68% rename from app/benchmark_unbalanced/function_node.cpp rename to extern/benchmark_base/src/unbalanced.cpp index 1cb931e..e23153e 100644 --- a/app/benchmark_unbalanced/function_node.cpp +++ b/extern/benchmark_base/src/unbalanced.cpp @@ -1,6 +1,9 @@ -#include "node.h" +#include "benchmark_base/unbalanced.h" + +namespace comparison_benchmarks { +namespace base { +namespace unbalanced { -namespace uts { node_state node::generate_child_state(uint32_t index) { node_state result; @@ -16,13 +19,16 @@ node_state node::generate_child_state(uint32_t index) { double node::get_state_random() { int32_t state_random_integer; - uint32_t b = ((uint32_t) state_[16] << 24) | - ((uint32_t) state_[17] << 16) | - ((uint32_t) state_[18] << 8) | - ((uint32_t) state_[19] << 0); + uint32_t b = ((uint32_t) state_[16] << 24u) | + ((uint32_t) state_[17] 
<< 16u) | + ((uint32_t) state_[18] << 8u) | + ((uint32_t) state_[19] << 0u); b = b & 0x7fffffff; // Mask out negative values state_random_integer = static_cast(b); return (double) state_random_integer / (double) INT32_MAX; } + +} +} } diff --git a/extern/benchmark_runner/CMakeLists.txt b/extern/benchmark_runner/CMakeLists.txt new file mode 100644 index 0000000..27dfa8b --- /dev/null +++ b/extern/benchmark_runner/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(benchmark_runner INTERFACE) +target_include_directories(benchmark_runner INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/extern/benchmark_runner/benchmark_runner.h b/extern/benchmark_runner/benchmark_runner.h new file mode 100644 index 0000000..7a9ef2b --- /dev/null +++ b/extern/benchmark_runner/benchmark_runner.h @@ -0,0 +1,107 @@ + +#ifndef BENCHMARK_RUNNER_H +#define BENCHMARK_RUNNER_H + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +class benchmark_runner { + private: + string csv_path_; + string csv_name_; + + chrono::steady_clock::time_point last_start_time_; + vector times_; + + void print_statistics() { + long time_sum = std::accumulate(times_.begin(), times_.end(), 0l); + cout << "Average Runtime (us): " << (time_sum / times_.size()) << endl; + } + + inline bool file_exists(const std::string &name) { + ifstream f(name); + return f.good(); + } + + public: + benchmark_runner(string csv_path, string csv_name) : csv_path_{std::move(csv_path)}, + csv_name_{std::move(csv_name)}, + times_{} { + string command = "mkdir -p " + csv_path_; + int res = system(command.c_str()); + if (res) { + cout << "Error while creating directory!" << endl; + exit(1); + } + } + + static void read_args(int argc, char **argv, int &num_threads, string &path) { + if (argc < 3) { + cout << "Must Specifiy concurrency and output directory! 
(usage: `benchmark `)" + << endl; + exit(1); + } + + string tmp = argv[1]; + path = tmp; + num_threads = atoi(argv[2]); + } + + void start_iteration() { + last_start_time_ = chrono::steady_clock::now(); + } + + void end_iteration() { + auto end_time = chrono::steady_clock::now(); + long time = chrono::duration_cast(end_time - last_start_time_).count(); + times_.emplace_back(time); + } + + void run_iterations(int count, + const function measure, + int warmup_count, + const function prepare = []() {}) { + for (int i = 0; i < warmup_count; i++) { + prepare(); + measure(); + } + + for (int i = 0; i < count; i++) { + prepare(); + start_iteration(); + measure(); + end_iteration(); + } + } + + void commit_results(bool print_stats) { + if (print_stats) { + print_statistics(); + } + + string full_filename = csv_path_ + csv_name_; + bool write_header = !file_exists(full_filename); + + { // Scope for output file + ofstream o(full_filename, std::fstream::out | std::fstream::app); + if (write_header) { + o << "runtime_us" << endl; + } + for (auto time : times_) { + o << time << endl; + } + } // End Scope for output file + + times_.clear(); + } +}; + +#endif //BENCHMARK_RUNNER_H diff --git a/extern/picosha2/CMakeLists.txt b/extern/picosha2/CMakeLists.txt new file mode 100644 index 0000000..0f2f59d --- /dev/null +++ b/extern/picosha2/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(picosha2 INTERFACE) +target_include_directories(picosha2 INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) \ No newline at end of file diff --git a/extern/picosha2/LICENSE b/extern/picosha2/LICENSE new file mode 100644 index 0000000..4e22100 --- /dev/null +++ b/extern/picosha2/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 okdshin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/extern/picosha2/picosha2.h b/extern/picosha2/picosha2.h new file mode 100644 index 0000000..bc00c74 --- /dev/null +++ b/extern/picosha2/picosha2.h @@ -0,0 +1,377 @@ +/* +The MIT License (MIT) + +Copyright (C) 2017 okdshin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef PICOSHA2_H +#define PICOSHA2_H +// picosha2:20140213 + +#ifndef PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR +#define PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR \ + 1048576 //=1024*1024: default is 1MB memory +#endif + +#include +#include +#include +#include +#include +#include +namespace picosha2 { +typedef unsigned long word_t; +typedef unsigned char byte_t; + +static const size_t k_digest_size = 32; + +namespace detail { +inline byte_t mask_8bit(byte_t x) { return x & 0xff; } + +inline word_t mask_32bit(word_t x) { return x & 0xffffffff; } + +const word_t add_constant[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, + 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2}; + +const word_t initial_message_digest[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, + 0xa54ff53a, 0x510e527f, 0x9b05688c, + 0x1f83d9ab, 0x5be0cd19}; + +inline word_t ch(word_t x, word_t y, word_t z) { return (x & y) ^ ((~x) & z); } + +inline word_t maj(word_t x, word_t y, word_t z) { + return (x & y) ^ (x & z) ^ (y & z); +} + +inline word_t rotr(word_t x, 
std::size_t n) { + assert(n < 32); + return mask_32bit((x >> n) | (x << (32 - n))); +} + +inline word_t bsig0(word_t x) { return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22); } + +inline word_t bsig1(word_t x) { return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25); } + +inline word_t shr(word_t x, std::size_t n) { + assert(n < 32); + return x >> n; +} + +inline word_t ssig0(word_t x) { return rotr(x, 7) ^ rotr(x, 18) ^ shr(x, 3); } + +inline word_t ssig1(word_t x) { return rotr(x, 17) ^ rotr(x, 19) ^ shr(x, 10); } + +template +void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 last) { + assert(first + 64 == last); + static_cast(last); // for avoiding unused-variable warning + word_t w[64]; + std::fill(w, w + 64, 0); + for (std::size_t i = 0; i < 16; ++i) { + w[i] = (static_cast(mask_8bit(*(first + i * 4))) << 24) | + (static_cast(mask_8bit(*(first + i * 4 + 1))) << 16) | + (static_cast(mask_8bit(*(first + i * 4 + 2))) << 8) | + (static_cast(mask_8bit(*(first + i * 4 + 3)))); + } + for (std::size_t i = 16; i < 64; ++i) { + w[i] = mask_32bit(ssig1(w[i - 2]) + w[i - 7] + ssig0(w[i - 15]) + + w[i - 16]); + } + + word_t a = *message_digest; + word_t b = *(message_digest + 1); + word_t c = *(message_digest + 2); + word_t d = *(message_digest + 3); + word_t e = *(message_digest + 4); + word_t f = *(message_digest + 5); + word_t g = *(message_digest + 6); + word_t h = *(message_digest + 7); + + for (std::size_t i = 0; i < 64; ++i) { + word_t temp1 = h + bsig1(e) + ch(e, f, g) + add_constant[i] + w[i]; + word_t temp2 = bsig0(a) + maj(a, b, c); + h = g; + g = f; + f = e; + e = mask_32bit(d + temp1); + d = c; + c = b; + b = a; + a = mask_32bit(temp1 + temp2); + } + *message_digest += a; + *(message_digest + 1) += b; + *(message_digest + 2) += c; + *(message_digest + 3) += d; + *(message_digest + 4) += e; + *(message_digest + 5) += f; + *(message_digest + 6) += g; + *(message_digest + 7) += h; + for (std::size_t i = 0; i < 8; ++i) { + *(message_digest + i) = 
mask_32bit(*(message_digest + i)); + } +} + +} // namespace detail + +template +void output_hex(InIter first, InIter last, std::ostream& os) { + os.setf(std::ios::hex, std::ios::basefield); + while (first != last) { + os.width(2); + os.fill('0'); + os << static_cast(*first); + ++first; + } + os.setf(std::ios::dec, std::ios::basefield); +} + +template +void bytes_to_hex_string(InIter first, InIter last, std::string& hex_str) { + std::ostringstream oss; + output_hex(first, last, oss); + hex_str.assign(oss.str()); +} + +template +void bytes_to_hex_string(const InContainer& bytes, std::string& hex_str) { + bytes_to_hex_string(bytes.begin(), bytes.end(), hex_str); +} + +template +std::string bytes_to_hex_string(InIter first, InIter last) { + std::string hex_str; + bytes_to_hex_string(first, last, hex_str); + return hex_str; +} + +template +std::string bytes_to_hex_string(const InContainer& bytes) { + std::string hex_str; + bytes_to_hex_string(bytes, hex_str); + return hex_str; +} + +class hash256_one_by_one { + public: + hash256_one_by_one() { init(); } + + void init() { + buffer_.clear(); + std::fill(data_length_digits_, data_length_digits_ + 4, 0); + std::copy(detail::initial_message_digest, + detail::initial_message_digest + 8, h_); + } + + template + void process(RaIter first, RaIter last) { + add_to_data_length(static_cast(std::distance(first, last))); + std::copy(first, last, std::back_inserter(buffer_)); + std::size_t i = 0; + for (; i + 64 <= buffer_.size(); i += 64) { + detail::hash256_block(h_, buffer_.begin() + i, + buffer_.begin() + i + 64); + } + buffer_.erase(buffer_.begin(), buffer_.begin() + i); + } + + void finish() { + byte_t temp[64]; + std::fill(temp, temp + 64, 0); + std::size_t remains = buffer_.size(); + std::copy(buffer_.begin(), buffer_.end(), temp); + temp[remains] = 0x80; + + if (remains > 55) { + std::fill(temp + remains + 1, temp + 64, 0); + detail::hash256_block(h_, temp, temp + 64); + std::fill(temp, temp + 64 - 4, 0); + } else { + 
std::fill(temp + remains + 1, temp + 64 - 4, 0); + } + + write_data_bit_length(&(temp[56])); + detail::hash256_block(h_, temp, temp + 64); + } + + template + void get_hash_bytes(OutIter first, OutIter last) const { + for (const word_t* iter = h_; iter != h_ + 8; ++iter) { + for (std::size_t i = 0; i < 4 && first != last; ++i) { + *(first++) = detail::mask_8bit( + static_cast((*iter >> (24 - 8 * i)))); + } + } + } + + private: + void add_to_data_length(word_t n) { + word_t carry = 0; + data_length_digits_[0] += n; + for (std::size_t i = 0; i < 4; ++i) { + data_length_digits_[i] += carry; + if (data_length_digits_[i] >= 65536u) { + carry = data_length_digits_[i] >> 16; + data_length_digits_[i] &= 65535u; + } else { + break; + } + } + } + void write_data_bit_length(byte_t* begin) { + word_t data_bit_length_digits[4]; + std::copy(data_length_digits_, data_length_digits_ + 4, + data_bit_length_digits); + + // convert byte length to bit length (multiply 8 or shift 3 times left) + word_t carry = 0; + for (std::size_t i = 0; i < 4; ++i) { + word_t before_val = data_bit_length_digits[i]; + data_bit_length_digits[i] <<= 3; + data_bit_length_digits[i] |= carry; + data_bit_length_digits[i] &= 65535u; + carry = (before_val >> (16 - 3)) & 65535u; + } + + // write data_bit_length + for (int i = 3; i >= 0; --i) { + (*begin++) = static_cast(data_bit_length_digits[i] >> 8); + (*begin++) = static_cast(data_bit_length_digits[i]); + } + } + std::vector buffer_; + word_t data_length_digits_[4]; // as 64bit integer (16bit x 4 integer) + word_t h_[8]; +}; + +inline void get_hash_hex_string(const hash256_one_by_one& hasher, + std::string& hex_str) { + byte_t hash[k_digest_size]; + hasher.get_hash_bytes(hash, hash + k_digest_size); + return bytes_to_hex_string(hash, hash + k_digest_size, hex_str); +} + +inline std::string get_hash_hex_string(const hash256_one_by_one& hasher) { + std::string hex_str; + get_hash_hex_string(hasher, hex_str); + return hex_str; +} + +namespace impl { +template 
+void hash256_impl(RaIter first, RaIter last, OutIter first2, OutIter last2, int, + std::random_access_iterator_tag) { + hash256_one_by_one hasher; + // hasher.init(); + hasher.process(first, last); + hasher.finish(); + hasher.get_hash_bytes(first2, last2); +} + +template +void hash256_impl(InputIter first, InputIter last, OutIter first2, + OutIter last2, int buffer_size, std::input_iterator_tag) { + std::vector buffer(buffer_size); + hash256_one_by_one hasher; + // hasher.init(); + while (first != last) { + int size = buffer_size; + for (int i = 0; i != buffer_size; ++i, ++first) { + if (first == last) { + size = i; + break; + } + buffer[i] = *first; + } + hasher.process(buffer.begin(), buffer.begin() + size); + } + hasher.finish(); + hasher.get_hash_bytes(first2, last2); +} +} + +template +void hash256(InIter first, InIter last, OutIter first2, OutIter last2, + int buffer_size = PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR) { + picosha2::impl::hash256_impl( + first, last, first2, last2, buffer_size, + typename std::iterator_traits::iterator_category()); +} + +template +void hash256(InIter first, InIter last, OutContainer& dst) { + hash256(first, last, dst.begin(), dst.end()); +} + +template +void hash256(const InContainer& src, OutIter first, OutIter last) { + hash256(src.begin(), src.end(), first, last); +} + +template +void hash256(const InContainer& src, OutContainer& dst) { + hash256(src.begin(), src.end(), dst.begin(), dst.end()); +} + +template +void hash256_hex_string(InIter first, InIter last, std::string& hex_str) { + byte_t hashed[k_digest_size]; + hash256(first, last, hashed, hashed + k_digest_size); + std::ostringstream oss; + output_hex(hashed, hashed + k_digest_size, oss); + hex_str.assign(oss.str()); +} + +template +std::string hash256_hex_string(InIter first, InIter last) { + std::string hex_str; + hash256_hex_string(first, last, hex_str); + return hex_str; +} + +inline void hash256_hex_string(const std::string& src, std::string& hex_str) { + 
hash256_hex_string(src.begin(), src.end(), hex_str); +} + +template +void hash256_hex_string(const InContainer& src, std::string& hex_str) { + hash256_hex_string(src.begin(), src.end(), hex_str); +} + +template +std::string hash256_hex_string(const InContainer& src) { + return hash256_hex_string(src.begin(), src.end()); +} +templatevoid hash256(std::ifstream& f, OutIter first, OutIter last){ + hash256(std::istreambuf_iterator(f), std::istreambuf_iterator(), first,last); + +} +}// namespace picosha2 +#endif // PICOSHA2_H diff --git a/lib/context_switcher/CMakeLists.txt b/lib/context_switcher/CMakeLists.txt new file mode 100644 index 0000000..354cb26 --- /dev/null +++ b/lib/context_switcher/CMakeLists.txt @@ -0,0 +1,82 @@ +cmake_minimum_required(VERSION 3.10) +project(context_switcher + VERSION 0.0.1 + DESCRIPTION "allows to execute functions and lambdas on a new stack and switch between them" + LANGUAGES CXX ASM) + +set(CMAKE_CXX_STANDARD 11) + +# Platform Support - Edit this when porting the context switch facility. +# Settings: +# CS_USE_BOOST = ON/OFF Use boost's fcontext as the assembly implementation for the context switch. +# CS_USE_FAST = ON/OFF Use custom implementation optimized for fast creation/return context switch. +# CMAKE_SYSTEM_PROCESSOR = String Target processor +# CMAKE_SYSTEM_NAME = String Target +# +# Typically cross compiling in cmake will set CMAKE_SYSTEM_PROCESSOR and CMAKE_SYSTEM_NAME. +# The library uses these two to select the correct assembly file for performing the context switch. +# We currently only include minimal system support in our config file. The fastest way to port the +# library is to see if one of the boost.context assembly files matches your target platform and adding +# that to tho selection script. +# +# By default we use CS_USE_FAST if available and fall back to CS_USE_BOOST if needed. 
+# When sanitizers are turned on (or other instrumentation) we recommend to use CS_USE_BOOST +# as its calling convention causes no problems with function entry/exit instrumentation. + +message("-- Configure Context Switcher: ${CMAKE_SYSTEM_PROCESSOR} running ${CMAKE_SYSTEM_NAME}") +include(asm/cscontext/SelectAssemblyFiles.cmake) +include(asm/fcontext/SelectAssemblyFiles.cmake) + +if (CS_CSCONTEXT_FOUND AND NOT THREAD_SANITIZER AND NOT CS_FORCE_FALLBACK) + MESSAGE("-- Using cscontext implementation") + SET(CS_CONTEXT_SWITCH_ASSEMBLY ${CS_CSCONTEXT_ASSEMBLY}) + SET(CS_USE_CSCONTEXT TRUE) +elseif (CS_FCONTEXT_FOUND) + if (THREAD_SANITIZER) + MESSAGE("-- Falling back to fcontext implementation (thread sanitizer active)") + else () + MESSAGE("-- Falling back to fcontext implementation") + endif () + SET(CS_CONTEXT_SWITCH_ASSEMBLY ${CS_FCONTEXT_ASSEMBLY}) + SET(CS_USE_CSCONTEXT FALSE) +else () + MESSAGE(FATAL_ERROR "Platform (${CMAKE_SYSTEM_PROCESSOR} on ${CMAKE_SYSTEM_NAME}) not supported! Please see Readme for instructions to port.") +endif () +message("-- Configure Context Switcher: Configuration Done") + +add_library(context_switcher STATIC + ${CS_CONTEXT_SWITCH_ASSEMBLY} + include/context_switcher/context_switcher.h src/context_switcher.cpp + include/context_switcher/cscontext.h + include/context_switcher/fcontext.h + include/context_switcher/continuation.h + include/context_switcher/context.h) + +if (CS_USE_CSCONTEXT) + target_compile_definitions(context_switcher PUBLIC CS_USE_CSCONTEXT) +else () + target_compile_definitions(context_switcher PUBLIC CS_USE_FCONTEXT) +endif () + +# Add everything in `./include` to be in the include path of this project +target_include_directories(context_switcher + PUBLIC + $ + $ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src + ) + +# Installation on the system (we pack the context switcher with pls for now...) 
+INSTALL(TARGETS context_switcher + EXPORT pls-targets + LIBRARY DESTINATION lib/context_switcher + ARCHIVE DESTINATION lib/context_switcher + ) +# ...all headers in `include` +INSTALL( + DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/pls + DESTINATION include + FILES_MATCHING PATTERN "*.h*" +) + diff --git a/lib/context_switcher/asm/cscontext/SelectAssemblyFiles.cmake b/lib/context_switcher/asm/cscontext/SelectAssemblyFiles.cmake new file mode 100644 index 0000000..e957080 --- /dev/null +++ b/lib/context_switcher/asm/cscontext/SelectAssemblyFiles.cmake @@ -0,0 +1,25 @@ +# Tries to locate the correct CS_FAST_ASSEMBLY files for this platform and +# sets CS_CSCONTEXT_FOUND to true if the platform is supported. +# +# To add a platform add a clause locating the correct assembly implementations for the system. + +SET(CS_CSCONTEXT_FOUND TRUE) +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + # Typical Linux running on x86_64 + SET(CS_CSCONTEXT_ASSEMBLY + asm/cscontext/enter_context_x86_64_sysv_elf.s + asm/cscontext/switch_context_x86_64_sysv_elf.s) +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + # Typical Linux running on ARMv7 + SET(CS_CSCONTEXT_ASSEMBLY + asm/cscontext/enter_context_arm32_sysv_elf.s + asm/cscontext/switch_context_arm32_sysv_elf.s) +else () + SET(CS_CSCONTEXT_FOUND FALSE) +endif () + +if (CS_CSCONTEXT_FOUND) + MESSAGE("-- CS_CSCONTEXT_FOUND: ${CS_CSCONTEXT_ASSEMBLY}") +else () + MESSAGE("-- CS_CSCONTEXT_FOUND: NOT FOUND") +endif () diff --git a/lib/context_switcher/asm/cscontext/enter_context_arm32_sysv_elf.s b/lib/context_switcher/asm/cscontext/enter_context_arm32_sysv_elf.s new file mode 100644 index 0000000..9f6b917 --- /dev/null +++ b/lib/context_switcher/asm/cscontext/enter_context_arm32_sysv_elf.s @@ -0,0 +1,63 @@ + .arm + .text + .global __cs_enter_context + .type __cs_enter_context, %function + +__cs_enter_context: + /* Parameter List (in order) + * r0 = new stack 
pointer + * r1 = first parameter to callback + * r2 = callback function pointer + * r3 = new stack limit (not used on most platforms) + * + * Return + * r0 = continuation that returned control back to the caller (null if fallthrough) + * + * Variables + * r4 = temporary for the old stack pointer */ + + /* ========== Save State ========== */ + /* store programm counter for later return */ + push {lr} + /* store callee saved registers */ + push {r4-r12,lr} + /* store floating point extension registers */ + #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + sub sp, sp, #64 + vstmia sp, {d8-d15} + #endif + /* ========== Save State ========== */ + + /* Perform change to new stack */ + /* Keep old stack as second parameter to our callback function. */ + mov r4, sp + /* Make sure that stack start is properly aligned. */ + and r0, r0, #-16 + /* Switch to new stack pointer. */ + mov sp, r0 + + /* Perform actual function call, this will now be on the new stack */ + /* r0 = first parametor to callback (continuation) */ + /* r1 = second parameter to callback (arbetary pointer) */ + mov r0, r4 + blx r2 + + /* Restore state of returned continuation. */ + /* To do so we first reset the stack pointer (which we get returned in r0). */ + /* After that we execute our standard restore procedere to pop the state from the stack. */ + mov sp, r0 + + /* ========== Restore State ========== */ + /* restore floating point extension registers */ + #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + vldmia sp, {d8-d15} + add sp, sp, #64 + #endif + /* restore callee saved registers */ + pop {r4-r12,lr} + /* ========== Restore State ========== */ + + /* Just return back from the call. */ + /* This is the end of a fiber, so we have no continuation. 
*/ + eor r0, r0, r0 + pop {pc} diff --git a/lib/context_switcher/asm/cscontext/enter_context_x86_64_sysv_elf.s b/lib/context_switcher/asm/cscontext/enter_context_x86_64_sysv_elf.s new file mode 100644 index 0000000..3bd5315 --- /dev/null +++ b/lib/context_switcher/asm/cscontext/enter_context_x86_64_sysv_elf.s @@ -0,0 +1,92 @@ + .file "enter_context_x86_64.s" + .text + .global __cs_enter_context + .type __cs_enter_context, @function + +.align 16 +__cs_enter_context: + .cfi_startproc + .cfi_undefined rip + # Parameter List (in order) + # rdi = new stack pointer + # rsi = first parameter to callback + # rdx = callback function pointer + # rcx = new stack limit (not used on most platforms) + + # Return + # rax = continuation that returned control back to the caller (null if fallthrough) + + # Variables + # r12 = temporary for the old stack pointer + + ############### Save State ############### + # Make space on the stack + leaq -0x38(%rsp), %rsp + # Store calee saved general registers. + movq %r12, 0x00(%rsp) + movq %r13, 0x08(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + movq %rbx, 0x20(%rsp) + movq %rbp, 0x28(%rsp) + # Store MMX control- and status-word + stmxcsr 0x30(%rsp) + # Store x87 control-word + fnstcw 0x34(%rsp) + ############### Save State ############### + + # Perform change to new stack. + # Keep old stack as second parameter to our callback function. + movq %rsp, %r12 + # Make sure that stack start is properly aligned. + andq $-16, %rdi + # Switch to new stack pointer. + movq %rdi, %rsp + + # Init the new stack to something reasonable. + # Here we point it at the finish part of our function with the frame pointer at 0x0. + # This is not stricly required, but stops debuggers/tools from freaking out. 
+ leaq __cs_finish(%rip), %r13 + pushq %r13 + pushq $0x0 + movq %rsp, %rbp + + # Perform actual function call, this will now be on the new stack + # rdi = first parametor to callback (continuation) + # rsi = second parameter to callback (arbetary pointer) + movq %r12, %rdi + call *%rdx + + # Restore state of returned continuation. + # To do so we first reset the stack pointer (which we get returned in rax). + # After that we execute our standard restore procedere to pop the state from the stack. + movq %rax, %rsp + + ############ Restore State ############ + # restore calee saved general registers + movq 0x00(%rsp), %r12 + movq 0x08(%rsp), %r13 + movq 0x10(%rsp), %r14 + movq 0x18(%rsp), %r15 + movq 0x20(%rsp), %rbx + movq 0x28(%rsp), %rbp + # restore MMX control- and status-word + ldmxcsr 0x30(%rsp) + # restore x87 control-word + fldcw 0x34(%rsp) + # Free space on the stack + leaq 0x38(%rsp), %rsp + ############ Restore State ############ + + # Just return back from the call. + # This is the end of a fiber, so we have no continuation. 
+ xor %rax, %rax + ret + +__cs_finish: + # exit code is zero + xorq %rdi, %rdi + # exit application + call _exit@PLT + hlt + .cfi_endproc diff --git a/lib/context_switcher/asm/cscontext/switch_context_arm32_sysv_elf.s b/lib/context_switcher/asm/cscontext/switch_context_arm32_sysv_elf.s new file mode 100644 index 0000000..3f143a1 --- /dev/null +++ b/lib/context_switcher/asm/cscontext/switch_context_arm32_sysv_elf.s @@ -0,0 +1,48 @@ + .arm + .text + .global __cs_switch_context + .type __cs_switch_context, %function + +__cs_switch_context: + /* Parameter List (in order) + * r0 = pointer to continuation (should hold value of target stack will be filled with this continuation) + * + * Return + * r0 = continuation that returned control back to the caller (null if fallthrough) + * + * Variables + * r1 = temporary for the old stack pointer */ + + /* ========== Save State ========== */ + /* store programm counter for later return */ + push {lr} + /* store callee saved registers */ + push {r4-r12,lr} + /* store floating point extension registers */ + #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + sub sp, sp, #64 + vstmia sp, {d8-d15} + #endif + /* ========== Save State ========== */ + + /* Perform change to new stack */ + /* Keep old stack as result from this function. */ + mov r1, sp + /* Switch to new stack pointer. */ + mov sp, r0 + + + /* ========== Restore State ========== */ + /* restore floating point extension registers */ + #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + vldmia sp, {d8-d15} + add sp, sp, #64 + #endif + /* restore callee saved registers */ + pop {r4-r12,lr} + /* ========== Restore State ========== */ + + /* Just return back from the call. */ + /* This is the end of a fiber, so we have no continuation. 
*/ + mov r0, r1 + pop {pc} diff --git a/lib/context_switcher/asm/cscontext/switch_context_x86_64_sysv_elf.s b/lib/context_switcher/asm/cscontext/switch_context_x86_64_sysv_elf.s new file mode 100644 index 0000000..47af59e --- /dev/null +++ b/lib/context_switcher/asm/cscontext/switch_context_x86_64_sysv_elf.s @@ -0,0 +1,54 @@ + .file "switch_context_x86_64.s" + .text + .global __cs_switch_context + .type __cs_switch_context, @function + +.align 16 +__cs_switch_context: + # Parameter List (in order) + # rdi = pointer to continuation (should hold value of target stack will be filled with this continuation) + + # Return + # rax = continuation that returned control back to the caller (null if fallthrough) + + ############### Save State ############### + # Make space on the stack + leaq -0x38(%rsp), %rsp + # Store callee saved general registers. + movq %r12, 0x00(%rsp) + movq %r13, 0x08(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + movq %rbx, 0x20(%rsp) + movq %rbp, 0x28(%rsp) + # Store MMX control- and status-word + stmxcsr 0x30(%rsp) + # Store x87 control-word + fnstcw 0x34(%rsp) + ############### Save State ############### + + # Perform change to new stack. + # Keep old stack as result from this function + movq %rsp, %rax + # switch to new stack pointer + movq %rdi, %rsp + + ############ Restore State ############ + # restore callee saved general registers + movq 0x00(%rsp), %r12 + movq 0x08(%rsp), %r13 + movq 0x10(%rsp), %r14 + movq 0x18(%rsp), %r15 + movq 0x20(%rsp), %rbx + movq 0x28(%rsp), %rbp + # restore MMX control- and status-word + ldmxcsr 0x30(%rsp) + # restore x87 control-word + fldcw 0x34(%rsp) + # Free space on the stack + leaq 0x38(%rsp), %rsp + ############ Restore State ############ + + # Return the context we came from as a continuation.
+ # rax has already the correct value + ret diff --git a/lib/context_switcher/asm/fcontext/SelectAssemblyFiles.cmake b/lib/context_switcher/asm/fcontext/SelectAssemblyFiles.cmake new file mode 100644 index 0000000..25d91fe --- /dev/null +++ b/lib/context_switcher/asm/fcontext/SelectAssemblyFiles.cmake @@ -0,0 +1,27 @@ +# Tries to locate the correct CS_FCONTEXT_ASSEMBLY files for this platform and +# sets CS_FCONTEXT_FOUND to true if the platform is supported. +# +# To add a platform add a clause locating the correct assembly implementations for the system. + +SET(CS_FCONTEXT_FOUND TRUE) +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + # Typical Linux running on x86_64 + SET(CS_FCONTEXT_ASSEMBLY + asm/fcontext/jump_x86_64_sysv_elf_gas.S + asm/fcontext/make_x86_64_sysv_elf_gas.S + asm/fcontext/ontop_x86_64_sysv_elf_gas.S) +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + # Typical Linux running on ARMv7 + SET(CS_FCONTEXT_ASSEMBLY + asm/fcontext/jump_arm_aapcs_elf_gas.S + asm/fcontext/make_arm_aapcs_elf_gas.S + asm/fcontext/ontop_arm_aapcs_elf_gas.S) +else () + SET(CS_FCONTEXT_FOUND FALSE) +endif () + +if (CS_FCONTEXT_FOUND) + MESSAGE("-- CS_FCONTEXT_FOUND: ${CS_FCONTEXT_ASSEMBLY}") +else () + MESSAGE("-- CS_FCONTEXT_FOUND: NOT FOUND") +endif () diff --git a/lib/context_switcher/asm/fcontext/jump_arm64_aapcs_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_arm64_aapcs_elf_gas.S new file mode 100644 index 0000000..cefd183 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_arm64_aapcs_elf_gas.S @@ -0,0 +1,114 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | d8 | d9 | d10 | d11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | d12 | d13 | d14 | d15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- 
* + * | 40 | 41 | 42 | 43 | | | * + * ------------------------------------------------- * + * | 0xa0| 0xa4| 0xa8| 0xac| | | * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.file "jump_arm64_aapcs_elf_gas.S" +.text +.align 2 +.global jump_fcontext +.type jump_fcontext, %function +jump_fcontext: + # prepare stack for GP + FPU + sub sp, sp, #0xb0 + + # save d8 - d15 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + + # save x19-x30 + stp x19, x20, [sp, #0x40] + stp x21, x22, [sp, #0x50] + stp x23, x24, [sp, #0x60] + stp x25, x26, [sp, #0x70] + stp x27, x28, [sp, #0x80] + stp x29, x30, [sp, #0x90] + + # save LR as PC + str x30, [sp, #0xa0] + + # store SP (pointing to context-data) in X4 + mov x4, sp + + # restore SP (pointing to context-data) from X0 + mov sp, x0 + + # load d8 - d15 + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + # load x19-x30 + ldp x19, x20, [sp, #0x40] + ldp x21, x22, [sp, #0x50] + ldp x23, x24, [sp, #0x60] + ldp x25, x26, [sp, #0x70] + ldp x27, x28, [sp, #0x80] + ldp x29, x30, [sp, #0x90] + + # return transfer_t from jump + # pass transfer_t as first arg in context function + # X0 == FCTX, X1 == DATA + mov x0, x4 + + # load pc + ldr x4, [sp, #0xa0] + + # restore stack from GP + FPU + add sp, sp, #0xb0 + + ret x4 +.size jump_fcontext,.-jump_fcontext +# Mark that we don't need executable stack.
+.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/jump_arm64_aapcs_macho_gas.S b/lib/context_switcher/asm/fcontext/jump_arm64_aapcs_macho_gas.S new file mode 100644 index 0000000..31738f7 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_arm64_aapcs_macho_gas.S @@ -0,0 +1,109 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | d8 | d9 | d10 | d11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | d12 | d13 | d14 | d15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * 
------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 42 | 43 | | | * + * ------------------------------------------------- * + * | 0xa0| 0xa4| 0xa8| 0xac| | | * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _jump_fcontext +.balign 16 +_jump_fcontext: + ; prepare stack for GP + FPU + sub sp, sp, #0xb0 + + ; save d8 - d15 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + + ; save x19-x30 + stp x19, x20, [sp, #0x40] + stp x21, x22, [sp, #0x50] + stp x23, x24, [sp, #0x60] + stp x25, x26, [sp, #0x70] + stp x27, x28, [sp, #0x80] + stp fp, lr, [sp, #0x90] + + ; save LR as PC + str lr, [sp, #0xa0] + + ; store SP (pointing to context-data) in X4 + mov x4, sp + + ; restore SP (pointing to context-data) from X0 + mov sp, x0 + + ; load d8 - d15 + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + ; load x19-x30 + ldp x19, x20, [sp, #0x40] + ldp x21, x22, [sp, #0x50] + ldp x23, x24, [sp, #0x60] + ldp x25, x26, [sp, #0x70] + ldp x27, x28, [sp, #0x80] + ldp fp, lr, [sp, #0x90] + + ; return transfer_t from jump + ; pass transfer_t as first arg in context function + ; X0 == FCTX, X1 == DATA + mov x0, x4 + + ; load pc + ldr x4, [sp, #0xa0] + + ; restore stack from GP + FPU + add sp, sp, #0xb0 + + ret x4 diff --git
a/lib/context_switcher/asm/fcontext/jump_arm_aapcs_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_arm_aapcs_elf_gas.S new file mode 100644 index 0000000..86efe9d --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_arm_aapcs_elf_gas.S @@ -0,0 +1,88 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | s16 | s17 | s18 | s19 | s20 | s21 | s22 | s23 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | s24 | s25 | s26 | s27 | s28 | s29 | s30 | s31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * |hiddn| v1 | v2 | v3 | v4 | v5 | v6 | v7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | v8 | lr | pc | FCTX| DATA| | * + * 
------------------------------------------------- * + * * + *******************************************************/ + +.file "jump_arm_aapcs_elf_gas.S" +.text +.globl jump_fcontext +.align 2 +.type jump_fcontext,%function +.syntax unified +jump_fcontext: + @ save LR as PC + push {lr} + @ save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + @ prepare stack for FPU + sub sp, sp, #64 +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + @ save S16-S31 + vstmia sp, {d8-d15} +#endif + + @ store RSP (pointing to context-data) in A1 + mov a1, sp + + @ restore RSP (pointing to context-data) from A2 + mov sp, a2 + +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + @ restore S16-S31 + vldmia sp, {d8-d15} +#endif + @ prepare stack for FPU + add sp, sp, #64 + + @ restore hidden,V1-V8,LR + pop {a4,v1-v8,lr} + + @ return transfer_t from jump + str a1, [a4, #0] + str a3, [a4, #4] + @ pass transfer_t as first arg in context function + @ A1 == FCTX, A2 == DATA + mov a2, a3 + + @ restore PC + pop {pc} +.size jump_fcontext,.-jump_fcontext + +@ Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/jump_arm_aapcs_macho_gas.S b/lib/context_switcher/asm/fcontext/jump_arm_aapcs_macho_gas.S new file mode 100644 index 0000000..8edd0d7 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_arm_aapcs_macho_gas.S @@ -0,0 +1,95 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | s16 | s17 | s18 | s19 | s20 | s21 | s22 | s23 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | s24 | s25 | s26 | s27 | s28 | s29 | s30 | s31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | sjlj|hiddn| v1 | v2 | v3 | v4 | v5 | v6 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | v7 | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _jump_fcontext +.align 2 +_jump_fcontext: + @ save LR as PC + push {lr} + @ save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + @ locate TLS to save/restore SjLj handler + mrc p15, 0, v2, c13, c0, #3 + bic v2, v2, #3 + + @ load 
TLS[__PTK_LIBC_DYLD_Unwind_SjLj_Key] + ldr v1, [v2, #8] + @ save SjLj handler + push {v1} + + @ prepare stack for FPU + sub sp, sp, #64 +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + @ save S16-S31 + vstmia sp, {d8-d15} +#endif + + @ store RSP (pointing to context-data) in A1 + mov a1, sp + + @ restore RSP (pointing to context-data) from A2 + mov sp, a2 + +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + @ restore S16-S31 + vldmia sp, {d8-d15} +#endif + @ prepare stack for FPU + add sp, sp, #64 + + @ restore SjLj handler + pop {v1} + @ store SjLj handler in TLS + str v1, [v2, #8] + + @ restore hidden,V1-V8,LR + pop {a4,v1-v8,lr} + + @ return transfer_t from jump + str a1, [a4, #0] + str a3, [a4, #4] + @ pass transfer_t as first arg in context function + @ A1 == FCTX, A2 == DATA + mov a2, a3 + + @ restore PC + pop {pc} diff --git a/lib/context_switcher/asm/fcontext/jump_arm_aapcs_pe_armasm.asm b/lib/context_switcher/asm/fcontext/jump_arm_aapcs_pe_armasm.asm new file mode 100644 index 0000000..bca923c --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_arm_aapcs_pe_armasm.asm @@ -0,0 +1,81 @@ +;/* +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0.
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) +;*/ + +; ******************************************************* +; * * +; * ------------------------------------------------- * +; * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +; * ------------------------------------------------- * +; * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * +; * ------------------------------------------------- * +; * |deall|limit| base|hiddn| v1 | v2 | v3 | v4 | * +; * ------------------------------------------------- * +; * ------------------------------------------------- * +; * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +; * ------------------------------------------------- * +; * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * +; * ------------------------------------------------- * +; * | v5 | v6 | v7 | v8 | lr | pc | FCTX| DATA| * +; * ------------------------------------------------- * +; * * +; ******************************************************* + + AREA |.text|, CODE + ALIGN 4 + EXPORT jump_fcontext + +jump_fcontext PROC + ; save LR as PC + push {lr} + ; save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + ; load TIB to save/restore thread size and limit. 
+ ; we do not need preserve CPU flag and can use it's arg register + mrc p15, #0, v1, c13, c0, #2 + + ; save current stack base + ldr a5, [v1, #0x04] + push {a5} + ; save current stack limit + ldr a5, [v1, #0x08] + push {a5} + ; save current deallocation stack + ldr a5, [v1, #0xe0c] + push {a5} + + ; store RSP (pointing to context-data) in A1 + mov a1, sp + + ; restore RSP (pointing to context-data) from A2 + mov sp, a2 + + ; restore deallocation stack + pop {a5} + str a5, [v1, #0xe0c] + ; restore stack limit + pop {a5} + str a5, [v1, #0x08] + ; restore stack base + pop {a5} + str a5, [v1, #0x04] + + ; restore hidden,V1-V8,LR + pop {a4,v1-v8,lr} + + ; return transfer_t from jump + str a1, [a4, #0] + str a3, [a4, #4] + ; pass transfer_t as first arg in context function + ; A1 == FCTX, A2 == DATA + mov a2, a3 + + ; restore PC + pop {pc} + + ENDP + END diff --git a/lib/context_switcher/asm/fcontext/jump_combined_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/jump_combined_sysv_macho_gas.S new file mode 100644 index 0000000..1d27afa --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_combined_sysv_macho_gas.S @@ -0,0 +1,20 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "jump_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "jump_x86_64_sysv_macho_gas.S" +#elif defined(__ppc__) + #include "jump_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "jump_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/lib/context_switcher/asm/fcontext/jump_i386_ms_pe_gas.asm b/lib/context_switcher/asm/fcontext/jump_i386_ms_pe_gas.asm new file mode 100644 index 0000000..bf5c75a --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_i386_ms_pe_gas.asm @@ -0,0 +1,123 @@ +/* + Copyright Oliver Kowalke 2009. 
+ Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************* +* --------------------------------------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* --------------------------------------------------------------------------------- * +* | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | * +* --------------------------------------------------------------------------------- * +* | fc_mxcsr|fc_x87_cw| fc_strg |fc_deallo| limit | base | fc_seh | EDI | * +* --------------------------------------------------------------------------------- * +* --------------------------------------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* --------------------------------------------------------------------------------- * +* | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | * +* --------------------------------------------------------------------------------- * +* | ESI | EBX | EBP | EIP | to | data | EH NXT |SEH HNDLR| * +* --------------------------------------------------------------------------------- * +**************************************************************************************/ + +.file "jump_i386_ms_pe_gas.asm" +.text +.p2align 4,,15 + +/* mark as using no unregistered SEH handlers */ +.globl @feat.00 +.def @feat.00; .scl 3; .type 0; .endef +.set @feat.00, 1 + +.globl _jump_fcontext +.def _jump_fcontext; .scl 2; .type 32; .endef +_jump_fcontext: + /* prepare stack */ + leal -0x2c(%esp), %esp + +#if !defined(BOOST_USE_TSX) + /* save MMX control- and status-word */ + stmxcsr (%esp) + /* save x87 control-word */ + fnstcw 0x4(%esp) +#endif + + /* load NT_TIB */ + movl %fs:(0x18), %edx + /* load fiber local storage */ + movl 0x10(%edx), %eax + movl %eax, 
0x8(%esp) + /* load current deallocation stack */ + movl 0xe0c(%edx), %eax + movl %eax, 0xc(%esp) + /* load current stack limit */ + movl 0x8(%edx), %eax + movl %eax, 0x10(%esp) + /* load current stack base */ + movl 0x4(%edx), %eax + movl %eax, 0x14(%esp) + /* load current SEH exception list */ + movl (%edx), %eax + movl %eax, 0x18(%esp) + + movl %edi, 0x1c(%esp) /* save EDI */ + movl %esi, 0x20(%esp) /* save ESI */ + movl %ebx, 0x24(%esp) /* save EBX */ + movl %ebp, 0x28(%esp) /* save EBP */ + + /* store ESP (pointing to context-data) in EAX */ + movl %esp, %eax + + /* firstarg of jump_fcontext() == fcontext to jump to */ + movl 0x30(%esp), %ecx + + /* restore ESP (pointing to context-data) from ECX */ + movl %ecx, %esp + +#if !defined(BOOST_USE_TSX) + /* restore MMX control- and status-word */ + ldmxcsr (%esp) + /* restore x87 control-word */ + fldcw 0x4(%esp) +#endif + + /* restore NT_TIB into EDX */ + movl %fs:(0x18), %edx + /* restore fiber local storage */ + movl 0x8(%esp), %ecx + movl %ecx, 0x10(%edx) + /* restore current deallocation stack */ + movl 0xc(%esp), %ecx + movl %ecx, 0xe0c(%edx) + /* restore current stack limit */ + movl 0x10(%esp), %ecx + movl %ecx, 0x8(%edx) + /* restore current stack base */ + movl 0x14(%esp), %ecx + movl %ecx, 0x4(%edx) + /* restore current SEH exception list */ + movl 0x18(%esp), %ecx + movl %ecx, (%edx) + + movl 0x2c(%esp), %ecx /* restore EIP */ + + movl 0x1c(%esp), %edi /* restore EDI */ + movl 0x20(%esp), %esi /* restore ESI */ + movl 0x24(%esp), %ebx /* restore EBX */ + movl 0x28(%esp), %ebp /* restore EBP */ + + /* prepare stack */ + leal 0x30(%esp), %esp + + /* return transfer_t */ + /* FCTX == EAX, DATA == EDX */ + movl 0x34(%eax), %edx + + /* jump to context */ + jmp *%ecx + +.section .drectve +.ascii " -export:\"_jump_fcontext\"" diff --git a/lib/context_switcher/asm/fcontext/jump_i386_ms_pe_masm.asm b/lib/context_switcher/asm/fcontext/jump_i386_ms_pe_masm.asm new file mode 100644 index 0000000..7a9e848 ---
/dev/null +++ b/lib/context_switcher/asm/fcontext/jump_i386_ms_pe_masm.asm @@ -0,0 +1,116 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; --------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; --------------------------------------------------------------------------------- +; | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | +; --------------------------------------------------------------------------------- +; | fc_mxcsr|fc_x87_cw| fc_strg |fc_deallo| limit | base | fc_seh | EDI | +; --------------------------------------------------------------------------------- +; --------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; --------------------------------------------------------------------------------- +; | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | +; --------------------------------------------------------------------------------- +; | ESI | EBX | EBP | EIP | to | data | EH NXT |SEH HNDLR| +; --------------------------------------------------------------------------------- + +.386 +.XMM +.model flat, c +.code + +jump_fcontext PROC BOOST_CONTEXT_EXPORT + ; prepare stack + lea esp, [esp-02ch] + +IFNDEF BOOST_USE_TSX + ; save MMX control- and status-word + stmxcsr [esp] + ; save x87 control-word + fnstcw [esp+04h] +ENDIF + + assume fs:nothing + ; load NT_TIB into EDX + mov edx, fs:[018h] + assume fs:error + ; load fiber local storage + mov eax, [edx+010h] + mov [esp+08h], eax + ; load current deallocation stack + mov eax, [edx+0e0ch] + mov [esp+0ch], eax + ; load current stack limit + mov eax, [edx+08h] + mov [esp+010h], eax + ; load current stack base + mov eax, [edx+04h] + mov [esp+014h], eax + ; load current SEH exception list + mov eax, [edx] + mov
[esp+018h], eax + + mov [esp+01ch], edi ; save EDI + mov [esp+020h], esi ; save ESI + mov [esp+024h], ebx ; save EBX + mov [esp+028h], ebp ; save EBP + + ; store ESP (pointing to context-data) in EAX + mov eax, esp + + ; firstarg of jump_fcontext() == fcontext to jump to + mov ecx, [esp+030h] + + ; restore ESP (pointing to context-data) from ECX + mov esp, ecx + +IFNDEF BOOST_USE_TSX + ; restore MMX control- and status-word + ldmxcsr [esp] + ; restore x87 control-word + fldcw [esp+04h] +ENDIF + + assume fs:nothing + ; load NT_TIB into EDX + mov edx, fs:[018h] + assume fs:error + ; restore fiber local storage + mov ecx, [esp+08h] + mov [edx+010h], ecx + ; restore current deallocation stack + mov ecx, [esp+0ch] + mov [edx+0e0ch], ecx + ; restore current stack limit + mov ecx, [esp+010h] + mov [edx+08h], ecx + ; restore current stack base + mov ecx, [esp+014h] + mov [edx+04h], ecx + ; restore current SEH exception list + mov ecx, [esp+018h] + mov [edx], ecx + + mov ecx, [esp+02ch] ; restore EIP + + mov edi, [esp+01ch] ; restore EDI + mov esi, [esp+020h] ; restore ESI + mov ebx, [esp+024h] ; restore EBX + mov ebp, [esp+028h] ; restore EBP + + ; prepare stack + lea esp, [esp+030h] + + ; return transfer_t + ; FCTX == EAX, DATA == EDX + mov edx, [eax+034h] + + ; jump to context + jmp ecx +jump_fcontext ENDP +END diff --git a/lib/context_switcher/asm/fcontext/jump_i386_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_i386_sysv_elf_gas.S new file mode 100644 index 0000000..b96d4b5 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_i386_sysv_elf_gas.S @@ -0,0 +1,83 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| EDI | ESI | EBX | EBP | EIP | hidden | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | | * + * ---------------------------------------------------------------------------------- * + * | to | data | | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.file "jump_i386_sysv_elf_gas.S" +.text +.globl jump_fcontext +.align 2 +.type jump_fcontext,@function +jump_fcontext: + leal -0x18(%esp), %esp /* prepare stack */ + +#if !defined(BOOST_USE_TSX) + stmxcsr (%esp) /* save MMX control- and status-word */ + fnstcw 0x4(%esp) /* save x87 control-word */ +#endif + + movl %edi, 0x8(%esp) /* save EDI */ + movl %esi, 0xc(%esp) /* save ESI */ + movl %ebx, 0x10(%esp) /* save EBX */ + movl %ebp, 0x14(%esp) /* save EBP */ + + /* store ESP (pointing to context-data) in ECX */ + movl %esp, %ecx + + /* first arg of jump_fcontext() == fcontext to jump to */ + movl 0x20(%esp), %eax + + /* second arg of jump_fcontext() == data to be transferred */ + movl 0x24(%esp), %edx + + /* restore ESP 
(pointing to context-data) from EAX */ + movl %eax, %esp + + /* address of returned transport_t */ + movl 0x1c(%esp), %eax + /* return parent fcontext_t */ + movl %ecx, (%eax) + /* return data */ + movl %edx, 0x4(%eax) + + movl 0x18(%esp), %ecx /* restore EIP */ + +#if !defined(BOOST_USE_TSX) + ldmxcsr (%esp) /* restore MMX control- and status-word */ + fldcw 0x4(%esp) /* restore x87 control-word */ +#endif + + movl 0x8(%esp), %edi /* restore EDI */ + movl 0xc(%esp), %esi /* restore ESI */ + movl 0x10(%esp), %ebx /* restore EBX */ + movl 0x14(%esp), %ebp /* restore EBP */ + + leal 0x20(%esp), %esp /* prepare stack */ + + /* jump to context */ + jmp *%ecx +.size jump_fcontext,.-jump_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/jump_i386_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/jump_i386_sysv_macho_gas.S new file mode 100644 index 0000000..8ab7c6f --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_i386_sysv_macho_gas.S @@ -0,0 +1,74 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| EDI | ESI | EBX | EBP | EIP | to | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | | * + * ---------------------------------------------------------------------------------- * + * | data | | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl _jump_fcontext +.align 2 +_jump_fcontext: + leal -0x18(%esp), %esp /* reserve 0x18 bytes for context-data */ + +#if !defined(BOOST_USE_TSX) + stmxcsr (%esp) /* save MXCSR (SSE control- and status-word) */ + fnstcw 0x4(%esp) /* save x87 control-word */ +#endif + + movl %edi, 0x8(%esp) /* save EDI */ + movl %esi, 0xc(%esp) /* save ESI */ + movl %ebx, 0x10(%esp) /* save EBX */ + movl %ebp, 0x14(%esp) /* save EBP */ + + /* store ESP (pointing to context-data) in ECX */ + movl %esp, %ecx + + /* first arg of jump_fcontext() == fcontext to jump to */ + movl 0x1c(%esp), %eax + + /* second arg of jump_fcontext() == data to be transferred */ + movl 0x20(%esp), %edx + + /* restore ESP (pointing to context-data) from EAX */ + movl %eax, %esp + + /* return parent
fcontext_t */ + movl %ecx, %eax + /* returned data is stored in EDX */ + + movl 0x18(%esp), %ecx /* restore EIP */ + +#if !defined(BOOST_USE_TSX) + ldmxcsr (%esp) /* restore MXCSR (SSE control- and status-word) */ + fldcw 0x4(%esp) /* restore x87 control-word */ +#endif + + movl 0x8(%esp), %edi /* restore EDI */ + movl 0xc(%esp), %esi /* restore ESI */ + movl 0x10(%esp), %ebx /* restore EBX */ + movl 0x14(%esp), %ebp /* restore EBP */ + + leal 0x1c(%esp), %esp /* pop context-data and the EIP slot (0x1c bytes) */ + + /* jump to context */ + jmp *%ecx diff --git a/lib/context_switcher/asm/fcontext/jump_i386_x86_64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/jump_i386_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..959ddac --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_i386_x86_64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "jump_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "jump_x86_64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/lib/context_switcher/asm/fcontext/jump_mips32_o32_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_mips32_o32_elf_gas.S new file mode 100644 index 0000000..f2b8034 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_mips32_o32_elf_gas.S @@ -0,0 +1,119 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | F20 | F22 | F24 | F26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | F28 | F30 | S0 | S1 | S2 | S3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | S4 | S5 | S6 | S7 | FP |hiddn| RA | PC | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | ABI ARGS | GP | FCTX| DATA| | * + * ------------------------------------------------- * + * * + * *****************************************************/ + +.file "jump_mips32_o32_elf_gas.S" +.text +.globl jump_fcontext +.align 2 +.type jump_fcontext,@function +.ent jump_fcontext +jump_fcontext: + # reserve 96 bytes of stack for context-data + addiu $sp, $sp, -96 + + sw $s0, 48($sp) # save S0 + sw $s1, 52($sp) # save S1 + sw $s2, 56($sp) # save S2 + sw $s3, 60($sp) # save S3 + sw $s4, 64($sp) # save S4 + sw
$s5, 68($sp) # save S5 + sw $s6, 72($sp) # save S6 + sw $s7, 76($sp) # save S7 + sw $fp, 80($sp) # save FP + sw $a0, 84($sp) # save hidden, address of returned transfer_t + sw $ra, 88($sp) # save RA + sw $ra, 92($sp) # save RA as PC + +#if defined(__mips_hard_float) + s.d $f20, ($sp) # save F20 + s.d $f22, 8($sp) # save F22 + s.d $f24, 16($sp) # save F24 + s.d $f26, 24($sp) # save F26 + s.d $f28, 32($sp) # save F28 + s.d $f30, 40($sp) # save F30 +#endif + + # store SP (pointing to context-data) in A0 + move $a0, $sp + + # restore SP (pointing to context-data) from A1 + move $sp, $a1 + +#if defined(__mips_hard_float) + l.d $f20, ($sp) # restore F20 + l.d $f22, 8($sp) # restore F22 + l.d $f24, 16($sp) # restore F24 + l.d $f26, 24($sp) # restore F26 + l.d $f28, 32($sp) # restore F28 + l.d $f30, 40($sp) # restore F30 +#endif + + lw $s0, 48($sp) # restore S0 + lw $s1, 52($sp) # restore S1 + lw $s2, 56($sp) # restore S2 + lw $s3, 60($sp) # restore S3 + lw $s4, 64($sp) # restore S4 + lw $s5, 68($sp) # restore S5 + lw $s6, 72($sp) # restore S6 + lw $s7, 76($sp) # restore S7 + lw $fp, 80($sp) # restore FP + lw $v0, 84($sp) # restore hidden, address of returned transfer_t + lw $ra, 88($sp) # restore RA + + # load PC into T9 (jump target) + lw $t9, 92($sp) + + # release the 96-byte context-data frame + addiu $sp, $sp, 96 + + # return transfer_t from jump + sw $a0, ($v0) # fctx of transfer_t + sw $a2, 4($v0) # data of transfer_t + # pass transfer_t as first arg in context function + # A0 == fctx, A1 == data + move $a1, $a2 + + # jump to context + jr $t9 +.end jump_fcontext +.size jump_fcontext, .-jump_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/jump_mips64_n64_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_mips64_n64_elf_gas.S new file mode 100644 index 0000000..5423dd4 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_mips64_n64_elf_gas.S @@ -0,0 +1,121 @@ +/* + Copyright Jiaxun Yang 2018.
+ Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 8 | 16 | 24 | * + * ------------------------------------------------- * + * | F24 | F25 | F26 | F27 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 40 | 48 | 56 | * + * ------------------------------------------------- * + * | F28 | F29 | F30 | F31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 72 | 80 | 88 | * + * ------------------------------------------------- * + * | S0 | S1 | S2 | S3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | S4 | S5 | S6 | S7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | FP | GP | RA | PC | * + * ------------------------------------------------- * + * * + * *****************************************************/ + +.file
"jump_mips64_n64_elf_gas.S" +.text +.globl jump_fcontext +.align 3 +.type jump_fcontext,@function +.ent jump_fcontext +jump_fcontext: + # reserve 160 bytes of stack for context-data (this routine does not store to the GP slot at 136) + daddiu $sp, $sp, -160 + + sd $s0, 64($sp) # save S0 + sd $s1, 72($sp) # save S1 + sd $s2, 80($sp) # save S2 + sd $s3, 88($sp) # save S3 + sd $s4, 96($sp) # save S4 + sd $s5, 104($sp) # save S5 + sd $s6, 112($sp) # save S6 + sd $s7, 120($sp) # save S7 + sd $fp, 128($sp) # save FP + sd $ra, 144($sp) # save RA + sd $ra, 152($sp) # save RA as PC + + + s.d $f24, 0($sp) # save F24 + s.d $f25, 8($sp) # save F25 + s.d $f26, 16($sp) # save F26 + s.d $f27, 24($sp) # save F27 + s.d $f28, 32($sp) # save F28 + s.d $f29, 40($sp) # save F29 + s.d $f30, 48($sp) # save F30 + s.d $f31, 56($sp) # save F31 + + # store SP (pointing to old context-data) in v0 as return + move $v0, $sp + + # get SP (pointing to new context-data) from a0 param + move $sp, $a0 + + l.d $f24, 0($sp) # restore F24 + l.d $f25, 8($sp) # restore F25 + l.d $f26, 16($sp) # restore F26 + l.d $f27, 24($sp) # restore F27 + l.d $f28, 32($sp) # restore F28 + l.d $f29, 40($sp) # restore F29 + l.d $f30, 48($sp) # restore F30 + l.d $f31, 56($sp) # restore F31 + + ld $s0, 64($sp) # restore S0 + ld $s1, 72($sp) # restore S1 + ld $s2, 80($sp) # restore S2 + ld $s3, 88($sp) # restore S3 + ld $s4, 96($sp) # restore S4 + ld $s5, 104($sp) # restore S5 + ld $s6, 112($sp) # restore S6 + ld $s7, 120($sp) # restore S7 + ld $fp, 128($sp) # restore FP + ld $ra, 144($sp) # restore RA + + # load PC + ld $t9, 152($sp) + + # release the 160-byte context-data frame + daddiu $sp, $sp, 160 + + move $a0, $v0 # move old sp from v0 to a0 as param + move $v1, $a1 # move *data from a1 to v1 as return + + # jump to context + jr $t9 +.end jump_fcontext +.size jump_fcontext, .-jump_fcontext + +/* Mark that we don't need executable stack.
*/ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/jump_ppc32_ppc64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/jump_ppc32_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..f175e31 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_ppc32_ppc64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__ppc__) + #include "jump_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "jump_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/lib/context_switcher/asm/fcontext/jump_ppc32_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_ppc32_sysv_elf_gas.S new file mode 100644 index 0000000..48e09c9 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_ppc32_sysv_elf_gas.S @@ -0,0 +1,201 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * |bchai|hiddn| fpscr | PC | CR | R14 | R15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R24 | R25 | R26 | R27 | R28 | R29 | R30 | R31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | F14 | F15 | F16 | F17 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | F18 | F19 | F20 | F21 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | F22 | F23 | F24 | F25 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | F26 | F27 | F28 | F29 | * + * ------------------------------------------------- * + *
------------------------|------------ * + * | 224 | 228 | 232 | 236 | 240 | 244 | * + * ------------------------|------------ * + * | F30 | F31 |bchai| LR | * + * ------------------------|------------ * + * * + *******************************************************/ + +.file "jump_ppc32_sysv_elf_gas.S" +.text +.globl jump_fcontext +.align 2 +.type jump_fcontext,@function +jump_fcontext: + # Linux: jump_fcontext( hidden transfer_t * R3, R4, R5) + # Other: transfer_t R3:R4 = jump_fcontext( R3, R4) + + mflr %r0 # return address from LR + mffs %f0 # FPSCR + mfcr %r8 # condition register + + stwu %r1, -240(%r1) # allocate stack space, R1 % 16 == 0 + stw %r0, 244(%r1) # save LR in caller's frame + +#ifdef __linux__ + stw %r3, 4(%r1) # hidden pointer +#endif + + stfd %f0, 8(%r1) # FPSCR + stw %r0, 16(%r1) # LR as PC + stw %r8, 20(%r1) # CR + + # Save registers R14 to R31. + # Don't change R2, the thread-local storage pointer. + # Don't change R13, the small data pointer. + stw %r14, 24(%r1) + stw %r15, 28(%r1) + stw %r16, 32(%r1) + stw %r17, 36(%r1) + stw %r18, 40(%r1) + stw %r19, 44(%r1) + stw %r20, 48(%r1) + stw %r21, 52(%r1) + stw %r22, 56(%r1) + stw %r23, 60(%r1) + stw %r24, 64(%r1) + stw %r25, 68(%r1) + stw %r26, 72(%r1) + stw %r27, 76(%r1) + stw %r28, 80(%r1) + stw %r29, 84(%r1) + stw %r30, 88(%r1) + stw %r31, 92(%r1) + + # Save registers F14 to F31 in slots with 8-byte alignment. + # 4-byte alignment may stall the pipeline of some processors. + # Less than 4 may cause alignment traps.
+ stfd %f14, 96(%r1) + stfd %f15, 104(%r1) + stfd %f16, 112(%r1) + stfd %f17, 120(%r1) + stfd %f18, 128(%r1) + stfd %f19, 136(%r1) + stfd %f20, 144(%r1) + stfd %f21, 152(%r1) + stfd %f22, 160(%r1) + stfd %f23, 168(%r1) + stfd %f24, 176(%r1) + stfd %f25, 184(%r1) + stfd %f26, 192(%r1) + stfd %f27, 200(%r1) + stfd %f28, 208(%r1) + stfd %f29, 216(%r1) + stfd %f30, 224(%r1) + stfd %f31, 232(%r1) + + # store SP (R1, pointing to context-data) in R7/R6 + # restore SP (R1, pointing to context-data) from R4/R3 +#ifdef __linux__ + mr %r7, %r1 + mr %r1, %r4 + lwz %r3, 4(%r1) # hidden pointer +#else + mr %r6, %r1 + mr %r1, %r3 +#endif + + lfd %f0, 8(%r1) # FPSCR + lwz %r0, 16(%r1) # PC + lwz %r8, 20(%r1) # CR + + mtfsf 0xff, %f0 # restore FPSCR + mtctr %r0 # load CTR with PC + mtcr %r8 # restore CR + + # restore R14 to R31 + lwz %r14, 24(%r1) + lwz %r15, 28(%r1) + lwz %r16, 32(%r1) + lwz %r17, 36(%r1) + lwz %r18, 40(%r1) + lwz %r19, 44(%r1) + lwz %r20, 48(%r1) + lwz %r21, 52(%r1) + lwz %r22, 56(%r1) + lwz %r23, 60(%r1) + lwz %r24, 64(%r1) + lwz %r25, 68(%r1) + lwz %r26, 72(%r1) + lwz %r27, 76(%r1) + lwz %r28, 80(%r1) + lwz %r29, 84(%r1) + lwz %r30, 88(%r1) + lwz %r31, 92(%r1) + + # restore F14 to F31 + lfd %f14, 96(%r1) + lfd %f15, 104(%r1) + lfd %f16, 112(%r1) + lfd %f17, 120(%r1) + lfd %f18, 128(%r1) + lfd %f19, 136(%r1) + lfd %f20, 144(%r1) + lfd %f21, 152(%r1) + lfd %f22, 160(%r1) + lfd %f23, 168(%r1) + lfd %f24, 176(%r1) + lfd %f25, 184(%r1) + lfd %f26, 192(%r1) + lfd %f27, 200(%r1) + lfd %f28, 208(%r1) + lfd %f29, 216(%r1) + lfd %f30, 224(%r1) + lfd %f31, 232(%r1) + + # restore LR from caller's frame + lwz %r0, 244(%r1) + mtlr %r0 + + # release the 240-byte frame + addi %r1, %r1, 240 + + # return transfer_t +#ifdef __linux__ + stw %r7, 0(%r3) + stw %r5, 4(%r3) +#else + mr %r3, %r6 + # data is returned in %r4, which already holds it +#endif + + # jump to context + bctr +.size jump_fcontext, .-jump_fcontext + +/* Mark that we don't need executable stack.
*/ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/jump_ppc32_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/jump_ppc32_sysv_macho_gas.S new file mode 100644 index 0000000..c555237 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_ppc32_sysv_macho_gas.S @@ -0,0 +1,201 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/****************************************************** + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | F14 | F15 | F16 | F17 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | F18 | F19 | F20 | F21 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | F22 | F23 | F24 | F25 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | F26 | F27 | F28 | F29 | * + * ------------------------------------------------- * + *
------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | F30 | F31 | fpscr | R13 | R14 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | R31 |hiddn| CR | LR | PC |bchai|linkr| FCTX| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 64 | | * + * ------------------------------------------------- * + * | 256 | | * + * ------------------------------------------------- * + * | DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _jump_fcontext +.align 2 +_jump_fcontext: + ; reserve 244 bytes of stack for context-data + subi r1, r1, 244 + + stfd f14, 0(r1) # save F14 + stfd f15,
8(r1) # save F15 + stfd f16, 16(r1) # save F16 + stfd f17, 24(r1) # save F17 + stfd f18, 32(r1) # save F18 + stfd f19, 40(r1) # save F19 + stfd f20, 48(r1) # save F20 + stfd f21, 56(r1) # save F21 + stfd f22, 64(r1) # save F22 + stfd f23, 72(r1) # save F23 + stfd f24, 80(r1) # save F24 + stfd f25, 88(r1) # save F25 + stfd f26, 96(r1) # save F26 + stfd f27, 104(r1) # save F27 + stfd f28, 112(r1) # save F28 + stfd f29, 120(r1) # save F29 + stfd f30, 128(r1) # save F30 + stfd f31, 136(r1) # save F31 + mffs f0 # load FPSCR + stfd f0, 144(r1) # save FPSCR + + stw r13, 152(r1) # save R13 + stw r14, 156(r1) # save R14 + stw r15, 160(r1) # save R15 + stw r16, 164(r1) # save R16 + stw r17, 168(r1) # save R17 + stw r18, 172(r1) # save R18 + stw r19, 176(r1) # save R19 + stw r20, 180(r1) # save R20 + stw r21, 184(r1) # save R21 + stw r22, 188(r1) # save R22 + stw r23, 192(r1) # save R23 + stw r24, 196(r1) # save R24 + stw r25, 200(r1) # save R25 + stw r26, 204(r1) # save R26 + stw r27, 208(r1) # save R27 + stw r28, 212(r1) # save R28 + stw r29, 216(r1) # save R29 + stw r30, 220(r1) # save R30 + stw r31, 224(r1) # save R31 + stw r3, 228(r1) # save hidden + + # save CR + mfcr r0 + stw r0, 232(r1) + # save LR + mflr r0 + stw r0, 236(r1) + # save LR as PC + stw r0, 240(r1) + + # store SP (R1, pointing to context-data) in R6 + mr r6, r1 + + # restore SP (R1, pointing to context-data) from R4 + mr r1, r4 + + lfd f14, 0(r1) # restore F14 + lfd f15, 8(r1) # restore F15 + lfd f16, 16(r1) # restore F16 + lfd f17, 24(r1) # restore F17 + lfd f18, 32(r1) # restore F18 + lfd f19, 40(r1) # restore F19 + lfd f20, 48(r1) # restore F20 + lfd f21, 56(r1) # restore F21 + lfd f22, 64(r1) # restore F22 + lfd f23, 72(r1) # restore F23 + lfd f24, 80(r1) # restore F24 + lfd f25, 88(r1) # restore F25 + lfd f26, 96(r1) # restore F26 + lfd f27, 104(r1) # restore F27 + lfd f28, 112(r1) # restore F28 + lfd f29, 120(r1) # restore F29 + lfd f30, 128(r1) # restore F30 + lfd f31, 136(r1) # restore F31 + lfd f0,
144(r1) # load FPSCR + mtfsf 0xff, f0 # restore FPSCR + + lwz r13, 152(r1) # restore R13 + lwz r14, 156(r1) # restore R14 + lwz r15, 160(r1) # restore R15 + lwz r16, 164(r1) # restore R16 + lwz r17, 168(r1) # restore R17 + lwz r18, 172(r1) # restore R18 + lwz r19, 176(r1) # restore R19 + lwz r20, 180(r1) # restore R20 + lwz r21, 184(r1) # restore R21 + lwz r22, 188(r1) # restore R22 + lwz r23, 192(r1) # restore R23 + lwz r24, 196(r1) # restore R24 + lwz r25, 200(r1) # restore R25 + lwz r26, 204(r1) # restore R26 + lwz r27, 208(r1) # restore R27 + lwz r28, 212(r1) # restore R28 + lwz r29, 216(r1) # restore R29 + lwz r30, 220(r1) # restore R30 + lwz r31, 224(r1) # restore R31 + lwz r3, 228(r1) # restore hidden + + # restore CR + lwz r0, 232(r1) + mtcr r0 + # restore LR + lwz r0, 236(r1) + mtlr r0 + # load PC + lwz r0, 240(r1) + # restore CTR + mtctr r0 + + # release the 244-byte context-data frame + addi r1, r1, 244 + + # return transfer_t + stw r6, 0(r3) + stw r5, 4(r3) + + # jump to context + bctr diff --git a/lib/context_switcher/asm/fcontext/jump_ppc32_sysv_xcoff_gas.S b/lib/context_switcher/asm/fcontext/jump_ppc32_sysv_xcoff_gas.S new file mode 100644 index 0000000..5a96772 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_ppc32_sysv_xcoff_gas.S @@ -0,0 +1,203 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/****************************************************** + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | F14 | F15 | F16 | F17 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | F18 | F19 | F20 | F21 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | F22 | F23 | F24 | F25 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | F26 | F27 | F28 | F29 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | F30 | F31 | fpscr | R13 | R14 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 |
41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | R31 |hiddn| CR | LR | PC |bchai|linkr| FCTX| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 64 | | * + * ------------------------------------------------- * + * | 256 | | * + * ------------------------------------------------- * + * | DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ +.globl .jump_fcontext +.globl jump_fcontext[DS] +.align 2 +.csect jump_fcontext[DS] +jump_fcontext: + .long .jump_fcontext +.jump_fcontext: + # reserve 244 bytes of stack for context-data + subi r1, r1, 244 + + stfd f14, 0(r1) # save F14 + stfd f15, 8(r1) # save F15 + stfd f16, 16(r1) # save F16 + stfd f17, 24(r1) # save F17 + stfd f18, 32(r1) # save F18 + stfd f19, 40(r1) # save F19 + stfd f20, 48(r1) # save F20 + stfd f21, 56(r1) # save F21 + stfd f22, 64(r1) # save F22 + stfd f23, 72(r1) # save F23 + stfd f24, 80(r1) # save F24 + stfd f25, 88(r1) # save F25 + stfd f26, 96(r1) # save F26 +
stfd f27, 104(r1) # save F27 + stfd f28, 112(r1) # save F28 + stfd f29, 120(r1) # save F29 + stfd f30, 128(r1) # save F30 + stfd f31, 136(r1) # save F31 + mffs f0 # load FPSCR + stfd f0, 144(r1) # save FPSCR + + stw r13, 152(r1) # save R13 + stw r14, 156(r1) # save R14 + stw r15, 160(r1) # save R15 + stw r16, 164(r1) # save R16 + stw r17, 168(r1) # save R17 + stw r18, 172(r1) # save R18 + stw r19, 176(r1) # save R19 + stw r20, 180(r1) # save R20 + stw r21, 184(r1) # save R21 + stw r22, 188(r1) # save R22 + stw r23, 192(r1) # save R23 + stw r24, 196(r1) # save R24 + stw r25, 200(r1) # save R25 + stw r26, 204(r1) # save R26 + stw r27, 208(r1) # save R27 + stw r28, 212(r1) # save R28 + stw r29, 216(r1) # save R29 + stw r30, 220(r1) # save R30 + stw r31, 224(r1) # save R31 + stw r3, 228(r1) # save hidden + + # save CR + mfcr r0 + stw r0, 232(r1) + # save LR + mflr r0 + stw r0, 236(r1) + # save LR as PC + stw r0, 240(r1) + + # store SP (R1, pointing to context-data) in R6 + mr r6, r1 + + # restore SP (R1, pointing to context-data) from R4 + mr r1, r4 + + lfd f14, 0(r1) # restore F14 + lfd f15, 8(r1) # restore F15 + lfd f16, 16(r1) # restore F16 + lfd f17, 24(r1) # restore F17 + lfd f18, 32(r1) # restore F18 + lfd f19, 40(r1) # restore F19 + lfd f20, 48(r1) # restore F20 + lfd f21, 56(r1) # restore F21 + lfd f22, 64(r1) # restore F22 + lfd f23, 72(r1) # restore F23 + lfd f24, 80(r1) # restore F24 + lfd f25, 88(r1) # restore F25 + lfd f26, 96(r1) # restore F26 + lfd f27, 104(r1) # restore F27 + lfd f28, 112(r1) # restore F28 + lfd f29, 120(r1) # restore F29 + lfd f30, 128(r1) # restore F30 + lfd f31, 136(r1) # restore F31 + lfd f0, 144(r1) # load FPSCR + mtfsf 0xff, f0 # restore FPSCR + + lwz r13, 152(r1) # restore R13 + lwz r14, 156(r1) # restore R14 + lwz r15, 160(r1) # restore R15 + lwz r16, 164(r1) # restore R16 + lwz r17, 168(r1) # restore R17 + lwz r18, 172(r1) # restore R18 + lwz r19, 176(r1) # restore R19 + lwz r20, 180(r1) # restore R20 + lwz r21, 184(r1) # restore R21
+ lwz r22, 188(r1) # restore R22 + lwz r23, 192(r1) # restore R23 + lwz r24, 196(r1) # restore R24 + lwz r25, 200(r1) # restore R25 + lwz r26, 204(r1) # restore R26 + lwz r27, 208(r1) # restore R27 + lwz r28, 212(r1) # restore R28 + lwz r29, 216(r1) # restore R29 + lwz r30, 220(r1) # restore R30 + lwz r31, 224(r1) # restore R31 + lwz r3, 228(r1) # restore hidden + + # restore CR + lwz r0, 232(r1) + mtcr r0 + # restore LR + lwz r0, 236(r1) + mtlr r0 + # load PC + lwz r0, 240(r1) + # restore CTR + mtctr r0 + + # release the 244-byte context-data frame + addi r1, r1, 244 + + # return transfer_t + stw r6, 0(r3) + stw r5, 4(r3) + + # jump to context + bctr diff --git a/lib/context_switcher/asm/fcontext/jump_ppc64_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_ppc64_sysv_elf_gas.S new file mode 100644 index 0000000..28907db --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_ppc64_sysv_elf_gas.S @@ -0,0 +1,221 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 
42 | 43 | 44 | 45 | 46 | 47 | *
+ * ------------------------------------------------- *
+ * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
+ * ------------------------------------------------- *
+ * | CR | LR | PC | back-chain| *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
+ * ------------------------------------------------- *
+ * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
+ * ------------------------------------------------- *
+ * | cr saved | lr saved | compiler | linker | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
+ * ------------------------------------------------- *
+ * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
+ * ------------------------------------------------- *
+ * | TOC saved | FCTX | DATA | | *
+ * ------------------------------------------------- *
+ * *
+ *******************************************************/
+
+.file "jump_ppc64_sysv_elf_gas.S"
+.globl jump_fcontext
+#if _CALL_ELF == 2
+    .text
+    .align 2
+jump_fcontext:
+    addis %r2, %r12, .TOC.-jump_fcontext@ha
+    addi %r2, %r2, .TOC.-jump_fcontext@l
+    .localentry jump_fcontext, . - jump_fcontext
+#else
+    .section ".opd","aw"
+    .align 3
+jump_fcontext:
+# ifdef _CALL_LINUX
+    .quad .L.jump_fcontext,.TOC.@tocbase,0
+    .type jump_fcontext,@function
+    .text
+    .align 2
+.L.jump_fcontext:
+# else
+    .hidden .jump_fcontext
+    .globl .jump_fcontext
+    .quad .jump_fcontext,.TOC.@tocbase,0
+    .size jump_fcontext,24
+    .type .jump_fcontext,@function
+    .text
+    .align 2
+.jump_fcontext:
+# endif
+#endif
+    # reserve 184 bytes of stack for the context-data record of the current context
+    subi %r1, %r1, 184
+
+#if _CALL_ELF != 2
+    std %r2, 0(%r1) # save TOC
+#endif
+    std %r14, 8(%r1) # save R14
+    std %r15, 16(%r1) # save R15
+    std %r16, 24(%r1) # save R16
+    std %r17, 32(%r1) # save R17
+    std %r18, 40(%r1) # save R18
+    std %r19, 48(%r1) # save R19
+    std %r20, 56(%r1) # save R20
+    std %r21, 64(%r1) # save R21
+    std %r22, 72(%r1) # save R22
+    std %r23, 80(%r1) # save R23
+    std %r24, 88(%r1) # save R24
+    std %r25, 96(%r1) # save R25
+    std %r26, 104(%r1) # save R26
+    std %r27, 112(%r1) # save R27
+    std %r28, 120(%r1) # save R28
+    std %r29, 128(%r1) # save R29
+    std %r30, 136(%r1) # save R30
+    std %r31, 144(%r1) # save R31
+#if _CALL_ELF != 2
+    std %r3, 152(%r1) # save hidden (address of the returned transfer_t)
+#endif
+
+    # save CR
+    mfcr %r0
+    std %r0, 160(%r1)
+    # save LR
+    mflr %r0
+    std %r0, 168(%r1)
+    # save LR as PC (the address this context resumes at)
+    std %r0, 176(%r1)
+
+    # keep SP of the suspended context (pointing to its context-data) in R6
+    mr %r6, %r1
+
+#if _CALL_ELF == 2
+    # switch SP to the target context-data passed in R3
+    mr %r1, %r3
+#else
+    # switch SP to the target context-data passed in R4
+    mr %r1, %r4
+
+    ld %r2, 0(%r1) # restore TOC
+#endif
+    ld %r14, 8(%r1) # restore R14
+    ld %r15, 16(%r1) # restore R15
+    ld %r16, 24(%r1) # restore R16
+    ld %r17, 32(%r1) # restore R17
+    ld %r18, 40(%r1) # restore R18
+    ld %r19, 48(%r1) # restore R19
+    ld %r20, 56(%r1) # restore R20
+    ld %r21, 64(%r1) # restore R21
+    ld %r22, 72(%r1) # restore R22
+    ld %r23, 80(%r1) # restore R23
+    ld %r24, 88(%r1) # restore R24
+    ld %r25, 96(%r1) # restore R25
+    ld %r26, 104(%r1) # restore R26
+    ld %r27, 112(%r1) # restore R27
+    ld %r28, 120(%r1) # restore R28
+    ld %r29, 128(%r1) # restore R29
+    ld %r30, 136(%r1) # restore R30
+    ld %r31, 144(%r1) # restore R31
+#if _CALL_ELF != 2
+    ld %r3, 152(%r1) # restore hidden (address of the returned transfer_t)
+#endif
+
+    # restore CR
+    ld %r0, 160(%r1)
+    mtcr %r0
+    # restore LR
+    ld %r0, 168(%r1)
+    mtlr %r0
+
+    # load PC of the target context into R12
+    ld %r12, 176(%r1)
+    # move PC into CTR so that bctr branches to it
+    mtctr %r12
+
+    # release the 184 byte context-data record of the target context
+    addi %r1, %r1, 184
+
+#if _CALL_ELF == 2
+    # copy transfer_t into transfer_fn arg registers (fctx = suspended context kept in R6)
+    mr %r3, %r6
+    # arg pointer already in %r4
+
+    # jump to context
+    bctr
+    .size jump_fcontext, .-jump_fcontext
+#else
+    # zero in r3 indicates first jump to context-function
+    cmpdi %r3, 0
+    beq use_entry_arg
+
+    # return transfer_t through the hidden pointer: fctx = R6, data = R5
+    std %r6, 0(%r3)
+    std %r5, 8(%r3)
+
+    # jump to context
+    bctr
+
+use_entry_arg:
+    # copy transfer_t into transfer_fn arg registers
+    mr %r3, %r6
+    mr %r4, %r5
+
+    # jump to context
+    bctr
+# ifdef _CALL_LINUX
+    .size .jump_fcontext, .-.L.jump_fcontext
+# else
+    .size .jump_fcontext, .-.jump_fcontext
+# endif
+#endif
+
+
+/* Mark that we don't need executable stack. */
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/context_switcher/asm/fcontext/jump_ppc64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/jump_ppc64_sysv_macho_gas.S
new file mode 100644
index 0000000..74fcb2a
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/jump_ppc64_sysv_macho_gas.S
@@ -0,0 +1,164 @@
+/*
+            Copyright Oliver Kowalke 2009.
+   Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 
42 | 43 | 44 | 45 | 46 | 47 | *
+ * ------------------------------------------------- *
+ * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
+ * ------------------------------------------------- *
+ * | CR | LR | PC | back-chain| *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
+ * ------------------------------------------------- *
+ * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
+ * ------------------------------------------------- *
+ * | cr saved | lr saved | compiler | linker | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
+ * ------------------------------------------------- *
+ * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
+ * ------------------------------------------------- *
+ * | TOC saved | FCTX | DATA | | *
+ * ------------------------------------------------- *
+ * *
+ *******************************************************/
+
+.text
+.align 2
+.globl _jump_fcontext
+
+_jump_fcontext:
+    ; reserve 184 bytes of stack for the context-data record of the current context
+    subi r1, r1, 184
+
+    std r14, 8(r1) ; save R14
+    std r15, 16(r1) ; save R15
+    std r16, 24(r1) ; save R16
+    std r17, 32(r1) ; save R17
+    std r18, 40(r1) ; save R18
+    std r19, 48(r1) ; save R19
+    std r20, 56(r1) ; save R20
+    std r21, 64(r1) ; save R21
+    std r22, 72(r1) ; save R22
+    std r23, 80(r1) ; save R23
+    std r24, 88(r1) ; save R24
+    std r25, 96(r1) ; save R25
+    std r26, 104(r1) ; save R26
+    std r27, 112(r1) ; save R27
+    std r28, 120(r1) ; save R28
+    std r29, 128(r1) ; save R29
+    std r30, 136(r1) ; save R30
+    std r31, 144(r1) ; save R31
+    std r3, 152(r1) ; save hidden (address of the returned transfer_t)
+
+    ; save CR
+    mfcr r0
+    std r0, 160(r1)
+    ; save LR
+    mflr r0
+    std r0, 168(r1)
+    ; save LR as PC (the address this context resumes at)
+    std r0, 176(r1)
+
+    ; keep SP of the suspended context (pointing to its context-data) in R6
+    mr r6, r1
+
+    ; switch SP to the target context-data passed in R4
+    mr r1, r4
+
+    ld r14, 8(r1) ; restore R14
+    ld r15, 16(r1) ; restore R15
+    ld r16, 24(r1) ; restore R16
+    ld r17, 32(r1) ; restore R17
+    ld r18, 40(r1) ; restore R18
+    ld r19, 48(r1) ; restore R19
+    ld r20, 56(r1) ; restore R20
+    ld r21, 64(r1) ; restore R21
+    ld r22, 72(r1) ; restore R22
+    ld r23, 80(r1) ; restore R23
+    ld r24, 88(r1) ; restore R24
+    ld r25, 96(r1) ; restore R25
+    ld r26, 104(r1) ; restore R26
+    ld r27, 112(r1) ; restore R27
+    ld r28, 120(r1) ; restore R28
+    ld r29, 128(r1) ; restore R29
+    ld r30, 136(r1) ; restore R30
+    ld r31, 144(r1) ; restore R31
+    ld r3, 152(r1) ; restore hidden (address of the returned transfer_t)
+
+    ; restore CR
+    ld r0, 160(r1)
+    mtcr r0
+    ; restore LR
+    ld r0, 168(r1)
+    mtlr r0
+
+    ; load PC of the target context into R12
+    ld r12, 176(r1)
+    # move PC into CTR so that bctr branches to it (NOTE(review): from here on comments use the hash marker instead of the semicolon - confirm the assembler accepts both)
+    mtctr r12
+
+    # release the 184 byte context-data record of the target context
+    addi r1, r1, 184
+
+    # zero in r3 indicates first jump to context-function
+    cmpdi r3, 0
+    beq use_entry_arg
+
+    # return transfer_t through the hidden pointer: fctx = R6, data = R5
+    std r6, 0(r3)
+    std r5, 8(r3)
+
+    # jump to context
+    bctr
+
+use_entry_arg:
+    # copy transfer_t into transfer_fn arg registers
+    mr r3, r6
+    mr r4, r5
+
+    # jump to context
+    bctr
diff --git a/lib/context_switcher/asm/fcontext/jump_ppc64_sysv_xcoff_gas.S b/lib/context_switcher/asm/fcontext/jump_ppc64_sysv_xcoff_gas.S
new file mode 100644
index 0000000..ff0e6ea
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/jump_ppc64_sysv_xcoff_gas.S
@@ -0,0 +1,92 @@
+
+/*
+            Copyright Oliver Kowalke 2009.
+   Distributed under the Boost Software License, Version 1.0.
+      (See accompanying file LICENSE_1_0.txt or copy at
+          http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+.align 2
+.globl .jump_fcontext
+.jump_fcontext:
+    # reserve 184 bytes of stack for the context-data record of the current context
+    subi 1, 1, 184
+
+    std 13, 0(1) # save R13
+    std 14, 8(1) # save R14
+    std 15, 16(1) # save R15
+    std 16, 24(1) # save R16
+    std 17, 32(1) # save R17
+    std 18, 40(1) # save R18
+    std 19, 48(1) # save R19
+    std 20, 56(1) # save R20
+    std 21, 64(1) # save R21
+    std 22, 72(1) # save R22
+    std 23, 80(1) # save R23
+    std 24, 88(1) # save R24
+    std 25, 96(1) # save R25
+    std 26, 104(1) # save R26
+    std 27, 112(1) # save R27
+    std 28, 120(1) # save R28 (this slot is reloaded into R28 by the restore sequence below)
+    std 29, 128(1) # save R29
+    std 30, 136(1) # save R30
+    std 31, 144(1) # save R31
+    std 3, 152(1) # save hidden (address of the returned transfer_t)
+
+    # save CR
+    mfcr 0
+    std 0, 160(1)
+    # save LR
+    mflr 0
+    std 0, 168(1)
+    # save LR as PC (the address this context resumes at)
+    std 0, 176(1)
+
+    # keep SP of the suspended context (pointing to its context-data) in R6
+    mr 6, 1
+
+    # switch SP to the target context-data passed in R4
+    mr 1, 4
+
+    ld 13, 0(1) # restore R13
+    ld 14, 8(1) # restore R14
+    ld 15, 16(1) # restore R15
+    ld 16, 24(1) # restore R16
+    ld 17, 32(1) # restore R17
+    ld 18, 40(1) # restore R18
+    ld 19, 48(1) # restore R19
+    ld 20, 56(1) # restore R20
+    ld 21, 64(1) # restore R21
+    ld 22, 72(1) # restore R22
+    ld 23, 80(1) # restore R23
+    ld 24, 88(1) # restore R24
+    ld 25, 96(1) # restore R25
+    ld 26, 104(1) # restore R26
+    ld 27, 112(1) # restore R27
+    ld 28, 120(1) # restore R28
+    ld 29, 128(1) # restore R29
+    ld 30, 136(1) # restore R30
+    ld 31, 144(1) # restore R31
+    ld 3, 152(1) # restore hidden (address of the returned transfer_t)
+
+    # restore CR
+    ld 0, 160(1)
+    mtcr 0
+    # restore LR
+    ld 0, 168(1)
+    mtlr 0
+
+    # load PC of the target context
+    ld 0, 176(1)
+    # move PC into CTR so that bctr branches to it
+    mtctr 0
+
+    # release the 184 byte context-data record of the target context
+    addi 1, 1, 184
+
+    # return transfer_t through the hidden pointer: fctx = R6, data = R5
+    std 6, 0(3)
+    std 5, 8(3)
+
+    # jump to context
+    bctr
diff --git a/lib/context_switcher/asm/fcontext/jump_riscv64_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_riscv64_sysv_elf_gas.S
new file mode 100644
index 0000000..5417e5d
--- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_riscv64_sysv_elf_gas.S @@ -0,0 +1,150 @@ +/* + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | fs0 | fs1 | fs2 | fs3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | fs4 | fs5 | fs6 | fs7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | fs8 | fs9 | fs10 | fs11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | s0 | s1 | s2 | s3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * 
------------------------------------------------- *
+ * | s4 | s5 | s6 | s7 | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | *
+ * ------------------------------------------------- *
+ * | 0xa0| 0xa4| 0xa8| 0xac| 0xb0| 0xb4| 0xb8| 0xbc| *
+ * ------------------------------------------------- *
+ * | s8 | s9 | s10 | s11 | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 48 | 49 | 50 | 51 | | | | | *
+ * ------------------------------------------------- *
+ * | 0xc0| 0xc4| 0xc8| 0xcc| | | | | *
+ * ------------------------------------------------- *
+ * | ra | pc | | | *
+ * ------------------------------------------------- *
+ * *
+ *******************************************************/
+
+.file "jump_riscv64_sysv_elf_gas.S"
+.text
+.align 1
+.global jump_fcontext
+.type jump_fcontext, %function
+jump_fcontext:
+    # reserve 0xd0 bytes of stack for the GP + FPU context-data record
+    addi sp, sp, -0xd0
+
+    # save fs0 - fs11
+    fsd fs0, 0x00(sp)
+    fsd fs1, 0x08(sp)
+    fsd fs2, 0x10(sp)
+    fsd fs3, 0x18(sp)
+    fsd fs4, 0x20(sp)
+    fsd fs5, 0x28(sp)
+    fsd fs6, 0x30(sp)
+    fsd fs7, 0x38(sp)
+    fsd fs8, 0x40(sp)
+    fsd fs9, 0x48(sp)
+    fsd fs10, 0x50(sp)
+    fsd fs11, 0x58(sp)
+
+    # save s0-s11, ra
+    sd s0, 0x60(sp)
+    sd s1, 0x68(sp)
+    sd s2, 0x70(sp)
+    sd s3, 0x78(sp)
+    sd s4, 0x80(sp)
+    sd s5, 0x88(sp)
+    sd s6, 0x90(sp)
+    sd s7, 0x98(sp)
+    sd s8, 0xa0(sp)
+    sd s9, 0xa8(sp)
+    sd s10, 0xb0(sp)
+    sd s11, 0xb8(sp)
+    sd ra, 0xc0(sp)
+
+    # save RA as PC (the address this context resumes at)
+    sd ra, 0xc8(sp)
+
+    # keep SP of the suspended context (pointing to its context-data) in A2
+    mv a2, sp
+
+    # switch SP to the target context-data passed in A0
+    mv sp, a0
+
+    # load fs0 - fs11
+    fld fs0, 0x00(sp)
+    fld fs1, 0x08(sp)
+    fld fs2, 0x10(sp)
+    fld fs3, 0x18(sp)
+    fld fs4, 0x20(sp)
+    fld fs5, 0x28(sp)
+    fld fs6, 0x30(sp)
+    fld fs7, 0x38(sp)
+    fld fs8, 0x40(sp)
+    fld fs9, 0x48(sp)
+    fld fs10, 0x50(sp)
+    fld fs11, 0x58(sp)
+
+    # load s0-s11,ra
+    ld s0, 0x60(sp)
+    ld s1, 0x68(sp)
+    ld s2, 0x70(sp)
+    ld s3, 0x78(sp)
+    ld s4, 0x80(sp)
+    ld s5, 0x88(sp)
+    ld s6, 0x90(sp)
+    ld s7, 0x98(sp)
+    ld s8, 0xa0(sp)
+    ld s9, 0xa8(sp)
+    ld s10, 0xb0(sp)
+    ld s11, 0xb8(sp)
+    ld ra, 0xc0(sp)
+
+    # return transfer_t from jump
+    # pass transfer_t as first arg in context function
+    # a0 == FCTX (the suspended context kept in A2), a1 == DATA (left untouched)
+    mv a0, a2
+
+    # load PC of the target context into A2
+    ld a2, 0xc8(sp)
+
+    # release the 0xd0 byte GP + FPU context-data record
+    addi sp, sp, 0xd0
+
+    jr a2 # jump to the target context
+.size jump_fcontext,.-jump_fcontext
+# Mark that we don't need executable stack.
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/context_switcher/asm/fcontext/jump_s390x_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_s390x_sysv_elf_gas.S
new file mode 100644
index 0000000..b2163cc
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/jump_s390x_sysv_elf_gas.S
@@ -0,0 +1,117 @@
+/*******************************************************
+* *
+* ------------------------------------------------- *
+* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
+* ------------------------------------------------- *
+* | 0 | 8 | 16 | 24 | *
+* ------------------------------------------------- *
+* | R6 | R7 | R8 | R9 | *
+* ------------------------------------------------- *
+* ------------------------------------------------- *
+* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
+* ------------------------------------------------- *
+* | 32 | 40 | 48 | 56 | *
+* ------------------------------------------------- *
+* | R10 | R11 | R12 | R13 | *
+* ------------------------------------------------- *
+* ------------------------------------------------- *
+* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
+* ------------------------------------------------- *
+* | 64 | 72 | 80 | 88 | *
+* ------------------------------------------------- *
+* | R14/LR | R15 | F1 | F3 | *
+* ------------------------------------------------- *
+* ------------------------------------------------- *
+* | 24 | 25 | 26 | 27 | 28 | 29 |
| *
+* ------------------------------------------------- *
+* | 96 | 104 | 112 | 120 | *
+* ------------------------------------------------- *
+* | F5 | F7 | PC | | *
+* ------------------------------------------------- *
+* *****************************************************/
+
+.file "jump_s390x_sysv_elf_gas.S"
+.text
+.align 4 # According to the sample code in the ELF ABI docs
+.global jump_fcontext
+.type jump_fcontext, @function
+
+#define GR_OFFSET 0
+#define LR_OFFSET 64
+#define SP_OFFSET 72
+#define FP_OFFSET 80
+#define PC_OFFSET 112
+#define L_CTX 120
+#define L_STACKFRAME 120
+
+jump_fcontext:
+
+    # Reserve L_STACKFRAME bytes of stack to store the data of the current context
+    # before we jump to the new context.
+    aghi %r15,-L_STACKFRAME
+
+    # save the registers R6-R15 to the stack
+    stmg %r6, %r15, GR_OFFSET(%r15)
+
+    # save the floating point registers F1, F3, F5, F7 (the same set the restore path below loads)
+    std %f1,FP_OFFSET(%r15)
+    std %f3,FP_OFFSET+8(%r15)
+    std %f5,FP_OFFSET+16(%r15)
+    std %f7,FP_OFFSET+24(%r15)
+
+    # Save LR as PC (the address this context resumes at)
+    stg %r14,PC_OFFSET(%r15)
+
+    # Store the SP pointing to the old context-data into R0
+    lgr %r0,%r15
+
+    # Get the SP pointing to the new context-data
+    # Note: Since the return type of the jump_fcontext is struct whose
+    # size is more than 8. The compiler automatically passes the
+    # address of the transfer_t where the data needs to store into R2.
+
+    # Hence the first param passed to the jump_fcontext which represent
+    # the fctx we want to switch to is present in R3
+    # R2 --> Address of the return transfer_t struct
+    # R3 --> Context we want to switch to
+    # R4 --> Data
+    lgr %r15,%r3
+
+    # Load the registers with the data present in context-data of the
+    # context we are going to switch to (R15 already points at that record)
+    lmg %r6, %r14, GR_OFFSET(%r15)
+
+    # Restore Floating point registers (NOTE(review): the s390x ELF ABI lists f8-f15 as call-saved - confirm this register set)
+    ld %f1,FP_OFFSET(%r15)
+    ld %f3,FP_OFFSET+8(%r15)
+    ld %f5,FP_OFFSET+16(%r15)
+    ld %f7,FP_OFFSET+24(%r15)
+
+    # Load PC
+    lg %r1,PC_OFFSET(%r15)
+
+    # Release the context-data record of the target context
+    aghi %r15,L_STACKFRAME
+
+    # R2 --> Address where the return transfer_t is stored
+    # R0 --> FCTX
+    # R4 --> DATA
+
+    # Store the elements of the returned transfer_t: fctx = R0 (suspended context), data = R4
+    stg %r0, 0(%r2)
+    stg %r4, 8(%r2)
+
+    # Note: The address in R2 points to the place where the return
+    # transfer_t is stored. Since context_function take transfer_t
+    # as first parameter. And R2 is the register which holds the
+    # first parameter value. NOTE(review): R2 is not part of the saved record, so a resumed context gets the result through the buffer of the current caller - confirm.
+
+    #jump to context
+    br %r1
+
+.size jump_fcontext,.-jump_fcontext
+# Mark that we don't need executable stack.
+.section .note.GNU-stack,"",%progbits
+
+
+
diff --git a/lib/context_switcher/asm/fcontext/jump_x86_64_ms_pe_gas.asm b/lib/context_switcher/asm/fcontext/jump_x86_64_ms_pe_gas.asm
new file mode 100644
index 0000000..ec4ecfe
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/jump_x86_64_ms_pe_gas.asm
@@ -0,0 +1,209 @@
+/*
+            Copyright Oliver Kowalke 2009.
+            Copyright Thomas Sailer 2013.
+   Distributed under the Boost Software License, Version 1.0.
+      (See accompanying file LICENSE_1_0.txt or copy at
+            http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+/*************************************************************************************
+* ---------------------------------------------------------------------------------- *
+* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
+* ---------------------------------------------------------------------------------- *
+* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
+* ---------------------------------------------------------------------------------- *
+* | SSE registers (XMM6-XMM15) | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
+* ---------------------------------------------------------------------------------- *
+* | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | *
+* ---------------------------------------------------------------------------------- *
+* | SSE registers (XMM6-XMM15) | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
+* ---------------------------------------------------------------------------------- *
+* | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | *
+* ---------------------------------------------------------------------------------- *
+* | SSE registers (XMM6-XMM15) | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
+* ---------------------------------------------------------------------------------- *
+* | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | *
+* ---------------------------------------------------------------------------------- *
+* | SSE registers (XMM6-XMM15) | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | *
+* ---------------------------------------------------------------------------------- *
+* | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | *
+* ---------------------------------------------------------------------------------- *
+* | SSE registers (XMM6-XMM15) | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | *
+* ---------------------------------------------------------------------------------- *
+* | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | *
+* ---------------------------------------------------------------------------------- *
+* | fc_mxcsr|fc_x87_cw| | fbr_strg | fc_dealloc | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
+* ---------------------------------------------------------------------------------- *
+* | 0xc0 | 0xc4 | 0xc8 | 0xcc | 0xd0 | 0xd4 | 0xd8 | 0xdc | *
+* ---------------------------------------------------------------------------------- *
+* | limit | base | R12 | R13 | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
+* ---------------------------------------------------------------------------------- *
+* | 0xe0 | 0xe4 | 0xe8 | 0xec | 0xf0 | 0xf4 | 0xf8 | 0xfc | *
+* ---------------------------------------------------------------------------------- *
+* | R14 | R15 | RDI | RSI | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | *
+* ---------------------------------------------------------------------------------- *
+* | 0x100 | 0x104 | 0x108 | 0x10c | 0x110 | 0x114 | 0x118 | 0x11c | *
+* ---------------------------------------------------------------------------------- *
+* | RBX | RBP | hidden | RIP | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | *
+* ---------------------------------------------------------------------------------- *
+* | 0x120 | 0x124 | 0x128 | 0x12c | 0x130 | 0x134 | 0x138 | 0x13c | *
+* ---------------------------------------------------------------------------------- *
+* | parameter area | *
+* ---------------------------------------------------------------------------------- *
+* ---------------------------------------------------------------------------------- *
+* | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | *
+* ---------------------------------------------------------------------------------- *
+* | 0x140 | 0x144 | 0x148 | 0x14c | 0x150 | 0x154 | 0x158 | 0x15c | *
+* ---------------------------------------------------------------------------------- *
+* | FCTX | DATA | | *
+* ---------------------------------------------------------------------------------- *
+**************************************************************************************/
+
+.file "jump_x86_64_ms_pe_gas.asm"
+.text
+.p2align 4,,15
+.globl jump_fcontext
+.def jump_fcontext; .scl 2; .type 32; .endef
+.seh_proc jump_fcontext
+jump_fcontext:
+.seh_endprologue
+
+    leaq -0x118(%rsp), %rsp /* reserve 0x118 bytes of stack for the context-data record */
+
+#if !defined(BOOST_USE_TSX)
+    /* save XMM storage (XMM6-XMM15) */
+    movaps %xmm6, 0x0(%rsp)
+    movaps %xmm7, 0x10(%rsp)
+    movaps %xmm8, 0x20(%rsp)
+    movaps %xmm9, 0x30(%rsp)
+    movaps %xmm10, 0x40(%rsp)
+    movaps %xmm11, 0x50(%rsp)
+    movaps %xmm12, 0x60(%rsp)
+    movaps %xmm13, 0x70(%rsp)
+    movaps %xmm14, 0x80(%rsp)
+    movaps %xmm15, 0x90(%rsp)
+    stmxcsr 0xa0(%rsp) /* save MXCSR (SSE control- and status-word) */
+    fnstcw 0xa4(%rsp) /* save x87 control-word */
+#endif
+
+    /* load NT_TIB */
+    movq %gs:(0x30), %r10
+    /* save fiber local storage */
+    movq 0x20(%r10), %rax
+    movq %rax, 0xb0(%rsp)
+    /* save current deallocation stack */
+    movq 0x1478(%r10), %rax
+    movq %rax, 0xb8(%rsp)
+    /* save current stack limit */
+    movq 0x10(%r10), %rax
+    movq %rax, 0xc0(%rsp)
+    /* save current stack base */
+    movq 0x08(%r10), %rax
+    movq %rax, 0xc8(%rsp)
+
+    movq %r12, 0xd0(%rsp) /* save R12 */
+    movq %r13, 0xd8(%rsp) /* save R13 */
+    movq %r14, 0xe0(%rsp) /* save R14 */
+    movq %r15, 0xe8(%rsp) /* save R15 */
+    movq %rdi, 0xf0(%rsp) /* save RDI */
+    movq %rsi, 0xf8(%rsp) /* save RSI */
+    movq %rbx, 0x100(%rsp) /* save RBX */
+    movq %rbp, 0x108(%rsp) /* save RBP */
+
+    movq %rcx, 0x110(%rsp) /* save hidden address of transfer_t */
+
+    /* preserve RSP of the suspended context (pointing to its context-data) in R9 */
+    movq %rsp, %r9
+
+    /* switch RSP to the target context-data passed in RDX */
+    movq %rdx, %rsp
+
+#if !defined(BOOST_USE_TSX)
+    /* restore XMM storage (XMM6-XMM15) */
+    movaps 0x0(%rsp), %xmm6
+    movaps 0x10(%rsp), %xmm7
+    movaps 0x20(%rsp), %xmm8
+    movaps 0x30(%rsp), %xmm9
+    movaps 0x40(%rsp), %xmm10
+    movaps 0x50(%rsp), %xmm11
+    movaps 0x60(%rsp), %xmm12
+    movaps 0x70(%rsp), %xmm13
+    movaps 0x80(%rsp), %xmm14
+    movaps 0x90(%rsp), %xmm15
+    ldmxcsr 0xa0(%rsp) /* restore MXCSR (SSE control- and status-word) */
+    fldcw 0xa4(%rsp) /* restore x87 control-word */
+#endif
+
+    /* load NT_TIB */
+    movq %gs:(0x30), %r10
+    /* restore fiber local storage */
+    movq 0xb0(%rsp), %rax
+    movq %rax, 0x20(%r10)
+    /* restore current deallocation stack */
+    movq 0xb8(%rsp), %rax
+    movq %rax, 0x1478(%r10)
+    /* restore current stack limit */
+    movq 0xc0(%rsp), %rax
+    movq %rax, 0x10(%r10)
+    /* restore current stack base */
+    movq 0xc8(%rsp), %rax
+    movq %rax, 0x08(%r10)
+
+    movq 0xd0(%rsp), %r12 /* restore R12 */
+    movq 0xd8(%rsp), %r13 /* restore R13 */
+    movq 0xe0(%rsp), %r14 /* restore R14 */
+    movq 0xe8(%rsp), %r15 /* restore R15 */
+    movq 0xf0(%rsp), %rdi /* restore RDI */
+    movq 0xf8(%rsp), %rsi /* restore RSI */
+    movq 0x100(%rsp), %rbx /* restore RBX */
+    movq 0x108(%rsp), %rbp /* restore RBP */
+
+    movq 0x110(%rsp), %rax /* restore hidden address of transfer_t */
+
+    leaq 0x118(%rsp), %rsp /* release the 0x118 byte context-data record */
+
+    /* restore return-address */
+    popq %r10
+
+    /* transfer_t returned in RAX */
+    /* return parent fcontext_t (the suspended context kept in R9) */
+    movq %r9, 0x0(%rax)
+    /* return data */
+    movq %r8, 0x8(%rax)
+
+    /* transfer_t as 1.arg of context-function */
+    movq %rax, %rcx
+
+    /* indirect jump to context */
+    jmp *%r10
+.seh_endproc
+
+.section .drectve
+.ascii " -export:\"jump_fcontext\""
diff --git a/lib/context_switcher/asm/fcontext/jump_x86_64_ms_pe_masm.asm b/lib/context_switcher/asm/fcontext/jump_x86_64_ms_pe_masm.asm
new file mode 100644
index 0000000..c8a28a5
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/jump_x86_64_ms_pe_masm.asm
@@ -0,0 +1,205 @@
+
+;           Copyright Oliver Kowalke 2009.
+; Distributed under the Boost Software License, Version 1.0.
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; ---------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; ---------------------------------------------------------------------------------- +; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; ---------------------------------------------------------------------------------- +; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | +; ---------------------------------------------------------------------------------- +; | 0xe40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +; ---------------------------------------------------------------------------------- +; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | +; ---------------------------------------------------------------------------------- +; | SEE registers (XMM6-XMM15) | +; 
---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | +; ---------------------------------------------------------------------------------- +; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | +; ---------------------------------------------------------------------------------- +; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | +; ---------------------------------------------------------------------------------- +; | fc_mxcsr|fc_x87_cw| | fbr_strg | fc_dealloc | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | +; ---------------------------------------------------------------------------------- +; | 0xc0 | 0xc4 | 0xc8 | 0xcc | 0xd0 | 0xd4 | 0xd8 | 0xdc | +; ---------------------------------------------------------------------------------- +; | limit | base | R12 | R13 | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | +; ---------------------------------------------------------------------------------- +; | 0xe0 | 0xe4 | 0xe8 | 0xec | 0xf0 | 0xf4 | 0xf8 | 0xfc | +; ---------------------------------------------------------------------------------- +; | R14 | R15 | RDI | RSI | +;
---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | +; ---------------------------------------------------------------------------------- +; | 0x100 | 0x104 | 0x108 | 0x10c | 0x110 | 0x114 | 0x118 | 0x11c | +; ---------------------------------------------------------------------------------- +; | RBX | RBP | hidden | RIP | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | +; ---------------------------------------------------------------------------------- +; | 0x120 | 0x124 | 0x128 | 0x12c | 0x130 | 0x134 | 0x138 | 0x13c | +; ---------------------------------------------------------------------------------- +; | parameter area | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | +; ---------------------------------------------------------------------------------- +; | 0x140 | 0x144 | 0x148 | 0x14c | 0x150 | 0x154 | 0x158 | 0x15c | +; ---------------------------------------------------------------------------------- +; | FCTX | DATA | | +; ---------------------------------------------------------------------------------- + +.code + +jump_fcontext PROC BOOST_CONTEXT_EXPORT FRAME + .endprolog + + ; prepare stack + lea rsp, [rsp-0118h] + +IFNDEF BOOST_USE_TSX + ; save XMM storage + movaps [rsp], xmm6 + movaps [rsp+010h], xmm7 + movaps [rsp+020h], xmm8 + movaps [rsp+030h], xmm9 + movaps [rsp+040h], xmm10 + movaps [rsp+050h], xmm11 + movaps [rsp+060h], xmm12 + movaps [rsp+070h], xmm13 + movaps [rsp+080h], xmm14 + movaps [rsp+090h], xmm15 + ; save MMX control- and status-word + stmxcsr
[rsp+0a0h] + ; save x87 control-word + fnstcw [rsp+0a4h] +ENDIF + + ; load NT_TIB + mov r10, gs:[030h] + ; save fiber local storage + mov rax, [r10+020h] + mov [rsp+0b0h], rax + ; save current deallocation stack + mov rax, [r10+01478h] + mov [rsp+0b8h], rax + ; save current stack limit + mov rax, [r10+010h] + mov [rsp+0c0h], rax + ; save current stack base + mov rax, [r10+08h] + mov [rsp+0c8h], rax + + mov [rsp+0d0h], r12 ; save R12 + mov [rsp+0d8h], r13 ; save R13 + mov [rsp+0e0h], r14 ; save R14 + mov [rsp+0e8h], r15 ; save R15 + mov [rsp+0f0h], rdi ; save RDI + mov [rsp+0f8h], rsi ; save RSI + mov [rsp+0100h], rbx ; save RBX + mov [rsp+0108h], rbp ; save RBP + + mov [rsp+0110h], rcx ; save hidden address of transport_t + + ; preserve RSP (pointing to context-data) in R9 + mov r9, rsp + + ; restore RSP (pointing to context-data) from RDX + mov rsp, rdx + +IFNDEF BOOST_USE_TSX + ; restore XMM storage + movaps xmm6, [rsp] + movaps xmm7, [rsp+010h] + movaps xmm8, [rsp+020h] + movaps xmm9, [rsp+030h] + movaps xmm10, [rsp+040h] + movaps xmm11, [rsp+050h] + movaps xmm12, [rsp+060h] + movaps xmm13, [rsp+070h] + movaps xmm14, [rsp+080h] + movaps xmm15, [rsp+090h] + ; restore MMX control- and status-word + ldmxcsr [rsp+0a0h] + ; restore x87 control-word + fldcw [rsp+0a4h] +ENDIF + + ; load NT_TIB + mov r10, gs:[030h] + ; restore fiber local storage + mov rax, [rsp+0b0h] + mov [r10+020h], rax + ; restore current deallocation stack + mov rax, [rsp+0b8h] + mov [r10+01478h], rax + ; restore current stack limit + mov rax, [rsp+0c0h] + mov [r10+010h], rax + ; restore current stack base + mov rax, [rsp+0c8h] + mov [r10+08h], rax + + mov r12, [rsp+0d0h] ; restore R12 + mov r13, [rsp+0d8h] ; restore R13 + mov r14, [rsp+0e0h] ; restore R14 + mov r15, [rsp+0e8h] ; restore R15 + mov rdi, [rsp+0f0h] ; restore RDI + mov rsi, [rsp+0f8h] ; restore RSI + mov rbx, [rsp+0100h] ; restore RBX + mov rbp, [rsp+0108h] ; restore RBP + + mov rax, [rsp+0110h] ; restore hidden address of transport_t +
+ ; prepare stack + lea rsp, [rsp+0118h] + + ; load return-address + pop r10 + + ; transport_t returned in RAX + ; return parent fcontext_t + mov [rax], r9 + ; return data + mov [rax+08h], r8 + + ; transport_t as 1.arg of context-function + mov rcx, rax + + ; indirect jump to context + jmp r10 +jump_fcontext ENDP +END diff --git a/lib/context_switcher/asm/fcontext/jump_x86_64_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/jump_x86_64_sysv_elf_gas.S new file mode 100644 index 0000000..d0defc4 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_x86_64_sysv_elf_gas.S @@ -0,0 +1,81 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| R12 | R13 | R14 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | R15 | RBX | RBP | RIP | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.file 
"jump_x86_64_sysv_elf_gas.S" +.text +.globl jump_fcontext +.type jump_fcontext,@function +.align 16 +jump_fcontext: + leaq -0x38(%rsp), %rsp /* prepare stack */ + +#if !defined(BOOST_USE_TSX) + stmxcsr (%rsp) /* save MMX control- and status-word */ + fnstcw 0x4(%rsp) /* save x87 control-word */ +#endif + + movq %r12, 0x8(%rsp) /* save R12 */ + movq %r13, 0x10(%rsp) /* save R13 */ + movq %r14, 0x18(%rsp) /* save R14 */ + movq %r15, 0x20(%rsp) /* save R15 */ + movq %rbx, 0x28(%rsp) /* save RBX */ + movq %rbp, 0x30(%rsp) /* save RBP */ + + /* store RSP (pointing to context-data) in RAX */ + movq %rsp, %rax + + /* restore RSP (pointing to context-data) from RDI */ + movq %rdi, %rsp + + movq 0x38(%rsp), %r8 /* restore return-address */ + +#if !defined(BOOST_USE_TSX) + ldmxcsr (%rsp) /* restore MMX control- and status-word */ + fldcw 0x4(%rsp) /* restore x87 control-word */ +#endif + + movq 0x8(%rsp), %r12 /* restore R12 */ + movq 0x10(%rsp), %r13 /* restore R13 */ + movq 0x18(%rsp), %r14 /* restore R14 */ + movq 0x20(%rsp), %r15 /* restore R15 */ + movq 0x28(%rsp), %rbx /* restore RBX */ + movq 0x30(%rsp), %rbp /* restore RBP */ + + leaq 0x40(%rsp), %rsp /* prepare stack */ + + /* return transfer_t from jump */ + /* RAX == fctx, RDX == data */ + movq %rsi, %rdx + /* pass transfer_t as first arg in context function */ + /* RDI == fctx, RSI == data */ + movq %rax, %rdi + + /* indirect jump to context */ + jmp *%r8 +.size jump_fcontext,.-jump_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/jump_x86_64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/jump_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..afc3e5c --- /dev/null +++ b/lib/context_switcher/asm/fcontext/jump_x86_64_sysv_macho_gas.S @@ -0,0 +1,75 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| R12 | R13 | R14 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | R15 | RBX | RBP | RIP | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl _jump_fcontext +.align 8 +_jump_fcontext: + leaq -0x38(%rsp), %rsp /* prepare stack */ + +#if !defined(BOOST_USE_TSX) + stmxcsr (%rsp) /* save MMX control- and status-word */ + fnstcw 0x4(%rsp) /* save x87 control-word */ +#endif + + movq %r12, 0x8(%rsp) /* save R12 */ + movq %r13, 0x10(%rsp) /* save R13 */ + movq %r14, 0x18(%rsp) /* save R14 */ + movq %r15, 0x20(%rsp) /* save R15 */ + movq %rbx, 0x28(%rsp) /* save RBX */ + movq %rbp, 0x30(%rsp) /* save RBP */ + + /* store RSP (pointing to context-data) in RAX */ + movq %rsp, %rax + + /* restore RSP (pointing to context-data) from RDI */ + movq %rdi, %rsp + + movq 0x38(%rsp), %r8 /* restore return-address */ + +#if 
!defined(BOOST_USE_TSX) + ldmxcsr (%rsp) /* restore MMX control- and status-word */ + fldcw 0x4(%rsp) /* restore x87 control-word */ +#endif + + movq 0x8(%rsp), %r12 /* restore R12 */ + movq 0x10(%rsp), %r13 /* restore R13 */ + movq 0x18(%rsp), %r14 /* restore R14 */ + movq 0x20(%rsp), %r15 /* restore R15 */ + movq 0x28(%rsp), %rbx /* restore RBX */ + movq 0x30(%rsp), %rbp /* restore RBP */ + + leaq 0x40(%rsp), %rsp /* prepare stack */ + + /* return transfer_t from jump */ + /* RAX == fctx, RDX == data */ + movq %rsi, %rdx + /* pass transfer_t as first arg in context function */ + /* RDI == fctx, RSI == data */ + movq %rax, %rdi + + /* indirect jump to context */ + jmp *%r8 diff --git a/lib/context_switcher/asm/fcontext/make_arm64_aapcs_elf_gas.S b/lib/context_switcher/asm/fcontext/make_arm64_aapcs_elf_gas.S new file mode 100644 index 0000000..66cfb2d --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_arm64_aapcs_elf_gas.S @@ -0,0 +1,85 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | d8 | d9 | d10 | d11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | d12 | d13 | d14 | d15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- 
* + * | 40 | 41 | 42 | 43 | | | * + * ------------------------------------------------- * + * | 0xa0| 0xa4| 0xa8| 0xac| | | * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.file "make_arm64_aapcs_elf_gas.S" +.text +.align 2 +.global make_fcontext +.type make_fcontext, %function +make_fcontext: + # shift address in x0 (allocated stack) to lower 16 byte boundary + and x0, x0, ~0xF + + # reserve space for context-data on context-stack + sub x0, x0, #0xb0 + + # third arg of make_fcontext() == address of context-function + # store address as a PC to jump in + str x2, [x0, #0xa0] + + # save address of finish as return-address for context-function + # will be entered after context-function returns (LR register) + adr x1, finish + str x1, [x0, #0x98] + + ret x30 // return pointer to context-data (x0) + +finish: + # exit code is zero + mov x0, #0 + # exit application + bl _exit + +.size make_fcontext,.-make_fcontext +# Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/make_arm64_aapcs_macho_gas.S b/lib/context_switcher/asm/fcontext/make_arm64_aapcs_macho_gas.S new file mode 100644 index 0000000..a3716ff --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_arm64_aapcs_macho_gas.S @@ -0,0 +1,88 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | d8 | d9 | d10 | d11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | d12 | d13 | d14 | d15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- 
* + * | 40 | 41 | 42 | 43 | | | * + * ------------------------------------------------- * + * | 0xa0| 0xa4| 0xa8| 0xac| | | * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _make_fcontext +.balign 16 + +_make_fcontext: + ; shift address in x0 (allocated stack) to lower 16 byte boundary + and x0, x0, ~0xF + + ; reserve space for context-data on context-stack + sub x0, x0, #0xb0 + + ; third arg of make_fcontext() == address of context-function + ; store address as a PC to jump in + str x2, [x0, #0xa0] + + ; compute abs address of label finish + ; 0x0c = 3 instructions * size (4) before label 'finish' + + ; TODO: Numeric offset since llvm still does not support labels in ADR. Fix: + ; http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20140407/212336.html + adr x1, 0x0c + + ; save address of finish as return-address for context-function + ; will be entered after context-function returns (LR register) + str x1, [x0, #0x98] + + ret lr ; return pointer to context-data (x0) + +finish: + ; exit code is zero + mov x0, #0 + ; exit application + bl __exit + + diff --git a/lib/context_switcher/asm/fcontext/make_arm_aapcs_elf_gas.S b/lib/context_switcher/asm/fcontext/make_arm_aapcs_elf_gas.S new file mode 100644 index 0000000..98ae64b --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_arm_aapcs_elf_gas.S @@ -0,0 +1,81 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | s16 | s17 | s18 | s19 | s20 | s21 | s22 | s23 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | s24 | s25 | s26 | s27 | s28 | s29 | s30 | s31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * |hiddn| v1 | v2 | v3 | v4 | v5 | v6 | v7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.file "make_arm_aapcs_elf_gas.S" +.text +.globl make_fcontext +.align 2 +.type make_fcontext,%function +.syntax unified +make_fcontext: + @ shift address in A1 to lower 16 byte boundary + bic a1, a1, #15 + + @ reserve space for context-data on context-stack + sub a1, 
a1, #124 + + @ third arg of make_fcontext() == address of context-function + str a3, [a1, #104] + + @ compute address of returned transfer_t + add a2, a1, #108 + mov a3, a2 + str a3, [a1, #64] + + @ compute abs address of label finish + adr a2, finish + @ save address of finish as return-address for context-function + @ will be entered after context-function returns + str a2, [a1, #100] + +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) +#endif + + bx lr @ return pointer to context-data + +finish: + @ exit code is zero + mov a1, #0 + @ exit application + bl _exit@PLT +.size make_fcontext,.-make_fcontext + +@ Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/make_arm_aapcs_macho_gas.S b/lib/context_switcher/asm/fcontext/make_arm_aapcs_macho_gas.S new file mode 100644 index 0000000..c909ae9 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_arm_aapcs_macho_gas.S @@ -0,0 +1,71 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | s16 | s17 | s18 | s19 | s20 | s21 | s22 | s23 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | s24 | s25 | s26 | s27 | s28 | s29 | s30 | s31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | sjlj|hiddn| v1 | v2 | v3 | v4 | v5 | v6 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | v7 | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _make_fcontext +.align 2 +_make_fcontext: + @ shift address in A1 to lower 16 byte boundary + bic a1, a1, #15 + + @ reserve space for context-data on context-stack + sub a1, a1, #124 + + @ third arg of make_fcontext() == address of context-function 
+ str a3, [a1, #108] + + @ compute address of returned transfer_t + add a2, a1, #112 + mov a3, a2 + str a3, [a1, #68] + + @ compute abs address of label finish + adr a2, finish + @ save address of finish as return-address for context-function + @ will be entered after context-function returns + str a2, [a1, #104] + + bx lr @ return pointer to context-data + +finish: + @ exit code is zero + mov a1, #0 + @ exit application + bl __exit diff --git a/lib/context_switcher/asm/fcontext/make_arm_aapcs_pe_armasm.asm b/lib/context_switcher/asm/fcontext/make_arm_aapcs_pe_armasm.asm new file mode 100644 index 0000000..27cbfb0 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_arm_aapcs_pe_armasm.asm @@ -0,0 +1,77 @@ +;/* +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) +;*/ + +; ******************************************************* +; * * +; * ------------------------------------------------- * +; * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +; * ------------------------------------------------- * +; * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * +; * ------------------------------------------------- * +; * |deall|limit| base|hiddn| v1 | v2 | v3 | v4 | * +; * ------------------------------------------------- * +; * ------------------------------------------------- * +; * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +; * ------------------------------------------------- * +; * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * +; * ------------------------------------------------- * +; * | v5 | v6 | v7 | v8 | lr | pc | FCTX| DATA| * +; * ------------------------------------------------- * +; * * +; ******************************************************* + + + AREA |.text|, CODE + ALIGN 4 + EXPORT make_fcontext + IMPORT _exit + +make_fcontext PROC + ; first arg of make_fcontext() == top of context-stack + ; save top of context-stack 
(base) A4 + mov a4, a1 + + ; shift address in A1 to lower 16 byte boundary + bic a1, a1, #0x0f + + ; reserve space for context-data on context-stack + sub a1, a1, #0x48 + + ; save top address of context_stack as 'base' + str a4, [a1, #0x8] + ; second arg of make_fcontext() == size of context-stack + ; compute bottom address of context-stack (limit) + sub a4, a4, a2 + ; save bottom address of context-stack as 'limit' + str a4, [a1, #0x4] + ; save bottom address of context-stack as 'deallocation stack' + str a4, [a1, #0x0] + + ; third arg of make_fcontext() == address of context-function + str a3, [a1, #0x34] + + ; compute address of returned transfer_t + add a2, a1, #0x38 + mov a3, a2 + str a3, [a1, #0xc] + + ; compute abs address of label finish + adr a2, finish + ; save address of finish as return-address for context-function + ; will be entered after context-function returns + str a2, [a1, #0x30] + + bx lr ; return pointer to context-data + +finish + ; exit code is zero + mov a1, #0 + ; exit application + bl _exit + + ENDP + END diff --git a/lib/context_switcher/asm/fcontext/make_combined_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/make_combined_sysv_macho_gas.S new file mode 100644 index 0000000..727e904 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_combined_sysv_macho_gas.S @@ -0,0 +1,20 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "make_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "make_x86_64_sysv_macho_gas.S" +#elif defined(__ppc__) + #include "make_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "make_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/lib/context_switcher/asm/fcontext/make_i386_ms_pe_gas.asm b/lib/context_switcher/asm/fcontext/make_i386_ms_pe_gas.asm new file mode 100644 index 0000000..10277bb --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_i386_ms_pe_gas.asm @@ -0,0 +1,153 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************* +* --------------------------------------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* --------------------------------------------------------------------------------- * +* | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | * +* --------------------------------------------------------------------------------- * +* | fc_mxcsr|fc_x87_cw| fc_strg |fc_deallo| limit | base | fc_seh | EDI | * +* --------------------------------------------------------------------------------- * +* --------------------------------------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* --------------------------------------------------------------------------------- * +* | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | * +* --------------------------------------------------------------------------------- * +* | ESI | EBX | EBP | EIP | to | data | EH NXT |SEH HNDLR| * +* 
--------------------------------------------------------------------------------- * +**************************************************************************************/ + +.file "make_i386_ms_pe_gas.asm" +.text +.p2align 4,,15 + +/* mark as using no unregistered SEH handlers */ +.globl @feat.00 +.def @feat.00; .scl 3; .type 0; .endef +.set @feat.00, 1 + +.globl _make_fcontext +.def _make_fcontext; .scl 2; .type 32; .endef +_make_fcontext: + /* first arg of make_fcontext() == top of context-stack */ + movl 0x04(%esp), %eax + + /* reserve space for first argument of context-function */ + /* EAX might already point to a 16byte border */ + leal -0x8(%eax), %eax + + /* shift address in EAX to lower 16 byte boundary */ + andl $-16, %eax + + /* reserve space for context-data on context-stack */ + /* size for fc_mxcsr .. EIP + return-address for context-function */ + /* on context-function entry: (ESP -0x4) % 8 == 0 */ + /* additional space is required for SEH */ + leal -0x40(%eax), %eax + + /* save MMX control- and status-word */ + stmxcsr (%eax) + /* save x87 control-word */ + fnstcw 0x4(%eax) + + /* first arg of make_fcontext() == top of context-stack */ + movl 0x4(%esp), %ecx + /* save top address of context stack as 'base' */ + movl %ecx, 0x14(%eax) + /* second arg of make_fcontext() == size of context-stack */ + movl 0x8(%esp), %edx + /* negate stack size for LEA instruction (== subtraction) */ + negl %edx + /* compute bottom address of context stack (limit) */ + leal (%ecx,%edx), %ecx + /* save bottom address of context-stack as 'limit' */ + movl %ecx, 0x10(%eax) + /* save bottom address of context-stack as 'deallocation stack' */ + movl %ecx, 0xc(%eax) + /* set fiber-storage to zero */ + xorl %ecx, %ecx + movl %ecx, 0x8(%eax) + + /* third arg of make_fcontext() == address of context-function */ + /* stored in EBX */ + movl 0xc(%esp), %ecx + movl %ecx, 0x24(%eax) + + /* compute abs address of label trampoline */ + movl $trampoline, %ecx + /* save address of
trampoline as return-address for context-function */ + /* will be entered after calling jump_fcontext() first time */ + movl %ecx, 0x2c(%eax) + + /* compute abs address of label finish */ + movl $finish, %ecx + /* save address of finish as return-address for context-function */ + /* will be entered after context-function returns */ + movl %ecx, 0x28(%eax) + + /* traverse current seh chain to get the last exception handler installed by Windows */ + /* note that on Windows Server 2008 and 2008 R2, SEHOP is activated by default */ + /* the exception handler chain is tested for the presence of ntdll.dll!FinalExceptionHandler */ + /* at its end by RaiseException all seh andlers are disregarded if not present and the */ + /* program is aborted */ + /* load NT_TIB into ECX */ + movl %fs:(0x0), %ecx + +walk: + /* load 'next' member of current SEH into EDX */ + movl (%ecx), %edx + /* test if 'next' of current SEH is last (== 0xffffffff) */ + incl %edx + jz found + decl %edx + /* exchange content; ECX contains address of next SEH */ + xchgl %ecx, %edx + /* inspect next SEH */ + jmp walk + +found: + /* load 'handler' member of SEH == address of last SEH handler installed by Windows */ + movl 0x04(%ecx), %ecx + /* save address in ECX as SEH handler for context */ + movl %ecx, 0x3c(%eax) + /* set ECX to -1 */ + movl $0xffffffff, %ecx + /* save ECX as next SEH item */ + movl %ecx, 0x38(%eax) + /* load address of next SEH item */ + leal 0x38(%eax), %ecx + /* save next SEH */ + movl %ecx, 0x18(%eax) + + /* return pointer to context-data */ + ret + +trampoline: + /* move transport_t for entering context-function */ + /* FCTX == EAX, DATA == EDX */ + movl %eax, (%esp) + movl %edx, 0x4(%esp) + /* label finish as return-address */ + pushl %ebp + /* jump to context-function */ + jmp *%ebx + +finish: + /* ESP points to same address as ESP on entry of context function + 0x4 */ + xorl %eax, %eax + /* exit code is zero */ + movl %eax, (%esp) + /* exit application */ + call __exit + hlt + 
+.def __exit; .scl 2; .type 32; .endef /* standard C library function */ + +.section .drectve +.ascii " -export:\"_make_fcontext\"" diff --git a/lib/context_switcher/asm/fcontext/make_i386_ms_pe_masm.asm b/lib/context_switcher/asm/fcontext/make_i386_ms_pe_masm.asm new file mode 100644 index 0000000..5246465 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_i386_ms_pe_masm.asm @@ -0,0 +1,140 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. +; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; --------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; --------------------------------------------------------------------------------- +; | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | +; --------------------------------------------------------------------------------- +; | fc_mxcsr|fc_x87_cw| fc_strg |fc_deallo| limit | base | fc_seh | EDI | +; --------------------------------------------------------------------------------- +; --------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; --------------------------------------------------------------------------------- +; | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | +; --------------------------------------------------------------------------------- +; | ESI | EBX | EBP | EIP | to | data | EH NXT |SEH HNDLR| +; --------------------------------------------------------------------------------- + +.386 +.XMM +.model flat, c +; standard C library function +_exit PROTO, value:SDWORD +.code + +make_fcontext PROC BOOST_CONTEXT_EXPORT + ; first arg of make_fcontext() == top of context-stack + mov eax, [esp+04h] + + ; reserve space for first argument of context-function + ; EAX might already point to a 16byte border + lea eax, [eax-08h] + + ; shift address in EAX to lower 16 byte 
boundary + and eax, -16 + + ; reserve space for context-data on context-stack + ; on context-function entry: (ESP -0x4) % 8 == 0 + ; additional space is required for SEH + lea eax, [eax-040h] + + ; save MXCSR (SSE control- and status-word) in slot fc_mxcsr + stmxcsr [eax] + ; save x87 control-word in slot fc_x87_cw + fnstcw [eax+04h] + + ; first arg of make_fcontext() == top of context-stack + mov ecx, [esp+04h] + ; save top address of context stack as 'base' + mov [eax+014h], ecx + ; second arg of make_fcontext() == size of context-stack + mov edx, [esp+08h] + ; negate stack size for LEA instruction (== subtraction) + neg edx + ; compute bottom address of context stack (limit) + lea ecx, [ecx+edx] + ; save bottom address of context-stack as 'limit' + mov [eax+010h], ecx + ; save bottom address of context-stack as 'deallocation stack' + mov [eax+0ch], ecx + ; set fiber-storage to zero + xor ecx, ecx + mov [eax+08h], ecx + + ; third arg of make_fcontext() == address of context-function + ; stored in the EBX slot (024h) of the context-data + mov ecx, [esp+0ch] + mov [eax+024h], ecx + + ; compute abs address of label trampoline + mov ecx, trampoline + ; save address of trampoline as return-address for context-function + ; will be entered after calling jump_fcontext() first time + mov [eax+02ch], ecx + + ; compute abs address of label finish + mov ecx, finish + ; save address of finish as return-address for context-function in the EBP slot (028h) + ; will be entered after context-function returns + mov [eax+028h], ecx + + ; traverse current SEH chain to get the last exception handler installed by Windows + ; note that on Windows Server 2008 and 2008 R2, SEHOP is activated by default + ; the exception handler chain is tested for the presence of ntdll.dll!FinalExceptionHandler + ; at its end by RaiseException; all SEH handlers are disregarded if it is not present and the + ; program is aborted + assume fs:nothing + ; load first member of NT_TIB (head of the SEH chain, at fs:[0]) into ECX + mov ecx, fs:[0h] + assume fs:error + +walk: + ; load 'next' member of current SEH into EDX + mov edx, [ecx] + ; test if 'next' of 
current SEH is last (== 0xffffffff) + inc edx + jz found + dec edx + ; exchange content; ECX contains address of next SEH + xchg edx, ecx + ; inspect next SEH + jmp walk + +found: + ; load 'handler' member of SEH == address of last SEH handler installed by Windows + mov ecx, [ecx+04h] + ; save address in ECX as SEH handler for context + mov [eax+03ch], ecx + ; set ECX to -1 + mov ecx, 0ffffffffh + ; save ECX as next SEH item + mov [eax+038h], ecx + ; load address of next SEH item + lea ecx, [eax+038h] + ; save next SEH in the fc_seh slot (018h) + mov [eax+018h], ecx + + ret ; return pointer to context-data + +trampoline: + ; move transport_t for entering context-function + ; FCTX == EAX, DATA == EDX + mov [esp], eax + mov [esp+04h], edx + push ebp + ; jump to context-function + jmp ebx + +finish: + ; exit code is zero + xor eax, eax + mov [esp], eax + ; exit application + call _exit + hlt +make_fcontext ENDP +END diff --git a/lib/context_switcher/asm/fcontext/make_i386_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/make_i386_sysv_elf_gas.S new file mode 100644 index 0000000..b76de26 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_i386_sysv_elf_gas.S @@ -0,0 +1,107 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| EDI | ESI | EBX | EBP | EIP | hidden | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | | * + * ---------------------------------------------------------------------------------- * + * | to | data | | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.file "make_i386_sysv_elf_gas.S" +.text +.globl make_fcontext +.align 2 +.type make_fcontext,@function +make_fcontext: + /* first arg of make_fcontext() == top of context-stack */ + movl 0x4(%esp), %eax + + /* reserve space for first argument of context-function + eax might already point to a 16byte border */ + leal -0x8(%eax), %eax + + /* shift address in EAX to lower 16 byte boundary */ + andl $-16, %eax + + /* reserve space for context-data on context-stack */ + leal -0x28(%eax), %eax + + /* third arg of make_fcontext() == address of context-function */ + /* stored in the EBX slot (0x10) of the context-data */ + movl 0xc(%esp), %ecx + movl %ecx, 0x10(%eax) + + /* save MXCSR (SSE control- and status-word) in slot fc_mxcsr */ + stmxcsr (%eax) + /* save x87 control-word in slot fc_x87_cw */ + fnstcw 
0x4(%eax) + + /* return transport_t: store the address of slot 0x8 in the 'hidden' slot (0x1c) */ + /* FCTX == EDI, DATA == ESI */ + leal 0x8(%eax), %ecx + movl %ecx, 0x1c(%eax) + + /* compute abs address of label trampoline */ + call 1f + /* address of label 1 */ +1: popl %ecx + /* compute abs address of label trampoline */ + addl $trampoline-1b, %ecx + /* save address of trampoline as return address (EIP slot, 0x18) */ + /* will be entered after calling jump_fcontext() first time */ + movl %ecx, 0x18(%eax) + + /* compute abs address of label finish */ + call 2f + /* address of label 2 */ +2: popl %ecx + /* compute abs address of label finish */ + addl $finish-2b, %ecx + /* save address of finish as return-address for context-function (EBP slot, 0x14) */ + /* will be entered after context-function returns */ + movl %ecx, 0x14(%eax) + + ret /* return pointer to context-data */ + +trampoline: + /* move transport_t for entering context-function */ + movl %edi, (%esp) + movl %esi, 0x4(%esp) + pushl %ebp + /* jump to context-function */ + jmp *%ebx + +finish: + call 3f + /* address of label 3 */ +3: popl %ebx + /* compute address of GOT and store it in EBX */ + addl $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx + + /* exit code is zero */ + xorl %eax, %eax + movl %eax, (%esp) + /* exit application */ + call _exit@PLT + hlt +.size make_fcontext,.-make_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/make_i386_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/make_i386_sysv_macho_gas.S new file mode 100644 index 0000000..fdcdb7c --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_i386_sysv_macho_gas.S @@ -0,0 +1,90 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| EDI | ESI | EBX | EBP | EIP | to | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | | * + * ---------------------------------------------------------------------------------- * + * | data | | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl _make_fcontext +.align 2 +_make_fcontext: + /* first arg of make_fcontext() == top of context-stack */ + movl 0x4(%esp), %eax + + /* reserve space for first argument of context-function + eax might already point to a 16byte border */ + leal -0x8(%eax), %eax + + /* shift address in EAX to lower 16 byte boundary */ + andl $-16, %eax + + /* reserve space for context-data on context-stack */ + leal -0x2c(%eax), %eax + + /* third arg of make_fcontext() == address of context-function */ + /* stored in the EBX slot (0x10) of the context-data */ + movl 0xc(%esp), %ecx + movl %ecx, 0x10(%eax) + + /* save MXCSR (SSE control- and status-word) in slot fc_mxcsr */ + stmxcsr (%eax) + /* save x87 control-word in slot fc_x87_cw */ + fnstcw 0x4(%eax) + + /* compute abs address of label trampoline */ + call 1f + /* 
address of label 1 */ +1: popl %ecx + /* compute abs address of label trampoline */ + addl $trampoline-1b, %ecx + /* save address of trampoline as return address (EIP slot, 0x18) */ + /* will be entered after calling jump_fcontext() first time */ + movl %ecx, 0x18(%eax) + + /* compute abs address of label finish */ + call 2f + /* address of label 2 */ +2: popl %ecx + /* compute abs address of label finish */ + addl $finish-2b, %ecx + /* save address of finish as return-address for context-function (EBP slot, 0x14) */ + /* will be entered after context-function returns */ + movl %ecx, 0x14(%eax) + + ret /* return pointer to context-data */ + +trampoline: + /* move transport_t for entering context-function */ + movl %eax, (%esp) + movl %edx, 0x4(%esp) + pushl %ebp + /* jump to context-function */ + jmp *%ebx + +finish: + /* exit code is zero */ + xorl %eax, %eax + movl %eax, (%esp) + /* exit application */ + call __exit + hlt diff --git a/lib/context_switcher/asm/fcontext/make_i386_x86_64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/make_i386_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..e364b2d --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_i386_x86_64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "make_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "make_x86_64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/lib/context_switcher/asm/fcontext/make_mips32_o32_elf_gas.S b/lib/context_switcher/asm/fcontext/make_mips32_o32_elf_gas.S new file mode 100644 index 0000000..4e11e3d --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_mips32_o32_elf_gas.S @@ -0,0 +1,97 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | F20 | F22 | F24 | F26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | F28 | F30 | S0 | S1 | S2 | S3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | S4 | S5 | S6 | S7 | FP |hiddn| RA | PC | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | ABI ARGS | GP | FCTX| DATA| | * + * ------------------------------------------------- * + * * + * *****************************************************/ + +.file "make_mips32_o32_elf_gas.S" +.text +.globl make_fcontext +.align 2 +.type make_fcontext,@function +.ent make_fcontext +make_fcontext: +#ifdef __PIC__ +.set noreorder +.cpload $t9 +.set reorder +#endif + # shift address in A0 to lower 16 byte boundary + li $v1, -16 # 0xfffffffffffffff0 + and $v0, $v1, $a0 + + # reserve space for 
context-data on context-stack + # includes an extra 32 bytes for: + # - 16-byte incoming argument area required by mips ABI used when + # jump_fcontext calls the initial function + # - 4 bytes to save our GP register used in finish + # - 8 bytes as space for the transfer_t returned to finish + # - 4 bytes for alignment + addiu $v0, $v0, -128 + + # third arg of make_fcontext() == address of context-function (stored in the PC slot, 92) + sw $a2, 92($v0) + # save global pointer in context-data (GP slot, 112) + sw $gp, 112($v0) + + # compute address of returned transfer_t (FCTX slot, 116) and store it in the hidden slot (84) + addiu $t0, $v0, 116 + sw $t0, 84($v0) + + # compute abs address of label finish + la $t9, finish + # save address of finish as return-address for context-function (RA slot, 88) + # will be entered after context-function returns + sw $t9, 88($v0) + + jr $ra # return pointer to context-data + +finish: + # reload our gp register (needed for la) + lw $gp, 16($sp) + + # call _exit(0) + # the previous function should have left the 16 bytes incoming argument + # area on the stack which we reuse for calling _exit + la $t9, _exit + move $a0, $zero + jr $t9 +.end make_fcontext +.size make_fcontext, .-make_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/make_mips64_n64_elf_gas.S b/lib/context_switcher/asm/fcontext/make_mips64_n64_elf_gas.S new file mode 100644 index 0000000..7bb30b1 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_mips64_n64_elf_gas.S @@ -0,0 +1,96 @@ +/* + Copyright Jiaxun Yang 2018. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 8 | 16 | 24 | * + * ------------------------------------------------- * + * | F24 | F25 | F26 | F27 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 40 | 48 | 56 | * + * ------------------------------------------------- * + * | F28 | F29 | F30 | F31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 72 | 80 | 88 | * + * ------------------------------------------------- * + * | S0 | S1 | S2 | S3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | S4 | S5 | S6 | S7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | FP | GP | RA | PC | * + * ------------------------------------------------- * + * * + * *****************************************************/ + +.file "make_mips64_n64_elf_gas.S" +.text +.globl make_fcontext +.align 3 
+.type make_fcontext,@function +.ent make_fcontext +make_fcontext: +#ifdef __PIC__ +.set noreorder +.cpload $t9 +.set reorder +#endif + # shift address in A0 to lower 16 byte boundary + li $v1, 0xfffffffffffffff0 + and $v0, $v1, $a0 + + # reserve space for context-data on context-stack + daddiu $v0, $v0, -160 + + # third arg of make_fcontext() == address of context-function (stored in the PC slot, 152) + sd $a2, 152($v0) + # save global pointer in context-data (GP slot, 136) + sd $gp, 136($v0) + + # pseudo instruction: compute abs address of label finish based on GP + dla $t9, finish + + # save address of finish as return-address for context-function (RA slot, 144) + # will be entered after context-function returns + sd $t9, 144($v0) + + jr $ra # return pointer to context-data + +finish: + # reload our gp register (needed for dla) + daddiu $t0, $sp, -160 + ld $gp, 136($t0) + + # call _exit(0) + # the previous function should have left the 16 bytes incoming argument + # area on the stack which we reuse for calling _exit + dla $t9, _exit + move $a0, $zero + jr $t9 +.end make_fcontext +.size make_fcontext, .-make_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/make_ppc32_ppc64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/make_ppc32_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..52e7220 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_ppc32_ppc64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__ppc__) + #include "make_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "make_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/lib/context_switcher/asm/fcontext/make_ppc32_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/make_ppc32_sysv_elf_gas.S new file mode 100644 index 0000000..9616c4c --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_ppc32_sysv_elf_gas.S @@ -0,0 +1,146 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * |bchai|hiddn| fpscr | PC | CR | R14 | R15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R24 | R25 | R26 | R27 | R28 | R29 | R30 | R31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | F14 | F15 | F16 | F17 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 
152 | 156 | * + * ------------------------------------------------- * + * | F18 | F19 | F20 | F21 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | F22 | F23 | F24 | F25 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | F26 | F27 | F28 | F29 | * + * ------------------------------------------------- * + * ------------------------|------------ * + * | 224 | 228 | 232 | 236 | 240 | 244 | * + * ------------------------|------------ * + * | F30 | F31 |bchai| LR | * + * ------------------------|------------ * + * * + *******************************************************/ + +.file "make_ppc32_sysv_elf_gas.S" +.text +.globl make_fcontext +.align 2 +.type make_fcontext,@function +make_fcontext: + # save return address into R6 + mflr %r6 + + # first arg of make_fcontext() == top address of context-stack + # shift address in R3 to lower 16 byte boundary + clrrwi %r3, %r3, 4 + + # reserve space on context-stack, including 16 bytes of linkage + # and parameter area + 240 bytes of context-data (R1 % 16 == 0) + subi %r3, %r3, 16 + 240 + + # third arg of make_fcontext() == address of context-function +#ifdef __linux__ + # save context-function as PC + stw %r5, 16(%r3) +#else + # save context-function for trampoline + stw %r5, 248(%r3) +#endif + + # set back-chain to zero + li %r0, 0 + stw %r0, 240(%r3) + + # copy FPSCR to new context + mffs %f0 + stfd %f0, 8(%r3) + +#ifdef __linux__ + # set hidden pointer for returning transfer_t + la %r0, 248(%r3) + stw %r0, 4(%r3) +#endif + + # load address of label 1 into R4 + bl 1f +1: mflr %r4 +#ifndef __linux__ + # compute abs address of trampoline, use as PC + addi %r7, %r4, 
trampoline - 1b + stw %r7, 16(%r3) +#endif + # compute abs address of label finish + addi %r4, %r4, finish - 1b + # save address of finish as return-address for context-function (LR slot, 244) + # will be entered after context-function returns + stw %r4, 244(%r3) + + # restore return address from R6 + mtlr %r6 + + blr # return pointer to context-data + +#ifndef __linux__ +trampoline: + # On systems other than Linux, jump_fcontext is returning the + # transfer_t in R3:R4, but we need to pass transfer_t * R3 to + # our context-function. + lwz %r0, 8(%r1) # address of context-function + mtctr %r0 + stw %r3, 8(%r1) + stw %r4, 12(%r1) + la %r3, 8(%r1) # address of transfer_t + bctr +#endif + +finish: + # Use the secure PLT for _exit(0). If we use the insecure BSS PLT + # here, then the linker may use the insecure BSS PLT even if the + # C++ compiler wanted the secure PLT. + + # set R30 for secure PLT, large model + bl 2f +2: mflr %r30 + addis %r30, %r30, .Ltoc - 2b@ha + addi %r30, %r30, .Ltoc - 2b@l + + # call _exit(0) with special addend 0x8000 for large model + li %r3, 0 + bl _exit + 0x8000@plt +.size make_fcontext, .-make_fcontext + +/* Provide the GOT pointer for secure PLT, large model. */ +.section .got2,"aw" +.Ltoc = . + 0x8000 + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/make_ppc32_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/make_ppc32_sysv_macho_gas.S new file mode 100644 index 0000000..8f35eff --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_ppc32_sysv_macho_gas.S @@ -0,0 +1,137 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/****************************************************** + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | F14 | F15 | F16 | F17 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | F18 | F19 | F20 | F21 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | F22 | F23 | F24 | F25 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | F26 | F27 | F28 | F29 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | F30 | F31 | fpscr | R13 | R14 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 
41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | R31 |hiddn| CR | LR | PC |bchai|linkr| FCTX| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 64 | | * + * ------------------------------------------------- * + * | 256 | | * + * ------------------------------------------------- * + * | DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _make_fcontext +.align 2 +_make_fcontext: + # save return address into R6 + mflr r6 + + # first arg of make_fcontext() == top address of context-stack + # shift address in R3 to lower 16 byte boundary + clrrwi r3, r3, 4 + + # reserve space for context-data on context-stack + # including 64 byte of linkage + parameter area (R1 % 16 == 0) + subi r3, r3, 336 + + # third arg of make_fcontext() == address of context-function (stored in the PC slot, 240) + stw r5, 240(r3) + + # set back-chain to zero + li r0, 0 + stw r0, 244(r3) + + mffs f0 # load FPSCR + stfd f0, 144(r3) # save FPSCR + + # 
compute address of returned transfer_t (FCTX slot, 252) and store it in the hidden slot (228) + addi r0, r3, 252 + mr r4, r0 + stw r4, 228(r3) + + # load LR + mflr r0 + # jump to label 1 + bl 1f +1: + # load LR into R4 + mflr r4 + # compute abs address of label finish + addi r4, r4, finish - 1b + # restore LR + mtlr r0 + # save address of finish as return-address for context-function (LR slot, 236) + # will be entered after context-function returns + stw r4, 236(r3) + + # restore return address from R6 + mtlr r6 + + blr # return pointer to context-data + +finish: + # save return address into R0 + mflr r0 + # save return address on stack, set up stack frame + stw r0, 4(r1) + # allocate stack space, R1 % 16 == 0 + stwu r1, -16(r1) + + # exit code is zero + li r3, 0 + # exit application + bl _exit@plt diff --git a/lib/context_switcher/asm/fcontext/make_ppc32_sysv_xcoff_gas.S b/lib/context_switcher/asm/fcontext/make_ppc32_sysv_xcoff_gas.S new file mode 100644 index 0000000..f257258 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_ppc32_sysv_xcoff_gas.S @@ -0,0 +1,138 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/****************************************************** + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | F14 | F15 | F16 | F17 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | F18 | F19 | F20 | F21 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | F22 | F23 | F24 | F25 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | F26 | F27 | F28 | F29 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | F30 | F31 | fpscr | R13 | R14 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 
41 | 42 | 43 | 44 | 45 | 46 | 47 | *
+ * ------------------------------------------------- *
+ * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
+ * ------------------------------------------------- *
+ * | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
+ * ------------------------------------------------- *
+ * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
+ * ------------------------------------------------- *
+ * | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
+ * ------------------------------------------------- *
+ * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
+ * ------------------------------------------------- *
+ * | R31 |hiddn| CR | LR | PC |bchai|linkr| FCTX| *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 64 | | *
+ * ------------------------------------------------- *
+ * | 256 | | *
+ * ------------------------------------------------- *
+ * | DATA| | *
+ * ------------------------------------------------- *
+ * *
+ *******************************************************/
+    .globl make_fcontext[DS]
+    .globl .make_fcontext[PR]
+    .align 2
+    .csect make_fcontext[DS]
+make_fcontext:
+    .long .make_fcontext[PR]
+    .csect .make_fcontext[PR], 3
+#.make_fcontext:
+    # save return address into R6 (LR is clobbered by the bl further down)
+    mflr 6
+
+    # first arg of make_fcontext() == top address of context-stack
+    # shift address in R3 to lower 16 byte boundary
+    clrrwi 3, 3, 4
+
+    # reserve 336 bytes for context-data on context-stack
+    # including 64 byte of linkage + parameter area (R1 % 16 == 0)
+    subi 3, 3, 336
+
+    # third arg of make_fcontext() == address of context-function, kept in the PC slot (offset 240)
+    stw 5, 240(3)
+
+    # set back-chain (offset 244) to zero
+    li 0, 0
+    stw 0, 244(3) # NOTE(review): nothing is stored in the fpscr slot (offset 144) of the layout table - confirm the matching jump code does not read it
+
+    # compute address of returned transfer_t (FCTX slot, offset 252) and keep it in the hidden slot (offset 228)
+    addi 0, 3, 252
+    mr 4, 0
+    stw 4, 228(3)
+
+    # load LR into R0 so it can be restored after the bl
+    mflr 0
+    # jump to label .Label; bl leaves the address of .Label in LR
+    bl .Label
+.Label:
+    # load LR (address of .Label) into R4
+    mflr 4
+    # compute abs address of label .L_finish (position independent)
+    addi 4, 4, .L_finish - .Label
+    # restore LR
+    mtlr 0
+    # save address of finish in the LR slot (offset 236) as return-address for context-function
+    # will be entered after context-function returns
+    stw 4, 236(3)
+
+    # restore return address from R6
+    mtlr 6
+
+    blr # return pointer to context-data (R3)
+
+.L_finish:
+    # save return address into R0
+    mflr 0
+    # save return address on stack, set up stack frame
+    stw 0, 4(1)
+    # allocate stack space, R1 % 16 == 0
+    stwu 1, -16(1)
+
+    # exit code is zero
+    li 3, 0
+    # exit application
+    bl ._exit
+    nop
diff --git a/lib/context_switcher/asm/fcontext/make_ppc64_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/make_ppc64_sysv_elf_gas.S
new file mode 100644
index 0000000..c4d7ee5
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/make_ppc64_sysv_elf_gas.S
@@ -0,0 +1,177 @@
+/*
+ Copyright Oliver Kowalke 2009.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 
42 | 43 | 44 | 45 | 46 | 47 | *
+ * ------------------------------------------------- *
+ * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
+ * ------------------------------------------------- *
+ * | CR | LR | PC | back-chain| *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
+ * ------------------------------------------------- *
+ * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
+ * ------------------------------------------------- *
+ * | cr saved | lr saved | compiler | linker | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
+ * ------------------------------------------------- *
+ * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
+ * ------------------------------------------------- *
+ * | TOC saved | FCTX | DATA | | *
+ * ------------------------------------------------- *
+ * *
+ *******************************************************/
+
+.file "make_ppc64_sysv_elf_gas.S"
+.globl make_fcontext
+#if _CALL_ELF == 2
+    .text
+    .align 2
+make_fcontext:
+    addis %r2, %r12, .TOC.-make_fcontext@ha
+    addi %r2, %r2, .TOC.-make_fcontext@l
+    .localentry make_fcontext, . - make_fcontext
+#else
+    .section ".opd","aw"
+    .align 3
+make_fcontext:
+# ifdef _CALL_LINUX
+    .quad .L.make_fcontext,.TOC.@tocbase,0
+    .type make_fcontext,@function
+    .text
+    .align 2
+.L.make_fcontext:
+# else
+    .hidden .make_fcontext
+    .globl .make_fcontext
+    .quad .make_fcontext,.TOC.@tocbase,0
+    .size make_fcontext,24
+    .type .make_fcontext,@function
+    .text
+    .align 2
+.make_fcontext:
+# endif
+#endif
+    # save return address into R6 (LR is clobbered by the bl further down)
+    mflr %r6
+
+    # first arg of make_fcontext() == top address of context-stack
+    # shift address in R3 to lower 16 byte boundary
+    clrrdi %r3, %r3, 4
+
+    # reserve 248 bytes for context-data on context-stack
+    # including 64 byte of linkage + parameter area (R1 % 16 == 0)
+    subi %r3, %r3, 248
+
+    # third arg of make_fcontext() == address of context-function
+    # entry point (ELFv2) or descriptor (ELFv1)
+#if _CALL_ELF == 2
+    # save address of context-function entry point in the PC slot (offset 176)
+    std %r5, 176(%r3)
+#else
+    # save address of context-function entry point (first doubleword of the descriptor) in the PC slot (offset 176)
+    ld %r4, 0(%r5)
+    std %r4, 176(%r3)
+    # save TOC of context-function (second doubleword of the descriptor) in the TOC slot (offset 0)
+    ld %r4, 8(%r5)
+    std %r4, 0(%r3)
+#endif
+
+    # set back-chain (offset 184) to zero
+    li %r0, 0
+    std %r0, 184(%r3)
+
+#if _CALL_ELF != 2
+    # store zero in the hidden slot (offset 152); NOTE(review): presumably reloaded into r3 to indicate the first jump to context-function - confirm against the jump code
+    std %r0, 152(%r3)
+#endif
+
+    # load LR into R0 so it can be restored after the bl
+    mflr %r0
+    # jump to label 1; bl leaves the address of label 1 in LR
+    bl 1f
+1:
+    # load LR (address of label 1) into R4
+    mflr %r4
+    # compute abs address of label finish (position independent)
+    addi %r4, %r4, finish - 1b
+    # restore LR
+    mtlr %r0
+    # save address of finish in the LR slot (offset 168) as return-address for context-function
+    # will be entered after context-function returns
+    std %r4, 168(%r3)
+
+    # restore return address from R6
+    mtlr %r6
+
+    blr # return pointer to context-data (R3)
+
+finish:
+    # save return address into R0
+    mflr %r0
+    # save return address on stack, set up stack frame
+    std %r0, 8(%r1)
+    # allocate stack space, R1 % 16 == 0
+    stdu %r1, -32(%r1)
+
+    # exit code is zero
+    li %r3, 0
+    # exit application
+    bl _exit
+    nop
+#if _CALL_ELF == 2
+    .size make_fcontext, .-make_fcontext
+#else
+# ifdef _CALL_LINUX
+    .size .make_fcontext, .-.L.make_fcontext
+# else
+    .size .make_fcontext, .-.make_fcontext
+# endif
+#endif
+
+/* Mark that we don't need executable stack. */
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/context_switcher/asm/fcontext/make_ppc64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/make_ppc64_sysv_macho_gas.S
new file mode 100644
index 0000000..7b947bb
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/make_ppc64_sysv_macho_gas.S
@@ -0,0 +1,126 @@
+/*
+ Copyright Oliver Kowalke 2009.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+/*******************************************************
+ * *
+ * ------------------------------------------------- *
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
+ * ------------------------------------------------- *
+ * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
+ * ------------------------------------------------- *
+ * | TOC | R14 | R15 | R16 | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
+ * ------------------------------------------------- *
+ * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
+ * ------------------------------------------------- *
+ * | R17 | R18 | R19 | R20 | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
+ * ------------------------------------------------- *
+ * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
+ * ------------------------------------------------- *
+ * | R21 | R22 | R23 | R24 | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
+ * ------------------------------------------------- *
+ * | 96 | 100 | 104 | 108
| 112 | 116 | 120 | 124 | *
+ * ------------------------------------------------- *
+ * | R25 | R26 | R27 | R28 | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | *
+ * ------------------------------------------------- *
+ * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | *
+ * ------------------------------------------------- *
+ * | R29 | R30 | R31 | hidden | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | *
+ * ------------------------------------------------- *
+ * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
+ * ------------------------------------------------- *
+ * | CR | LR | PC | back-chain| *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
+ * ------------------------------------------------- *
+ * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
+ * ------------------------------------------------- *
+ * | cr saved | lr saved | compiler | linker | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
+ * ------------------------------------------------- *
+ * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
+ * ------------------------------------------------- *
+ * | TOC saved | FCTX | DATA | | *
+ * ------------------------------------------------- *
+ * *
+ *******************************************************/
+.text
+.globl _make_fcontext
+_make_fcontext:
+    ; save return address into R6 (LR is clobbered by the bl further down)
+    mflr r6
+
+    ; first arg of make_fcontext() == top address of context-stack
+    ; shift address in R3 to lower 16 byte boundary (doubleword form keeps the upper 32 address bits)
+    clrrdi r3, r3, 4
+
+    ; reserve 248 bytes for context-data on context-stack
+    ; including 64 byte of linkage + parameter area (R1 % 16 == 0)
+    subi r3, r3, 248
+
+    ; third arg of make_fcontext() == address of context-function, kept as a full doubleword in the PC slot (offset 176)
+    std r5, 176(r3)
+
+    ; set back-chain (offset 184) to zero
+    li r0, 0
+    std r0, 184(r3)
+
+    ; compute address of returned transfer_t (FCTX slot, offset 232) and keep it in the hidden slot (offset 152)
+    addi r0, r3, 232
+    mr r4, r0
+    std r4, 152(r3)
+
+    ; load LR into R0 so it can be restored after the bl
+    mflr r0
+    ; jump to label l1; bl leaves the address of l1 in LR
+    bl l1
+l1:
+    ; load LR (address of l1) into R4
+    mflr r4
+    ; compute abs address of label finish; "." is l1 + 4 here, hence the + 4
+    addi r4, r4, lo16((finish - .) + 4)
+    ; restore LR
+    mtlr r0
+    ; save address of finish in the LR slot (offset 168) as return-address for context-function
+    ; will be entered after context-function returns
+    std r4, 168(r3)
+
+    ; restore return address from R6
+    mtlr r6
+
+    blr ; return pointer to context-data (R3)
+
+finish:
+    ; save return address into R0
+    mflr r0
+    ; save return address on stack as a doubleword, set up stack frame
+    std r0, 8(r1)
+    ; allocate stack space with a 64-bit back-chain, R1 % 16 == 0
+    stdu r1, -32(r1)
+
+    ; set return value to zero
+    li r3, 0
+    ; exit application
+    bl __exit
+    nop
diff --git a/lib/context_switcher/asm/fcontext/make_ppc64_sysv_xcoff_gas.S b/lib/context_switcher/asm/fcontext/make_ppc64_sysv_xcoff_gas.S
new file mode 100644
index 0000000..60ad6b6
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/make_ppc64_sysv_xcoff_gas.S
@@ -0,0 +1,68 @@
+/*
+ Copyright Oliver Kowalke 2009.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+*/
+    .globl make_fcontext[DS]
+    .globl .make_fcontext[PR]
+    .align 2
+    .csect .make_fcontext[PR], 3
+    .globl _make_fcontext
+#._make_fcontext:
+    # save return address into R6 (LR is clobbered by the bl further down)
+    mflr 6
+
+    # first arg of make_fcontext() == top address of context-stack
+    # shift address in R3 to lower 16 byte boundary (doubleword form keeps the upper 32 address bits)
+    clrrdi 3, 3, 4
+
+    # reserve 248 bytes for context-data on context-stack
+    # including 64 byte of linkage + parameter area (R1 % 16 == 0)
+    subi 3, 3, 248
+
+    # third arg of make_fcontext() == address of context-function, kept as a full doubleword at offset 176
+    std 5, 176(3)
+
+    # set back-chain (offset 184) to zero
+    li 0, 0
+    std 0, 184(3)
+
+    # compute address of returned transfer_t (offset 232) and keep it at offset 152
+    addi 0, 3, 232
+    mr 4, 0
+    std 4, 152(3)
+
+    # load LR into R0 so it can be restored after the bl
+    mflr 0
+    # jump to label .Label; bl leaves the address of .Label in LR
+    bl .Label
+.Label:
+    # load LR (address of .Label) into R4
+    mflr 4
+    # compute abs address of label .L_finish (position independent)
+    addi 4, 4, .L_finish - .Label
+    # restore LR
+    mtlr 0
+    # save the full 64-bit address of finish at offset 168 as return-address for context-function
+    # will be entered after context-function returns
+    std 4, 168(3)
+
+    # restore return address from R6
+    mtlr 6
+
+    blr # return pointer to context-data (R3)
+
+.L_finish:
+    # save return address into R0
+    mflr 0
+    # save return address on stack as a doubleword, set up stack frame
+    std 0, 8(1)
+    # allocate stack space with a 64-bit back-chain, R1 % 16 == 0
+    stdu 1, -32(1)
+
+    # exit code is zero
+    li 3, 0
+    # exit application
+    bl ._exit
+    nop
diff --git a/lib/context_switcher/asm/fcontext/make_riscv64_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/make_riscv64_sysv_elf_gas.S
new file mode 100644
index 0000000..5322e0f
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/make_riscv64_sysv_elf_gas.S
@@ -0,0 +1,91 @@
+/*
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | fs0 | fs1 | fs2 | fs3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | fs4 | fs5 | fs6 | fs7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | fs8 | fs9 | fs10 | fs11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | s0 | s1 | s2 | s3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | s4 | s5 | s6 | s7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * 
+ * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | *
+ * ------------------------------------------------- *
+ * | 0xa0| 0xa4| 0xa8| 0xac| 0xb0| 0xb4| 0xb8| 0xbc| *
+ * ------------------------------------------------- *
+ * | s8 | s9 | s10 | s11 | *
+ * ------------------------------------------------- *
+ * ------------------------------------------------- *
+ * | 48 | 49 | 50 | 51 | | | | | *
+ * ------------------------------------------------- *
+ * | 0xc0| 0xc4| 0xc8| 0xcc| | | | | *
+ * ------------------------------------------------- *
+ * | ra | pc | | | *
+ * ------------------------------------------------- *
+ * *
+ *******************************************************/
+
+.file "make_riscv64_sysv_elf_gas.S"
+.text
+.align 1
+.globl make_fcontext
+.type make_fcontext, %function
+make_fcontext:
+    # a0 holds the allocated stack address: clear its low four bits (16 byte alignment)
+    andi a0, a0, -16
+
+    # carve 0xd0 (208) bytes of context-data out of the context-stack
+    addi a0, a0, -208
+
+    # take the address of finish first; it is the return-address the
+    # context-function comes back to (ra slot, offset 0xc0 == 192)
+    lla a4, finish
+
+    # ra slot (192) receives finish
+    # pc slot (0xc8 == 200) receives the third arg (a2), the context-function to jump in
+    sd a4, 192(a0)
+    sd a2, 200(a0)
+
+    ret // a0 == pointer to context-data
+
+finish:
+    # leave the application
+    # with an exit code of zero
+    li a0, 0
+    tail _exit@plt
+
+.size make_fcontext, . - make_fcontext
+# Mark that we don't need executable stack.
+.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/make_s390x_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/make_s390x_sysv_elf_gas.S new file mode 100644 index 0000000..d02856c --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_s390x_sysv_elf_gas.S @@ -0,0 +1,104 @@ +/******************************************************* +* * +* ------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* ------------------------------------------------- * +* | 0 | 8 | 16 | 24 | * +* ------------------------------------------------- * +* | R6 | R7 | R8 | R9 | * +* ------------------------------------------------- * +* ------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* ------------------------------------------------- * +* | 32 | 40 | 48 | 56 | * +* ------------------------------------------------- * +* | R10 | R11 | R12 | R13 | * +* ------------------------------------------------- * +* ------------------------------------------------- * +* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * +* ------------------------------------------------- * +* | 64 | 72 | 80 | 88 | * +* ------------------------------------------------- * +* | R14/LR | R15 | F1 | F3 | * +* ------------------------------------------------- * +* ------------------------------------------------- * +* | 24 | 25 | 26 | 27 | 28 | 29 | | * +* ------------------------------------------------- * +* | 96 | 104 | 112 | 120 | * +* ------------------------------------------------- * +* | F5 | F7 | PC | | * +* ------------------------------------------------- * +* *****************************************************/ + +.file "make_s390x_sysv_elf_gas.S" +.text +.align 4 # According to the sample code in the ELF ABI docs +.global make_fcontext +.type make_fcontext, @function + +#define GR_OFFSET 0 +#define LR_OFFSET 64 +#define SP_OFFSET 72 +#define FP_OFFSET 80 +#define PC_OFFSET 112 +#define L_CTX 
120
+#define L_STACKFRAME 120
+
+make_fcontext:
+
+    # make_fcontext takes in 3 arguments
+    # arg1 --> The top address of the stack where the context needs to be made (R2)
+    # arg2 --> The size of the context stack (R3, not read by this routine)
+    # arg3 --> The address of the context function (R4)
+
+    # According to the ELF ABI, the register R2 holds the first arg.
+    # R2 also acts as the register which holds return value
+    # Register R3 holds the second, R4 the third so on.
+
+    # Round the address in R2 down to a 16 byte boundary
+
+    # NILL ANDs only the low 16 bits of R2 with the immediate, so the
+    # mask 0xfff0 clears the lowest 4 bits of the address and leaves all
+    # higher bits untouched. The result is divisible by 16, which also
+    # satisfies the 8 byte stack alignment mentioned in the ELF ABI Doc.
+    # NOTE(review): 16 rather than 8 byte rounding looks deliberate - confirm.
+
+    # Mask applied to the lower 16 bits of the memory address present in
+    # R2: 1111 1111 1111 0000
+    nill %r2,0xfff0
+
+    # Reserve space for context-data on context-stack.
+    # This is done by moving the address down by L_CTX (120) bytes.
+    aghi %r2,-L_CTX
+
+    # third arg of make_fcontext() == address of the context-function
+    # Store the address in the PC slot (PC_OFFSET) of the context-data.
+    # NOTE(review): presumably the jump routine branches to this slot - confirm.
+    stg %r4,PC_OFFSET(%r2)
+
+    # Save the address of finish as return-address for context-function
+    # This will be entered after context-function return
+    # The address of finish is saved in the R14/LR slot (LR_OFFSET) of the
+    # context-data; R1 is only a scratch register for the PC-relative
+    # address computed by LARL.
+    larl %r1,finish
+    stg %r1,LR_OFFSET(%r2)
+
+    # Return pointer to context data
+    # R14 acts as the link register
+    # R2 holds the address of the context-data. When we return from the
+    # make_fcontext, R2 is passed back.
+    br %r14
+
+ finish:
+
+    # In finish tasks, you load the exit code (zero) into R2 and call _exit
+    # This is reached when the context-function is entirely executed
+
+    lghi %r2,0
+    brasl %r14,_exit@PLT
+
+.size make_fcontext,.-make_fcontext
+# Mark that we don't need executable stack.
+.section .note.GNU-stack,"",%progbits + diff --git a/lib/context_switcher/asm/fcontext/make_x86_64_ms_pe_gas.asm b/lib/context_switcher/asm/fcontext/make_x86_64_ms_pe_gas.asm new file mode 100644 index 0000000..958a2a7 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_x86_64_ms_pe_gas.asm @@ -0,0 +1,174 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************* +* ---------------------------------------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* ---------------------------------------------------------------------------------- * +* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * +* ---------------------------------------------------------------------------------- * +* | SEE registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* ---------------------------------------------------------------------------------- * +* | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * +* ---------------------------------------------------------------------------------- * +* | SEE registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * +* ---------------------------------------------------------------------------------- * +* | 0xe40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | * +* ---------------------------------------------------------------------------------- * +* | SEE 
registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * +* ---------------------------------------------------------------------------------- * +* | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | * +* ---------------------------------------------------------------------------------- * +* | SEE registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 32 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | * +* ---------------------------------------------------------------------------------- * +* | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | * +* ---------------------------------------------------------------------------------- * +* | SEE registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | * +* ---------------------------------------------------------------------------------- * +* | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | * +* ---------------------------------------------------------------------------------- * +* | fc_mxcsr|fc_x87_cw| | fbr_strg | fc_dealloc | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | * +* ---------------------------------------------------------------------------------- * +* | 0xc0 | 0xc4 | 0xc8 | 0xcc | 0xd0 | 0xd4 | 0xd8 | 0xdc | * +* ---------------------------------------------------------------------------------- * 
+* | limit | base | R12 | R13 | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | * +* ---------------------------------------------------------------------------------- * +* | 0xe0 | 0xe4 | 0xe8 | 0xec | 0xf0 | 0xf4 | 0xf8 | 0xfc | * +* ---------------------------------------------------------------------------------- * +* | R14 | R15 | RDI | RSI | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | * +* ---------------------------------------------------------------------------------- * +* | 0x100 | 0x104 | 0x108 | 0x10c | 0x110 | 0x114 | 0x118 | 0x11c | * +* ---------------------------------------------------------------------------------- * +* | RBX | RBP | hidden | RIP | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | * +* ---------------------------------------------------------------------------------- * +* | 0x120 | 0x124 | 0x128 | 0x12c | 0x130 | 0x134 | 0x138 | 0x13c | * +* ---------------------------------------------------------------------------------- * +* | parameter area | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | * +* ---------------------------------------------------------------------------------- * +* | 0x140 | 0x144 | 0x148 | 0x14c | 0x150 | 0x154 | 0x158 | 0x15c | * +* ---------------------------------------------------------------------------------- * +* | 
FCTX | DATA | | *
+* ---------------------------------------------------------------------------------- *
+**************************************************************************************/
+
+.file "make_x86_64_ms_pe_gas.asm"
+.text
+.p2align 4,,15
+.globl make_fcontext
+.def make_fcontext; .scl 2; .type 32; .endef
+.seh_proc make_fcontext
+make_fcontext:
+.seh_endprologue
+
+    /* first arg of make_fcontext() == top of context-stack */
+    movq %rcx, %rax
+
+    /* shift address in RAX to lower 16 byte boundary */
+    /* == pointer to fcontext_t and address of context stack */
+    andq $-16, %rax
+
+    /* reserve 0x150 bytes for context-data on context-stack */
+    /* on context-function entry: (RSP -0x8) % 16 == 0 */
+    leaq -0x150(%rax), %rax
+
+    /* third arg of make_fcontext() == address of context-function, kept in the RBX slot (0x100) */
+    movq %r8, 0x100(%rax)
+
+    /* first arg of make_fcontext() == top of context-stack */
+    /* save top address of context stack as 'base' */
+    movq %rcx, 0xc8(%rax)
+    /* second arg of make_fcontext() == size of context-stack */
+    /* negate stack size for LEA instruction (== subtraction) */
+    negq %rdx
+    /* compute bottom address of context stack (limit) */
+    leaq (%rcx,%rdx), %rcx
+    /* save bottom address of context stack as 'limit' */
+    movq %rcx, 0xc0(%rax)
+    /* save address of context stack limit as 'deallocation stack' */
+    movq %rcx, 0xb8(%rax)
+    /* set fiber-storage to zero */
+    xorq %rcx, %rcx
+    movq %rcx, 0xb0(%rax)
+
+    /* save MXCSR (SSE control- and status-word) */
+    stmxcsr 0xa0(%rax)
+    /* save x87 control-word */
+    fnstcw 0xa4(%rax)
+
+    /* compute address of transfer_t (FCTX slot, 0x140) */
+    leaq 0x140(%rax), %rcx
+    /* store address of transfer_t in hidden field */
+    movq %rcx, 0x110(%rax)
+
+    /* compute abs address of label trampoline */
+    leaq trampoline(%rip), %rcx
+    /* save address of trampoline in the RIP slot (0x118) */
+    /* will be entered after jump_fcontext() first time */
+    movq %rcx, 0x118(%rax)
+
+    /* compute abs address of label finish */
+    leaq finish(%rip), %rcx
+    /* save address of finish in the RBP slot (0x108); trampoline pushes RBP as return-address for context-function */
+    /* will be entered after context-function returns */
+    movq %rcx, 0x108(%rax)
+
+    ret /* return pointer to context-data */
+
+trampoline:
+    /* store return address on stack (RBP; presumably reloaded from the 0x108 slot by jump_fcontext - confirm) */
+    /* fix stack alignment */
+    pushq %rbp
+    /* jump to context-function (RBX; presumably reloaded from the 0x100 slot by jump_fcontext - confirm) */
+    jmp *%rbx
+
+finish:
+    /* align RSP down to a 32 byte boundary before calling _exit() */
+    andq $-32, %rsp
+    /* 32byte shadow-space for _exit() are */
+    /* already reserved by make_fcontext() */
+    /* exit code is zero */
+    xorq %rcx, %rcx
+    /* exit application */
+    call _exit
+    hlt
+.seh_endproc
+
+.def _exit; .scl 2; .type 32; .endef /* standard C library function */
+
+.section .drectve
+.ascii " -export:\"make_fcontext\""
diff --git a/lib/context_switcher/asm/fcontext/make_x86_64_ms_pe_masm.asm b/lib/context_switcher/asm/fcontext/make_x86_64_ms_pe_masm.asm
new file mode 100644
index 0000000..8f6c959
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/make_x86_64_ms_pe_masm.asm
@@ -0,0 +1,163 @@
+
+; Copyright Oliver Kowalke 2009.
+; Distributed under the Boost Software License, Version 1.0.
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; ---------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; ---------------------------------------------------------------------------------- +; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; ---------------------------------------------------------------------------------- +; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | +; ---------------------------------------------------------------------------------- +; | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +; ---------------------------------------------------------------------------------- +; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +;
---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | +; ---------------------------------------------------------------------------------- +; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | +; ---------------------------------------------------------------------------------- +; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | +; ---------------------------------------------------------------------------------- +; | fc_mxcsr|fc_x87_cw| | fbr_strg | fc_dealloc | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | +; ---------------------------------------------------------------------------------- +; | 0xc0 | 0xc4 | 0xc8 | 0xcc | 0xd0 | 0xd4 | 0xd8 | 0xdc | +; ---------------------------------------------------------------------------------- +; | limit | base | R12 | R13 | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | +; ---------------------------------------------------------------------------------- +; | 0xe0 | 0xe4 | 0xe8 | 0xec | 0xf0 | 0xf4 | 0xf8 | 0xfc | +; ---------------------------------------------------------------------------------- +; | R14 | R15 | RDI | RSI | +;
---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | +; ---------------------------------------------------------------------------------- +; | 0x100 | 0x104 | 0x108 | 0x10c | 0x110 | 0x114 | 0x118 | 0x11c | +; ---------------------------------------------------------------------------------- +; | RBX | RBP | hidden | RIP | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | +; ---------------------------------------------------------------------------------- +; | 0x120 | 0x124 | 0x128 | 0x12c | 0x130 | 0x134 | 0x138 | 0x13c | +; ---------------------------------------------------------------------------------- +; | parameter area | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | +; ---------------------------------------------------------------------------------- +; | 0x140 | 0x144 | 0x148 | 0x14c | 0x150 | 0x154 | 0x158 | 0x15c | +; ---------------------------------------------------------------------------------- +; | FCTX | DATA | | +; ---------------------------------------------------------------------------------- + +; standard C library function +EXTERN _exit:PROC +.code + +; generate function table entry in .pdata and unwind information in +make_fcontext PROC BOOST_CONTEXT_EXPORT FRAME + ; .xdata for a function's structured exception handling unwind behavior + .endprolog + + ; first arg of make_fcontext() == top of context-stack + mov rax, rcx + + ; shift address in RAX to lower 16 byte boundary + ; == pointer to fcontext_t and address of context stack + and rax, -16 + +
; reserve space for context-data on context-stack + ; on context-function entry: (RSP -0x8) % 16 == 0 + sub rax, 0150h + + ; third arg of make_fcontext() == address of context-function + ; stored in RBX + mov [rax+0100h], r8 + + ; first arg of make_fcontext() == top of context-stack + ; save top address of context stack as 'base' + mov [rax+0c8h], rcx + ; second arg of make_fcontext() == size of context-stack + ; negate stack size for LEA instruction (== subtraction) + neg rdx + ; compute bottom address of context stack (limit) + lea rcx, [rcx+rdx] + ; save bottom address of context stack as 'limit' + mov [rax+0c0h], rcx + ; save address of context stack limit as 'deallocation stack' + mov [rax+0b8h], rcx + ; set fiber-storage to zero + xor rcx, rcx + mov [rax+0b0h], rcx + + ; save MXCSR (SSE control- and status-word) + stmxcsr [rax+0a0h] + ; save x87 control-word + fnstcw [rax+0a4h] + + ; compute address of transport_t + lea rcx, [rax+0140h] + ; store address of transport_t in hidden field + mov [rax+0110h], rcx + + ; compute abs address of label trampoline + lea rcx, trampoline + ; save address of trampoline in the RIP slot of the context-data + ; will be entered after calling jump_fcontext() first time + mov [rax+0118h], rcx + + ; compute abs address of label finish + lea rcx, finish + ; save address of finish as return-address for context-function in RBP + ; will be entered after context-function returns + mov [rax+0108h], rcx + + ret ; return pointer to context-data + +trampoline: + ; store return address on stack + ; fix stack alignment + push rbp + ; jump to context-function + jmp rbx + +finish: + ; exit code is zero + xor rcx, rcx + ; exit application + call _exit + hlt +make_fcontext ENDP +END diff --git a/lib/context_switcher/asm/fcontext/make_x86_64_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/make_x86_64_sysv_elf_gas.S new file mode 100644 index 0000000..0ef3756 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_x86_64_sysv_elf_gas.S @@ -0,0
+1,82 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| R12 | R13 | R14 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | R15 | RBX | RBP | RIP | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.file "make_x86_64_sysv_elf_gas.S" +.text +.globl make_fcontext +.type make_fcontext,@function +.align 16 +make_fcontext: + /* first arg of make_fcontext() == top of context-stack */ + movq %rdi, %rax + + /* shift address in RAX to lower 16 byte boundary */ + andq $-16, %rax + + /* reserve space for context-data on context-stack */ + /* on context-function entry: (RSP -0x8) % 16 == 0 */ + leaq -0x40(%rax), %rax + + /* third arg of make_fcontext() == address of context-function */ + /* stored in RBX */ + movq %rdx, 0x28(%rax) + + /* save MXCSR (SSE control- and status-word) */ + stmxcsr (%rax) + /*
save x87 control-word */ + fnstcw 0x4(%rax) + + /* compute abs address of label trampoline */ + leaq trampoline(%rip), %rcx + /* save address of trampoline in the RIP slot of the context-data */ + /* will be entered after calling jump_fcontext() first time */ + movq %rcx, 0x38(%rax) + + /* compute abs address of label finish */ + leaq finish(%rip), %rcx + /* save address of finish as return-address for context-function (RBP slot) */ + /* will be entered after context-function returns */ + movq %rcx, 0x30(%rax) + + ret /* return pointer to context-data */ + +trampoline: + /* store return address on stack */ + /* fix stack alignment */ + push %rbp + /* jump to context-function */ + jmp *%rbx + +finish: + /* exit code is zero */ + xorq %rdi, %rdi + /* exit application */ + call _exit@PLT + hlt +.size make_fcontext,.-make_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/make_x86_64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/make_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..5d6c543 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/make_x86_64_sysv_macho_gas.S @@ -0,0 +1,76 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| R12 | R13 | R14 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | R15 | RBX | RBP | RIP | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl _make_fcontext +.align 8 +_make_fcontext: + /* first arg of make_fcontext() == top of context-stack */ + movq %rdi, %rax + + /* shift address in RAX to lower 16 byte boundary */ + andq $-16, %rax + + /* reserve space for context-data on context-stack */ + /* on context-function entry: (RSP -0x8) % 16 == 0 */ + leaq -0x40(%rax), %rax + + /* third arg of make_fcontext() == address of context-function */ + /* stored in RBX */ + movq %rdx, 0x28(%rax) + + /* save MXCSR (SSE control- and status-word) */ + stmxcsr (%rax) + /* save x87 control-word */ + fnstcw 0x4(%rax) + + /* compute abs address of label trampoline */ + leaq trampoline(%rip), %rcx + /* save address of trampoline as
return-address for context-function */ + /* will be entered after calling jump_fcontext() first time */ + movq %rcx, 0x38(%rax) + + /* compute abs address of label finish */ + leaq finish(%rip), %rcx + /* save address of finish as return-address for context-function (RBP slot) */ + /* will be entered after context-function returns */ + movq %rcx, 0x30(%rax) + + ret /* return pointer to context-data */ + +trampoline: + /* store return address on stack */ + /* fix stack alignment */ + push %rbp + /* jump to context-function */ + jmp *%rbx + +finish: + /* exit code is zero */ + xorq %rdi, %rdi + /* exit application */ + call __exit + hlt diff --git a/lib/context_switcher/asm/fcontext/ontop_arm64_aapcs_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_arm64_aapcs_elf_gas.S new file mode 100644 index 0000000..665ca5a --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_arm64_aapcs_elf_gas.S @@ -0,0 +1,113 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | d8 | d9 | d10 | d11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | d12 | d13 | d14 | d15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- 
* + * | 40 | 41 | 42 | 43 | | | * + * ------------------------------------------------- * + * | 0xa0| 0xa4| 0xa8| 0xac| | | * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.file "ontop_arm64_aapcs_elf_gas.S" +.text +.align 2 +.global ontop_fcontext +.type ontop_fcontext, %function +ontop_fcontext: + # prepare stack for GP + FPU + sub sp, sp, #0xb0 + + # save d8 - d15 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + + # save x19-x30 + stp x19, x20, [sp, #0x40] + stp x21, x22, [sp, #0x50] + stp x23, x24, [sp, #0x60] + stp x25, x26, [sp, #0x70] + stp x27, x28, [sp, #0x80] + stp x29, x30, [sp, #0x90] + + # save LR as PC + str x30, [sp, #0xa0] + + # store SP (pointing to context-data) in X4 + mov x4, sp + + # restore SP (pointing to context-data) from X0 + mov sp, x0 + + # load d8 - d15 + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + # load x19-x30 + ldp x19, x20, [sp, #0x40] + ldp x21, x22, [sp, #0x50] + ldp x23, x24, [sp, #0x60] + ldp x25, x26, [sp, #0x70] + ldp x27, x28, [sp, #0x80] + ldp x29, x30, [sp, #0x90] + + # return transfer_t from jump + # pass transfer_t as first arg in context function + # X0 == FCTX (parent context saved in X4), X1 == DATA + mov x0, x4 + + # skip pc + # restore stack from GP + FPU + add sp, sp, #0xb0 + + # jump to ontop-function (address passed in X2) + ret x2 +.size ontop_fcontext,.-ontop_fcontext +# Mark that we don't need executable stack.
+.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_arm64_aapcs_macho_gas.S b/lib/context_switcher/asm/fcontext/ontop_arm64_aapcs_macho_gas.S new file mode 100644 index 0000000..a387d06 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_arm64_aapcs_macho_gas.S @@ -0,0 +1,108 @@ +/* + Copyright Edward Nevill + Oliver Kowalke 2015 + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | d8 | d9 | d10 | d11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | d12 | d13 | d14 | d15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * 
------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 42 | 43 | | | * + * ------------------------------------------------- * + * | 0xa0| 0xa4| 0xa8| 0xac| | | * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.global _ontop_fcontext +.balign 16 +_ontop_fcontext: + ; prepare stack for GP + FPU + sub sp, sp, #0xb0 + + ; save d8 - d15 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + + ; save x19-x30 + stp x19, x20, [sp, #0x40] + stp x21, x22, [sp, #0x50] + stp x23, x24, [sp, #0x60] + stp x25, x26, [sp, #0x70] + stp x27, x28, [sp, #0x80] + stp x29, x30, [sp, #0x90] + + ; save LR as PC + str x30, [sp, #0xa0] + + ; store SP (pointing to context-data) in X4 + mov x4, sp + + ; restore SP (pointing to context-data) from X0 + mov sp, x0 + + ; load d8 - d15 + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + ; load x19-x30 + ldp x19, x20, [sp, #0x40] + ldp x21, x22, [sp, #0x50] + ldp x23, x24, [sp, #0x60] + ldp x25, x26, [sp, #0x70] + ldp x27, x28, [sp, #0x80] + ldp x29, x30, [sp, #0x90] + + ; return transfer_t from jump + ; pass transfer_t as first arg in context function + ; X0 == FCTX (parent context saved in X4), X1 == DATA + mov x0, x4 + + ; skip pc + ; restore stack from GP + FPU + add sp, sp, #0xb0 + + ; jump to ontop-function (address passed in X2) + ret x2 diff --git
a/lib/context_switcher/asm/fcontext/ontop_arm_aapcs_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_arm_aapcs_elf_gas.S new file mode 100644 index 0000000..59ad5ca --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_arm_aapcs_elf_gas.S @@ -0,0 +1,93 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | s16 | s17 | s18 | s19 | s20 | s21 | s22 | s23 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | s24 | s25 | s26 | s27 | s28 | s29 | s30 | s31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * |hiddn| v1 | v2 | v3 | v4 | v5 | v6 | v7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | v8 | lr | pc | FCTX| DATA| | * + * 
------------------------------------------------- * + * * + *******************************************************/ + +.file "ontop_arm_aapcs_elf_gas.S" +.text +.globl ontop_fcontext +.align 2 +.type ontop_fcontext,%function +.syntax unified +ontop_fcontext: + @ save LR as PC + push {lr} + @ save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + @ prepare stack for FPU + sub sp, sp, #64 +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + @ save S16-S31 + vstmia sp, {d8-d15} +#endif + + @ store SP (pointing to context-data) in A1 + mov a1, sp + + @ restore SP (pointing to context-data) from A2 + mov sp, a2 + + @ store parent context in A2 + mov a2, a1 + +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + @ restore S16-S31 + vldmia sp, {d8-d15} +#endif + @ release the 64 byte FPU save area + add sp, sp, #64 + + @ restore hidden,V1-V8,LR + pop {a1,v1-v8,lr} + + @ return transfer_t from jump + str a2, [a1, #0] + str a3, [a1, #4] + @ pass transfer_t as first arg in context function + @ A1 == hidden, A2 == FCTX, A3 == DATA + + @ skip PC + add sp, sp, #4 + + @ jump to ontop-function (address passed in A4) + bx a4 +.size ontop_fcontext,.-ontop_fcontext + +@ Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_arm_aapcs_macho_gas.S b/lib/context_switcher/asm/fcontext/ontop_arm_aapcs_macho_gas.S new file mode 100644 index 0000000..421fcb4 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_arm_aapcs_macho_gas.S @@ -0,0 +1,100 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | s16 | s17 | s18 | s19 | s20 | s21 | s22 | s23 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | s24 | s25 | s26 | s27 | s28 | s29 | s30 | s31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | sjlj|hiddn| v1 | v2 | v3 | v4 | v5 | v6 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | v7 | v8 | lr | pc | FCTX| DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _ontop_fcontext +.align 2 +_ontop_fcontext: + @ save LR as PC + push {lr} + @ save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + @ locate TLS to save/restore SjLj handler + mrc p15, 0, v2, c13, c0, #3 + bic v2, v2, #3 + + @ load
TLS[__PTK_LIBC_DYLD_Unwind_SjLj_Key] + ldr v1, [v2, #8] + @ save SjLj handler + push {v1} + + @ prepare stack for FPU + sub sp, sp, #64 +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + @ save S16-S31 + vstmia sp, {d8-d15} +#endif + + @ store SP (pointing to context-data) in A1 + mov a1, sp + + @ restore SP (pointing to context-data) from A2 + mov sp, a2 + +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + @ restore S16-S31 + vldmia sp, {d8-d15} +#endif + @ release the 64 byte FPU save area + add sp, sp, #64 + + @ restore SjLj handler + pop {v1} + @ store SjLj handler in TLS + str v1, [v2, #8] + + @ store parent context in A2 + mov a2, a1 + + @ restore hidden,V1-V8,LR + pop {a1,v1-v8,lr} + + @ return transfer_t from jump + str a2, [a1, #0] + str a3, [a1, #4] + @ pass transfer_t as first arg in context function + @ A1 == hidden, A2 == FCTX, A3 == DATA + + @ skip PC + add sp, sp, #4 + + @ jump to ontop-function (address passed in A4) + bx a4 diff --git a/lib/context_switcher/asm/fcontext/ontop_arm_aapcs_pe_armasm.asm b/lib/context_switcher/asm/fcontext/ontop_arm_aapcs_pe_armasm.asm new file mode 100644 index 0000000..f360a8f --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_arm_aapcs_pe_armasm.asm @@ -0,0 +1,86 @@ +;/* +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0.
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) +;*/ + +; ******************************************************* +; * * +; * ------------------------------------------------- * +; * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +; * ------------------------------------------------- * +; * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * +; * ------------------------------------------------- * +; * |deall|limit| base|hiddn| v1 | v2 | v3 | v4 | * +; * ------------------------------------------------- * +; * ------------------------------------------------- * +; * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +; * ------------------------------------------------- * +; * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * +; * ------------------------------------------------- * +; * | v5 | v6 | v7 | v8 | lr | pc | FCTX| DATA| * +; * ------------------------------------------------- * +; * * +; ******************************************************* + + AREA |.text|, CODE + ALIGN 4 + EXPORT ontop_fcontext + +ontop_fcontext PROC + ; save LR as PC + push {lr} + ; save hidden,V1-V8,LR + push {a1,v1-v8,lr} + + ; load TIB to save/restore thread size and limit.
+ ; we do not need preserve CPU flag and can use it's arg register + mrc p15, #0, v1, c13, c0, #2 + + ; save current stack base + ldr a1, [v1, #0x04] + push {a1} + ; save current stack limit + ldr a1, [v1, #0x08] + push {a1} + ; save current deallocation stack + ldr a1, [v1, #0xe0c] + push {a1} + + ; store SP (pointing to context-data) in A1 + mov a1, sp + + ; restore SP (pointing to context-data) from A2 + mov sp, a2 + + ; store parent context in A2 now: A1 is reused as scratch by the pops below + mov a2, a1 + + ; restore deallocation stack (pushed last, so it is popped first) + pop {a1} + str a1, [v1, #0xe0c] + ; restore stack limit + pop {a1} + str a1, [v1, #0x08] + ; restore stack base (pushed first, so it is popped last) + pop {a1} + str a1, [v1, #0x04] + + ; restore hidden,V1-V8,LR + pop {a1,v1-v8,lr} + + ; return transfer_t from jump + str a2, [a1, #0] + str a3, [a1, #4] + ; pass transfer_t as first arg in context function + ; A1 == hidden, A2 == FCTX, A3 == DATA + + ; skip PC + add sp, sp, #4 + + ; jump to ontop-function (address passed in A4) + bx a4 + + ENDP + END diff --git a/lib/context_switcher/asm/fcontext/ontop_combined_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/ontop_combined_sysv_macho_gas.S new file mode 100644 index 0000000..20cbeb9 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_combined_sysv_macho_gas.S @@ -0,0 +1,20 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "ontop_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "ontop_x86_64_sysv_macho_gas.S" +#elif defined(__ppc__) + #include "ontop_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "ontop_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/lib/context_switcher/asm/fcontext/ontop_i386_ms_pe_gas.asm b/lib/context_switcher/asm/fcontext/ontop_i386_ms_pe_gas.asm new file mode 100644 index 0000000..c23e18a --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_i386_ms_pe_gas.asm @@ -0,0 +1,131 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************* +* --------------------------------------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* --------------------------------------------------------------------------------- * +* | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | * +* --------------------------------------------------------------------------------- * +* | fc_mxcsr|fc_x87_cw| fc_strg |fc_deallo| limit | base | fc_seh | EDI | * +* --------------------------------------------------------------------------------- * +* --------------------------------------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* --------------------------------------------------------------------------------- * +* | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | * +* --------------------------------------------------------------------------------- * +* | ESI | EBX | EBP | EIP | to | data | EH NXT |SEH HNDLR| * +* 
--------------------------------------------------------------------------------- * +**************************************************************************************/ + +.file "ontop_i386_ms_pe_gas.asm" +.text +.p2align 4,,15 + +/* mark as using no unregistered SEH handlers */ +.globl @feat.00 +.def @feat.00; .scl 3; .type 0; .endef +.set @feat.00, 1 + +.globl _ontop_fcontext +.def _ontop_fcontext; .scl 2; .type 32; .endef +_ontop_fcontext: + /* prepare stack */ + leal -0x2c(%esp), %esp + +#if !defined(BOOST_USE_TSX) + /* save MXCSR (SSE control- and status-word) */ + stmxcsr (%esp) + /* save x87 control-word */ + fnstcw 0x4(%esp) +#endif + + /* load NT_TIB */ + movl %fs:(0x18), %edx + /* load fiber local storage */ + movl 0x10(%edx), %eax + movl %eax, 0x8(%esp) + /* load current deallocation stack */ + movl 0xe0c(%edx), %eax + movl %eax, 0xc(%esp) + /* load current stack limit */ + movl 0x8(%edx), %eax + movl %eax, 0x10(%esp) + /* load current stack base */ + movl 0x4(%edx), %eax + movl %eax, 0x14(%esp) + /* load current SEH exception list */ + movl (%edx), %eax + movl %eax, 0x18(%esp) + + movl %edi, 0x1c(%esp) /* save EDI */ + movl %esi, 0x20(%esp) /* save ESI */ + movl %ebx, 0x24(%esp) /* save EBX */ + movl %ebp, 0x28(%esp) /* save EBP */ + + /* store ESP (pointing to context-data) in ECX */ + movl %esp, %ecx + + /* first arg of ontop_fcontext() == fcontext to jump to */ + movl 0x30(%esp), %eax + + /* pass parent fcontext_t */ + movl %ecx, 0x30(%eax) + + /* second arg of ontop_fcontext() == data to be transferred */ + movl 0x34(%esp), %ecx + + /* pass data */ + movl %ecx, 0x34(%eax) + + /* third arg of ontop_fcontext() == ontop-function */ + movl 0x38(%esp), %ecx + + /* restore ESP (pointing to context-data) from EAX */ + movl %eax, %esp + +#if !defined(BOOST_USE_TSX) + /* restore MXCSR (SSE control- and status-word) */ + ldmxcsr (%esp) + /* restore x87 control-word */ + fldcw 0x4(%esp) +#endif + + /* load NT_TIB into EDX */ + movl %fs:(0x18), %edx + /* restore fiber 
local storage */ + movl 0x8(%esp), %eax + movl %eax, 0x10(%edx) + /* restore current deallocation stack */ + movl 0xc(%esp), %eax + movl %eax, 0xe0c(%edx) + /* restore current stack limit */ + movl 0x10(%esp), %eax + movl %eax, 0x08(%edx) + /* restore current stack base */ + movl 0x14(%esp), %eax + movl %eax, 0x04(%edx) + /* restore current SEH exception list */ + movl 0x18(%esp), %eax + movl %eax, (%edx) + + movl 0x1c(%esp), %edi /* restore EDI */ + movl 0x20(%esp), %esi /* restore ESI */ + movl 0x24(%esp), %ebx /* restore EBX */ + movl 0x28(%esp), %ebp /* restore EBP */ + + /* prepare stack */ + leal 0x2c(%esp), %esp + + /* keep return-address on stack */ + + /* jump to ontop-function */ + jmp *%ecx + +.section .drectve +.ascii " -export:\"_ontop_fcontext\"" diff --git a/lib/context_switcher/asm/fcontext/ontop_i386_ms_pe_masm.asm b/lib/context_switcher/asm/fcontext/ontop_i386_ms_pe_masm.asm new file mode 100644 index 0000000..82246a4 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_i386_ms_pe_masm.asm @@ -0,0 +1,124 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. 
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; --------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; --------------------------------------------------------------------------------- +; | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | +; --------------------------------------------------------------------------------- +; | fc_mxcsr|fc_x87_cw| fc_strg |fc_deallo| limit | base | fc_seh | EDI | +; --------------------------------------------------------------------------------- +; --------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; --------------------------------------------------------------------------------- +; | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | +; --------------------------------------------------------------------------------- +; | ESI | EBX | EBP | EIP | to | data | EH NXT |SEH HNDLR| +; --------------------------------------------------------------------------------- + +.386 +.XMM +.model flat, c +.code + +ontop_fcontext PROC BOOST_CONTEXT_EXPORT + ; prepare stack + lea esp, [esp-02ch] + +IFNDEF BOOST_USE_TSX + ; save MXCSR (SSE control- and status-word) + stmxcsr [esp] + ; save x87 control-word + fnstcw [esp+04h] +ENDIF + + assume fs:nothing + ; load NT_TIB into EDX + mov edx, fs:[018h] + assume fs:error + ; load fiber local storage + mov eax, [edx+010h] + mov [esp+08h], eax + ; load current deallocation stack + mov eax, [edx+0e0ch] + mov [esp+0ch], eax + ; load current stack limit + mov eax, [edx+08h] + mov [esp+010h], eax + ; load current stack base + mov eax, [edx+04h] + mov [esp+014h], eax + ; load current SEH exception list + mov eax, [edx] + mov [esp+018h], eax + + mov [esp+01ch], edi ; save EDI + mov [esp+020h], esi ; save ESI + mov [esp+024h], ebx ; save EBX + mov [esp+028h], ebp ; save EBP + + ; store ESP (pointing to context-data) in ECX 
+ mov ecx, esp + + ; first arg of ontop_fcontext() == fcontext to jump to + mov eax, [esp+030h] + + ; pass parent fcontext_t + mov [eax+030h], ecx + + ; second arg of ontop_fcontext() == data to be transferred + mov ecx, [esp+034h] + + ; pass data + mov [eax+034h], ecx + + ; third arg of ontop_fcontext() == ontop-function + mov ecx, [esp+038h] + + ; restore ESP (pointing to context-data) from EAX + mov esp, eax + +IFNDEF BOOST_USE_TSX + ; restore MXCSR (SSE control- and status-word) + ldmxcsr [esp] + ; restore x87 control-word + fldcw [esp+04h] +ENDIF + + assume fs:nothing + ; load NT_TIB into EDX + mov edx, fs:[018h] + assume fs:error + ; restore fiber local storage + mov eax, [esp+08h] + mov [edx+010h], eax + ; restore current deallocation stack + mov eax, [esp+0ch] + mov [edx+0e0ch], eax + ; restore current stack limit + mov eax, [esp+010h] + mov [edx+08h], eax + ; restore current stack base + mov eax, [esp+014h] + mov [edx+04h], eax + ; restore current SEH exception list + mov eax, [esp+018h] + mov [edx], eax + + mov edi, [esp+01ch] ; restore EDI + mov esi, [esp+020h] ; restore ESI + mov ebx, [esp+024h] ; restore EBX + mov ebp, [esp+028h] ; restore EBP + + ; prepare stack + lea esp, [esp+02ch] + + ; keep return-address on stack + + ; jump to ontop-function + jmp ecx +ontop_fcontext ENDP +END diff --git a/lib/context_switcher/asm/fcontext/ontop_i386_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_i386_sysv_elf_gas.S new file mode 100644 index 0000000..40fe6c2 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_i386_sysv_elf_gas.S @@ -0,0 +1,90 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| EDI | ESI | EBX | EBP | EIP | hidden | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | | * + * ---------------------------------------------------------------------------------- * + * | to | data | | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.file "ontop_i386_sysv_elf_gas.S" +.text +.globl ontop_fcontext +.align 2 +.type ontop_fcontext,@function +ontop_fcontext: + leal -0x18(%esp), %esp /* prepare stack */ + +#if !defined(BOOST_USE_TSX) + stmxcsr (%esp) /* save MXCSR (SSE control- and status-word) */ + fnstcw 0x4(%esp) /* save x87 control-word */ +#endif + + movl %edi, 0x8(%esp) /* save EDI */ + movl %esi, 0xc(%esp) /* save ESI */ + movl %ebx, 0x10(%esp) /* save EBX */ + movl %ebp, 0x14(%esp) /* save EBP */ + + /* store ESP (pointing to context-data) in ECX */ + movl %esp, %ecx + + /* first arg of ontop_fcontext() == fcontext to jump to */ + movl 0x20(%esp), %eax + + /* pass parent fcontext_t */ + movl %ecx, 0x20(%eax) + + /* second arg of ontop_fcontext() == data to 
be transferred */ + movl 0x24(%esp), %ecx + + /* pass data */ + movl %ecx, 0x24(%eax) + + /* third arg of ontop_fcontext() == ontop-function */ + movl 0x28(%esp), %ecx + + /* restore ESP (pointing to context-data) from EAX */ + movl %eax, %esp + + /* address of returned transfer_t */ + movl 0x1c(%esp), %eax + /* return parent fcontext_t -- NOTE(review): ECX holds the ontop-function at this point, not the parent fcontext_t; confirm intent */ + movl %ecx, (%eax) + /* return data -- NOTE(review): EDX is not written anywhere in this routine; confirm intent */ + movl %edx, 0x4(%eax) + +#if !defined(BOOST_USE_TSX) + ldmxcsr (%esp) /* restore MXCSR (SSE control- and status-word) */ + fldcw 0x4(%esp) /* restore x87 control-word */ +#endif + + movl 0x8(%esp), %edi /* restore EDI */ + movl 0xc(%esp), %esi /* restore ESI */ + movl 0x10(%esp), %ebx /* restore EBX */ + movl 0x14(%esp), %ebp /* restore EBP */ + + leal 0x18(%esp), %esp /* prepare stack */ + + /* jump to ontop-function */ + jmp *%ecx +.size ontop_fcontext,.-ontop_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_i386_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/ontop_i386_sysv_macho_gas.S new file mode 100644 index 0000000..3a88372 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_i386_sysv_macho_gas.S @@ -0,0 +1,81 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| EDI | ESI | EBX | EBP | EIP | to | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | | * + * ---------------------------------------------------------------------------------- * + * | data | | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl _ontop_fcontext +.align 2 +_ontop_fcontext: + leal -0x18(%esp), %esp /* prepare stack */ + +#if !defined(BOOST_USE_TSX) + stmxcsr (%esp) /* save MMX control- and status-word */ + fnstcw 0x4(%esp) /* save x87 control-word */ +#endif + + movl %edi, 0x8(%esp) /* save EDI */ + movl %esi, 0xc(%esp) /* save ESI */ + movl %ebx, 0x10(%esp) /* save EBX */ + movl %ebp, 0x14(%esp) /* save EBP */ + + /* store ESP (pointing to context-data) in ECX */ + movl %esp, %ecx + + /* first arg of ontop_fcontext() == fcontext to jump to */ + movl 0x1c(%esp), %eax + + /* pass parent fcontext_t */ + movl %ecx, 0x1c(%eax) + + /* second arg of ontop_fcontext() == data to be transferred */ + movl 0x20(%esp), %ecx + + /* pass data */ + movl %ecx, 
0x20(%eax) + + /* third arg of ontop_fcontext() == ontop-function */ + movl 0x24(%esp), %ecx + + /* restore ESP (pointing to context-data) from EAX */ + movl %eax, %esp + + /* return parent fcontext_t */ + movl %ecx, %eax + /* returned data is stored in EDX */ + +#if !defined(BOOST_USE_TSX) + ldmxcsr (%esp) /* restore MMX control- and status-word */ + fldcw 0x4(%esp) /* restore x87 control-word */ +#endif + + movl 0x8(%esp), %edi /* restore EDI */ + movl 0xc(%esp), %esi /* restore ESI */ + movl 0x10(%esp), %ebx /* restore EBX */ + movl 0x14(%esp), %ebp /* restore EBP */ + + leal 0x18(%esp), %esp /* prepare stack */ + + /* jump to context */ + jmp *%ecx diff --git a/lib/context_switcher/asm/fcontext/ontop_i386_x86_64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/ontop_i386_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..393c5fe --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_i386_x86_64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__i386__) + #include "ontop_i386_sysv_macho_gas.S" +#elif defined(__x86_64__) + #include "ontop_x86_64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/lib/context_switcher/asm/fcontext/ontop_mips32_o32_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_mips32_o32_elf_gas.S new file mode 100644 index 0000000..c69203c --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_mips32_o32_elf_gas.S @@ -0,0 +1,120 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | F20 | F22 | F24 | F26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | F28 | F30 | S0 | S1 | S2 | S3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | S4 | S5 | S6 | S7 | FP |hiddn| RA | PC | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | ABI ARGS | GP | FCTX| DATA| | * + * ------------------------------------------------- * + * * + * *****************************************************/ + +.file "ontop_mips32_o32_elf_gas.S" +.text +.globl ontop_fcontext +.align 2 +.type ontop_fcontext,@function +.ent ontop_fcontext +ontop_fcontext: + # reserve space on stack + addiu $sp, $sp, -96 + + sw $s0, 48($sp) # save S0 + sw $s1, 52($sp) # save S1 + sw $s2, 56($sp) # save S2 + sw $s3, 60($sp) # save S3 + sw $s4, 64($sp) # save S4 
+ sw $s5, 68($sp) # save S5 + sw $s6, 72($sp) # save S6 + sw $s7, 76($sp) # save S7 + sw $fp, 80($sp) # save FP + sw $a0, 84($sp) # save hidden, address of returned transfer_t + sw $ra, 88($sp) # save RA + sw $ra, 92($sp) # save RA as PC + +#if defined(__mips_hard_float) + s.d $f20, ($sp) # save F20 + s.d $f22, 8($sp) # save F22 + s.d $f24, 16($sp) # save F24 + s.d $f26, 24($sp) # save F26 + s.d $f28, 32($sp) # save F28 + s.d $f30, 40($sp) # save F30 +#endif + + # store SP (pointing to context-data) in A0 + move $a0, $sp + + # restore SP (pointing to context-data) from A1 + move $sp, $a1 + +#if defined(__mips_hard_float) + l.d $f20, ($sp) # restore F20 + l.d $f22, 8($sp) # restore F22 + l.d $f24, 16($sp) # restore F24 + l.d $f26, 24($sp) # restore F26 + l.d $f28, 32($sp) # restore F28 + l.d $f30, 40($sp) # restore F30 +#endif + + lw $s0, 48($sp) # restore S0 + lw $s1, 52($sp) # restore S1 + lw $s2, 56($sp) # restore S2 + lw $s3, 60($sp) # restore S3 + lw $s4, 64($sp) # restore S4 + lw $s5, 68($sp) # restore S5 + lw $s6, 72($sp) # restore S6 + lw $s7, 76($sp) # restore S7 + lw $fp, 80($sp) # restore FP + lw $v0, 84($sp) # restore hidden, address of returned transfer_t + lw $ra, 88($sp) # restore RA + + # load PC + move $t9, $a3 + + # adjust stack + addiu $sp, $sp, 96 + + # return transfer_t from jump + sw $a0, ($v0) # fctx of transfer_t + sw $a2, 4($v0) # data of transfer_t + # pass transfer_t as first arg in context function + # A0 == hidden, A1 == fctx, A2 == data + move $a1, $a0 + move $a0, $v0 + + # jump to context + jr $t9 +.end ontop_fcontext +.size ontop_fcontext, .-ontop_fcontext + +/* Mark that we don't need executable stack. 
*/ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_mips64_n64_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_mips64_n64_elf_gas.S new file mode 100644 index 0000000..a8a4f8a --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_mips64_n64_elf_gas.S @@ -0,0 +1,120 @@ +/* + Copyright Jiaxun Yang 2018. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 8 | 16 | 24 | * + * ------------------------------------------------- * + * | F24 | F25 | F26 | F27 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 40 | 48 | 56 | * + * ------------------------------------------------- * + * | F28 | F29 | F30 | F31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 72 | 80 | 88 | * + * ------------------------------------------------- * + * | S0 | S1 | S2 | S3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | S4 | S5 | S6 | S7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 
| 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | FP | GP | RA | PC | * + * ------------------------------------------------- * + * * + * *****************************************************/ + +.file "ontop_mips64_n64_elf_gas.S" +.text +.globl ontop_fcontext +.align 3 +.type ontop_fcontext,@function +.ent ontop_fcontext +ontop_fcontext: + # reserve space on stack + daddiu $sp, $sp, -160 + + sd $s0, 64($sp) # save S0 + sd $s1, 72($sp) # save S1 + sd $s2, 80($sp) # save S2 + sd $s3, 88($sp) # save S3 + sd $s4, 96($sp) # save S4 + sd $s5, 104($sp) # save S5 + sd $s6, 112($sp) # save S6 + sd $s7, 120($sp) # save S7 + sd $fp, 128($sp) # save FP + sd $ra, 144($sp) # save RA + sd $ra, 152($sp) # save RA as PC + + + s.d $f24, 0($sp) # save F24 + s.d $f25, 8($sp) # save F25 + s.d $f26, 16($sp) # save F26 + s.d $f27, 24($sp) # save F27 + s.d $f28, 32($sp) # save F28 + s.d $f29, 40($sp) # save F29 + s.d $f30, 48($sp) # save F30 + s.d $f31, 56($sp) # save F31 + + # store SP (pointing to context-data) in t0 + move $t0, $sp + + # restore SP (pointing to context-data) from a0 + move $sp, $a0 + + l.d $f24, 0($sp) # restore F24 + l.d $f25, 8($sp) # restore F25 + l.d $f26, 16($sp) # restore F26 + l.d $f27, 24($sp) # restore F27 + l.d $f28, 32($sp) # restore F28 + l.d $f29, 40($sp) # restore F29 + l.d $f30, 48($sp) # restore F30 + l.d $f31, 56($sp) # restore F31 + + ld $s0, 64($sp) # restore S0 + ld $s1, 72($sp) # restore S1 + ld $s2, 80($sp) # restore S2 + ld $s3, 88($sp) # restore S3 + ld $s4, 96($sp) # restore S4 + ld $s5, 104($sp) # restore S5 + ld $s6, 112($sp) # restore S6 + ld $s7, 120($sp) # restore S7 + ld $fp, 128($sp) # restore FP + ld $ra, 144($sp) # restore RA + + # load PC + move $t9, $a2 + + # adjust stack + daddiu $sp, $sp, 160 + + move $a0, $t0 # move param from t0 to a0 as param + + # jump to context + jr $t9 +.end ontop_fcontext 
+.size ontop_fcontext, .-ontop_fcontext + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_ppc32_ppc64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/ontop_ppc32_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..4632f4c --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_ppc32_ppc64_sysv_macho_gas.S @@ -0,0 +1,16 @@ +/* + Copyright Sergue E. Leontiev 2013. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +// Stub file for universal binary + +#if defined(__ppc__) + #include "ontop_ppc32_sysv_macho_gas.S" +#elif defined(__ppc64__) + #include "ontop_ppc64_sysv_macho_gas.S" +#else + #error "No arch's" +#endif diff --git a/lib/context_switcher/asm/fcontext/ontop_ppc32_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_ppc32_sysv_elf_gas.S new file mode 100644 index 0000000..464d99d --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_ppc32_sysv_elf_gas.S @@ -0,0 +1,193 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * |bchai|hiddn| fpscr | PC | CR | R14 | R15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R24 | R25 | R26 | R27 | R28 | R29 | R30 | R31 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | F14 | F15 | F16 | F17 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | F18 | F19 | F20 | F21 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | F22 | F23 | F24 | F25 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | F26 | F27 | F28 | F29 | * + * ------------------------------------------------- * + * 
------------------------|------------ * + * | 224 | 228 | 232 | 236 | 240 | 244 | * + * ------------------------|------------ * + * | F30 | F31 |bchai| LR | * + * ------------------------|------------ * + * * + *******************************************************/ + +.file "ontop_ppc32_sysv_elf_gas.S" +.text +.globl ontop_fcontext +.align 2 +.type ontop_fcontext,@function +ontop_fcontext: + # Linux: ontop_fcontext( hidden transfer_t * R3, R4, R5, R6) + # Other: transfer_t R3:R4 = ontop_fcontext( R3, R4, R5) + + mflr %r0 # return address from LR + mffs %f0 # FPSCR + mfcr %r8 # condition register + + stwu %r1, -240(%r1) # allocate stack space, R1 % 16 == 0 + stw %r0, 244(%r1) # save LR in caller's frame + +#ifdef __linux__ + stw %r3, 4(%r1) # hidden pointer +#endif + + stfd %f0, 8(%r1) # FPSCR + stw %r0, 16(%r1) # LR as PC + stw %r8, 20(%r1) # CR + + # Save registers R14 to R31. + # Don't change R2, the thread-local storage pointer. + # Don't change R13, the small data pointer. + stw %r14, 24(%r1) + stw %r15, 28(%r1) + stw %r16, 32(%r1) + stw %r17, 36(%r1) + stw %r18, 40(%r1) + stw %r19, 44(%r1) + stw %r20, 48(%r1) + stw %r21, 52(%r1) + stw %r22, 56(%r1) + stw %r23, 60(%r1) + stw %r24, 64(%r1) + stw %r25, 68(%r1) + stw %r26, 72(%r1) + stw %r27, 76(%r1) + stw %r28, 80(%r1) + stw %r29, 84(%r1) + stw %r30, 88(%r1) + stw %r31, 92(%r1) + + # Save registers F14 to F31 in slots with 8-byte alignment. + # 4-byte alignment may stall the pipeline of some processors. + # Less than 4 may cause alignment traps. 
+ stfd %f14, 96(%r1) + stfd %f15, 104(%r1) + stfd %f16, 112(%r1) + stfd %f17, 120(%r1) + stfd %f18, 128(%r1) + stfd %f19, 136(%r1) + stfd %f20, 144(%r1) + stfd %f21, 152(%r1) + stfd %f22, 160(%r1) + stfd %f23, 168(%r1) + stfd %f24, 176(%r1) + stfd %f25, 184(%r1) + stfd %f26, 192(%r1) + stfd %f27, 200(%r1) + stfd %f28, 208(%r1) + stfd %f29, 216(%r1) + stfd %f30, 224(%r1) + stfd %f31, 232(%r1) + + # store RSP (pointing to context-data) in R7/R6 + # restore RSP (pointing to context-data) from R4/R3 +#ifdef __linux__ + mr %r7, %r1 + mr %r1, %r4 + lwz %r3, 4(%r1) # hidden pointer +#else + mr %r6, %r1 + mr %r1, %r3 +#endif + + # ignore PC at 16(%r1) + lfd %f0, 8(%r1) # FPSCR + lwz %r8, 20(%r1) # CR + + mtfsf 0xff, %f0 # restore FPSCR + mtcr %r8 # restore CR + + # restore R14 to R31 + lwz %r14, 24(%r1) + lwz %r15, 28(%r1) + lwz %r16, 32(%r1) + lwz %r17, 36(%r1) + lwz %r18, 40(%r1) + lwz %r19, 44(%r1) + lwz %r20, 48(%r1) + lwz %r21, 52(%r1) + lwz %r22, 56(%r1) + lwz %r23, 60(%r1) + lwz %r24, 64(%r1) + lwz %r25, 68(%r1) + lwz %r26, 72(%r1) + lwz %r27, 76(%r1) + lwz %r28, 80(%r1) + lwz %r29, 84(%r1) + lwz %r30, 88(%r1) + lwz %r31, 92(%r1) + + # restore F14 to F31 + lfd %f14, 96(%r1) + lfd %f15, 104(%r1) + lfd %f16, 112(%r1) + lfd %f17, 120(%r1) + lfd %f18, 128(%r1) + lfd %f19, 136(%r1) + lfd %f20, 144(%r1) + lfd %f21, 152(%r1) + lfd %f22, 160(%r1) + lfd %f23, 168(%r1) + lfd %f24, 176(%r1) + lfd %f25, 184(%r1) + lfd %f26, 192(%r1) + lfd %f27, 200(%r1) + lfd %f28, 208(%r1) + lfd %f29, 216(%r1) + lfd %f30, 224(%r1) + lfd %f31, 232(%r1) + + # restore LR from caller's frame + lwz %r0, 244(%r1) + mtlr %r0 + + # adjust stack + addi %r1, %r1, 240 + + # see tail_ppc32_sysv_elf_gas.cpp + # Linux: ontop_fcontext_tail( hidden transfer_t * R3, R4, R5, R6, R7) + # Other: transfer_t R3:R4 = ontop_fcontext_tail( R3, R4, R5, R6) + b ontop_fcontext_tail +.size ontop_fcontext, .-ontop_fcontext + +/* Mark that we don't need executable stack. 
*/ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_ppc32_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/ontop_ppc32_sysv_macho_gas.S new file mode 100644 index 0000000..1eb5f93 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_ppc32_sysv_macho_gas.S @@ -0,0 +1,201 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/****************************************************** + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | F14 | F15 | F16 | F17 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | F18 | F19 | F20 | F21 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | F22 | F23 | F24 | F25 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | F26 | F27 | F28 | F29 | * + * ------------------------------------------------- * + * 
------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | F30 | F31 | fpscr | R13 | R14 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | R31 |hiddn| CR | LR | PC |bchai|linkr| FCTX| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 64 | | * + * ------------------------------------------------- * + * | 256 | | * + * ------------------------------------------------- * + * | DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.globl _ontop_fcontext +.align 2 +_ontop_fcontext: + # reserve space on stack + subi r1, r1, 244 + + stfd f14, 0(r1) # save F14 + stfd f15, 
8(r1) # save F15 + stfd f16, 16(r1) # save F16 + stfd f17, 24(r1) # save F17 + stfd f18, 32(r1) # save F18 + stfd f19, 40(r1) # save F19 + stfd f20, 48(r1) # save F20 + stfd f21, 56(r1) # save F21 + stfd f22, 64(r1) # save F22 + stfd f23, 72(r1) # save F23 + stfd f24, 80(r1) # save F24 + stfd f25, 88(r1) # save F25 + stfd f26, 96(r1) # save F26 + stfd f27, 104(r1) # save F27 + stfd f28, 112(r1) # save F28 + stfd f29, 120(r1) # save F29 + stfd f30, 128(r1) # save F30 + stfd f31, 136(r1) # save F31 + mffs f0 # load FPSCR + stfd f0, 144(r1) # save FPSCR + + stw r13, 152(r1) # save R13 + stw r14, 156(r1) # save R14 + stw r15, 160(r1) # save R15 + stw r16, 164(r1) # save R16 + stw r17, 168(r1) # save R17 + stw r18, 172(r1) # save R18 + stw r19, 176(r1) # save R19 + stw r20, 180(r1) # save R20 + stw r21, 184(r1) # save R21 + stw r22, 188(r1) # save R22 + stw r23, 192(r1) # save R23 + stw r24, 196(r1) # save R24 + stw r25, 200(r1) # save R25 + stw r26, 204(r1) # save R26 + stw r27, 208(r1) # save R27 + stw r28, 212(r1) # save R28 + stw r29, 216(r1) # save R29 + stw r30, 220(r1) # save R30 + stw r31, 224(r1) # save R31 + stw r3, 228(r1) # save hidden + + # save CR + mfcr r0 + stw r0, 232(r1) + # save LR + mflr r0 + stw r0, 236(r1) + # save LR as PC + stw r0, 240(r1) + + # store RSP (pointing to context-data) in R7 + mr r7, r1 + + # restore RSP (pointing to context-data) from R4 + mr r1, r4 + + lfd f14, 0(r1) # restore F14 + lfd f15, 8(r1) # restore F15 + lfd f16, 16(r1) # restore F16 + lfd f17, 24(r1) # restore F17 + lfd f18, 32(r1) # restore F18 + lfd f19, 40(r1) # restore F19 + lfd f20, 48(r1) # restore F20 + lfd f21, 56(r1) # restore F21 + lfd f22, 64(r1) # restore F22 + lfd f23, 72(r1) # restore F23 + lfd f24, 80(r1) # restore F24 + lfd f25, 88(r1) # restore F25 + lfd f26, 96(r1) # restore F26 + lfd f27, 104(r1) # restore F27 + lfd f28, 112(r1) # restore F28 + lfd f29, 120(r1) # restore F29 + lfd f30, 128(r1) # restore F30 + lfd f31, 136(r1) # restore F31 + lfd f0, 
144(r1) # load FPSCR + mtfsf 0xff, f0 # restore FPSCR + + lwz r13, 152(r1) # restore R13 + lwz r14, 156(r1) # restore R14 + lwz r15, 160(r1) # restore R15 + lwz r16, 164(r1) # restore R16 + lwz r17, 168(r1) # restore R17 + lwz r18, 172(r1) # restore R18 + lwz r19, 176(r1) # restore R19 + lwz r20, 180(r1) # restore R20 + lwz r21, 184(r1) # restore R21 + lwz r22, 188(r1) # restore R22 + lwz r23, 192(r1) # restore R23 + lwz r24, 196(r1) # restore R24 + lwz r25, 200(r1) # restore R25 + lwz r26, 204(r1) # restore R26 + lwz r27, 208(r1) # restore R27 + lwz r28, 212(r1) # restore R28 + lwz r29, 216(r1) # restore R29 + lwz r30, 220(r1) # restore R30 + lwz r31, 224(r1) # restore R31 + lwz r4, 228(r1) # restore hidden + + # restore CR + lwz r0, 232(r1) + mtcr r0 + # restore LR + lwz r0, 236(r1) + mtlr r0 + # ignore PC + + # adjust stack + addi r1, r1, 244 + + # return transfer_t + stw r7, 0(r4) + stw r5, 4(r4) + + # restore CTR + mtctr r6 + + # jump to ontop-function + bctr diff --git a/lib/context_switcher/asm/fcontext/ontop_ppc32_sysv_xcoff_gas.S b/lib/context_switcher/asm/fcontext/ontop_ppc32_sysv_xcoff_gas.S new file mode 100644 index 0000000..a3c9fa2 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_ppc32_sysv_xcoff_gas.S @@ -0,0 +1,203 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/****************************************************** + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | F14 | F15 | F16 | F17 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | F18 | F19 | F20 | F21 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | F22 | F23 | F24 | F25 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | F26 | F27 | F28 | F29 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | F30 | F31 | fpscr | R13 | R14 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 
41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | R31 |hiddn| CR | LR | PC |bchai|linkr| FCTX| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 64 | | * + * ------------------------------------------------- * + * | 256 | | * + * ------------------------------------------------- * + * | DATA| | * + * ------------------------------------------------- * + * * + *******************************************************/ +.globl .ontop_fcontext +.globl ontop_fcontext[DS] +.align 2 +.csect ontop_fcontext[DS] +ontop_fcontext: + .long .ontop_fcontext +.ontop_fcontext: + # reserve space on stack + subi r1, r1, 244 + + stfd f14, 0(r1) # save F14 + stfd f15, 8(r1) # save F15 + stfd f16, 16(r1) # save F16 + stfd f17, 24(r1) # save F17 + stfd f18, 32(r1) # save F18 + stfd f19, 40(r1) # save F19 + stfd f20, 48(r1) # save F20 + stfd f21, 56(r1) # save F21 + stfd f22, 64(r1) # save F22 + stfd f23, 72(r1) # save F23 + stfd f24, 80(r1) # save F24 + stfd f25, 88(r1) # save F25 + stfd f26, 96(r1) # save 
F26 + stfd f27, 104(r1) # save F27 + stfd f28, 112(r1) # save F28 + stfd f29, 120(r1) # save F29 + stfd f30, 128(r1) # save F30 + stfd f31, 136(r1) # save F31 + mffs f0 # load FPSCR + stfd f0, 144(r1) # save FPSCR + + stw r13, 152(r1) # save R13 + stw r14, 156(r1) # save R14 + stw r15, 160(r1) # save R15 + stw r16, 164(r1) # save R16 + stw r17, 168(r1) # save R17 + stw r18, 172(r1) # save R18 + stw r19, 176(r1) # save R19 + stw r20, 180(r1) # save R20 + stw r21, 184(r1) # save R21 + stw r22, 188(r1) # save R22 + stw r23, 192(r1) # save R23 + stw r24, 196(r1) # save R24 + stw r25, 200(r1) # save R25 + stw r26, 204(r1) # save R26 + stw r27, 208(r1) # save R27 + stw r28, 212(r1) # save R28 + stw r29, 216(r1) # save R29 + stw r30, 220(r1) # save R30 + stw r31, 224(r1) # save R31 + stw r3, 228(r1) # save hidden + + # save CR + mfcr r0 + stw r0, 232(r1) + # save LR + mflr r0 + stw r0, 236(r1) + # save LR as PC + stw r0, 240(r1) + + # store RSP (pointing to context-data) in R7 + mr r7, r1 + + # restore RSP (pointing to context-data) from R4 + mr r1, r4 + + lfd f14, 0(r1) # restore F14 + lfd f15, 8(r1) # restore F15 + lfd f16, 16(r1) # restore F16 + lfd f17, 24(r1) # restore F17 + lfd f18, 32(r1) # restore F18 + lfd f19, 40(r1) # restore F19 + lfd f20, 48(r1) # restore F20 + lfd f21, 56(r1) # restore F21 + lfd f22, 64(r1) # restore F22 + lfd f23, 72(r1) # restore F23 + lfd f24, 80(r1) # restore F24 + lfd f25, 88(r1) # restore F25 + lfd f26, 96(r1) # restore F26 + lfd f27, 104(r1) # restore F27 + lfd f28, 112(r1) # restore F28 + lfd f29, 120(r1) # restore F29 + lfd f30, 128(r1) # restore F30 + lfd f31, 136(r1) # restore F31 + lfd f0, 144(r1) # load FPSCR + mtfsf 0xff, f0 # restore FPSCR + + lwz r13, 152(r1) # restore R13 + lwz r14, 156(r1) # restore R14 + lwz r15, 160(r1) # restore R15 + lwz r16, 164(r1) # restore R16 + lwz r17, 168(r1) # restore R17 + lwz r18, 172(r1) # restore R18 + lwz r19, 176(r1) # restore R19 + lwz r20, 180(r1) # restore R20 + lwz r21, 184(r1) # 
restore R21 + lwz r22, 188(r1) # restore R22 + lwz r23, 192(r1) # restore R23 + lwz r24, 196(r1) # restore R24 + lwz r25, 200(r1) # restore R25 + lwz r26, 204(r1) # restore R26 + lwz r27, 208(r1) # restore R27 + lwz r28, 212(r1) # restore R28 + lwz r29, 216(r1) # restore R29 + lwz r30, 220(r1) # restore R30 + lwz r31, 224(r1) # restore R31 + lwz r4, 228(r1) # restore hidden + + # restore CR + lwz r0, 232(r1) + mtcr r0 + # restore LR + lwz r0, 236(r1) + mtlr r0 + # ignore PC + + # adjust stack + addi r1, r1, 244 + + # return transfer_t + stw r7, 0(r4) + stw r5, 4(r4) + + # restore CTR + mtctr r6 + + # jump to ontop-function + bctr diff --git a/lib/context_switcher/asm/fcontext/ontop_ppc64_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_ppc64_sysv_elf_gas.S new file mode 100644 index 0000000..cd97f45 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_ppc64_sysv_elf_gas.S @@ -0,0 +1,244 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 
42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | CR | LR | PC | back-chain| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | cr saved | lr saved | compiler | linker | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | TOC saved | FCTX | DATA | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.file "ontop_ppc64_sysv_elf_gas.S" +.globl ontop_fcontext +#if _CALL_ELF == 2 + .text + .align 2 +ontop_fcontext: + addis %r2, %r12, .TOC.-ontop_fcontext@ha + addi %r2, %r2, .TOC.-ontop_fcontext@l + .localentry ontop_fcontext, . 
- ontop_fcontext +#else + .section ".opd","aw" + .align 3 +ontop_fcontext: +# ifdef _CALL_LINUX + .quad .L.ontop_fcontext,.TOC.@tocbase,0 + .type ontop_fcontext,@function + .text + .align 2 +.L.ontop_fcontext: +# else + .hidden .ontop_fcontext + .globl .ontop_fcontext + .quad .ontop_fcontext,.TOC.@tocbase,0 + .size ontop_fcontext,24 + .type .ontop_fcontext,@function + .text + .align 2 +.ontop_fcontext: +# endif +#endif + # reserve space on stack + subi %r1, %r1, 184 + +#if _CALL_ELF != 2 + std %r2, 0(%r1) # save TOC +#endif + std %r14, 8(%r1) # save R14 + std %r15, 16(%r1) # save R15 + std %r16, 24(%r1) # save R16 + std %r17, 32(%r1) # save R17 + std %r18, 40(%r1) # save R18 + std %r19, 48(%r1) # save R19 + std %r20, 56(%r1) # save R20 + std %r21, 64(%r1) # save R21 + std %r22, 72(%r1) # save R22 + std %r23, 80(%r1) # save R23 + std %r24, 88(%r1) # save R24 + std %r25, 96(%r1) # save R25 + std %r26, 104(%r1) # save R26 + std %r27, 112(%r1) # save R27 + std %r28, 120(%r1) # save R28 + std %r29, 128(%r1) # save R29 + std %r30, 136(%r1) # save R30 + std %r31, 144(%r1) # save R31 +#if _CALL_ELF != 2 + std %r3, 152(%r1) # save hidden +#endif + + # save CR + mfcr %r0 + std %r0, 160(%r1) + # save LR + mflr %r0 + std %r0, 168(%r1) + # save LR as PC + std %r0, 176(%r1) + + # store RSP (pointing to context-data) in R7 + mr %r7, %r1 + +#if _CALL_ELF == 2 + # restore RSP (pointing to context-data) from R3 + mr %r1, %r3 +#else + # restore RSP (pointing to context-data) from R4 + mr %r1, %r4 +#endif + + ld %r14, 8(%r1) # restore R14 + ld %r15, 16(%r1) # restore R15 + ld %r16, 24(%r1) # restore R16 + ld %r17, 32(%r1) # restore R17 + ld %r18, 40(%r1) # restore R18 + ld %r19, 48(%r1) # restore R19 + ld %r20, 56(%r1) # restore R20 + ld %r21, 64(%r1) # restore R21 + ld %r22, 72(%r1) # restore R22 + ld %r23, 80(%r1) # restore R23 + ld %r24, 88(%r1) # restore R24 + ld %r25, 96(%r1) # restore R25 + ld %r26, 104(%r1) # restore R26 + ld %r27, 112(%r1) # restore R27 + ld %r28, 120(%r1) # 
restore R28 + ld %r29, 128(%r1) # restore R29 + ld %r30, 136(%r1) # restore R30 + ld %r31, 144(%r1) # restore R31 +#if _CALL_ELF != 2 + ld %r3, 152(%r1) # restore hidden +#endif + + # restore CR + ld %r0, 160(%r1) + mtcr %r0 + +#if _CALL_ELF == 2 + # restore CTR + mtctr %r5 + + # store cb entrypoint in %r12, used for TOC calculation + mr %r12, %r5 + + # copy transfer_t into ontop_fn arg registers + mr %r3, %r7 + # arg pointer already in %r4 +#else + # copy transfer_t into ontop_fn arg registers + mr %r4, %r7 + # arg pointer already in %r5 + # hidden arg already in %r3 + + # restore CTR + ld %r7, 0(%r6) + mtctr %r7 + # restore TOC + ld %r2, 8(%r6) + + # zero in r3 indicates first jump to context-function + cmpdi %r3, 0 + beq use_entry_arg +#endif + +return_to_ctx: + # restore LR + ld %r0, 168(%r1) + mtlr %r0 + + # adjust stack + addi %r1, %r1, 184 + + # jump to context + bctr + +#if _CALL_ELF == 2 + .size ontop_fcontext, .-ontop_fcontext +#else +use_entry_arg: + # compute return-value struct address + # (passed has hidden arg to ontop_fn) + addi %r3, %r1, 8 + + # jump to context and update LR + bctrl + + # restore CTR + ld %r7, 176(%r1) + mtctr %r7 +#if _CALL_ELF != 2 + # restore TOC + ld %r2, 0(%r1) +#endif + + # copy returned transfer_t into entry_fn arg registers + ld %r3, 8(%r1) + ld %r4, 16(%r1) + + b return_to_ctx +# ifdef _CALL_LINUX + .size .ontop_fcontext, .-.L.ontop_fcontext +# else + .size .ontop_fcontext, .-.ontop_fcontext +# endif +#endif + + +/* Mark that we don't need executable stack. */ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_ppc64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/ontop_ppc64_sysv_macho_gas.S new file mode 100644 index 0000000..5de8acd --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_ppc64_sysv_macho_gas.S @@ -0,0 +1,151 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | * + * ------------------------------------------------- * + * | TOC | R14 | R15 | R16 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | * + * ------------------------------------------------- * + * | R17 | R18 | R19 | R20 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | * + * ------------------------------------------------- * + * | R21 | R22 | R23 | R24 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | * + * ------------------------------------------------- * + * | R25 | R26 | R27 | R28 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | * + * ------------------------------------------------- * + * | R29 | R30 | R31 | hidden | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 
42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | * + * ------------------------------------------------- * + * | CR | LR | PC | back-chain| * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | * + * ------------------------------------------------- * + * | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | * + * ------------------------------------------------- * + * | cr saved | lr saved | compiler | linker | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | * + * ------------------------------------------------- * + * | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | * + * ------------------------------------------------- * + * | TOC saved | FCTX | DATA | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.text +.align 2 +.globl _ontop_fcontext + +_ontop_fcontext: + ; reserve space on stack + subi r1, r1, 184 + + std r14, 8(r1) ; save R14 + std r15, 16(r1) ; save R15 + std r16, 24(r1) ; save R16 + std r17, 32(r1) ; save R17 + std r18, 40(r1) ; save R18 + std r19, 48(r1) ; save R19 + std r20, 56(r1) ; save R20 + std r21, 64(r1) ; save R21 + std r22, 72(r1) ; save R22 + std r23, 80(r1) ; save R23 + std r24, 88(r1) ; save R24 + std r25, 96(r1) ; save R25 + std r26, 104(r1) ; save R26 + std r27, 112(r1) ; save R27 + std r28, 120(r1) ; save R28 + std r29, 128(r1) ; save R29 + std r30, 136(r1) ; save R30 + std r31, 144(r1) ; save R31 + std r3, 152(r1) ; save hidden + + ; save CR + mfcr r0 + std r0, 160(r1) + ; save LR + mflr r0 + std r0, 168(r1) + ; save LR as PC + std r0, 176(r1) + + ; store RSP (pointing to context-data) in R7 + mr r7, r1 + + ; restore RSP (pointing to context-data) from R4 
+    mr  r1, r4
+
+    ld  r14, 8(r1)  ; restore R14
+    ld  r15, 16(r1)  ; restore R15
+    ld  r16, 24(r1)  ; restore R16
+    ld  r17, 32(r1)  ; restore R17
+    ld  r18, 40(r1)  ; restore R18
+    ld  r19, 48(r1)  ; restore R19
+    ld  r20, 56(r1)  ; restore R20
+    ld  r21, 64(r1)  ; restore R21
+    ld  r22, 72(r1)  ; restore R22
+    ld  r23, 80(r1)  ; restore R23
+    ld  r24, 88(r1)  ; restore R24
+    ld  r25, 96(r1)  ; restore R25
+    ld  r26, 104(r1)  ; restore R26
+    ld  r27, 112(r1)  ; restore R27
+    ld  r28, 120(r1)  ; restore R28
+    ld  r29, 128(r1)  ; restore R29
+    ld  r30, 136(r1)  ; restore R30
+    ld  r31, 144(r1)  ; restore R31
+    ld  r4,  152(r1)  ; restore hidden
+
+    ; restore CR
+    ld  r0, 160(r1)
+    mtcr  r0
+    ; restore LR
+    ld  r0, 168(r1)
+    mtlr  r0
+    ; ignore PC
+
+    ; adjust stack
+    addi  r1, r1, 184
+
+    ; return transfer_t
+    std  r7, 0(r4)
+    std  r5, 8(r4)
+
+    ; restore CTR
+    mtctr  r6
+
+    ; jump to context
+    bctr
diff --git a/lib/context_switcher/asm/fcontext/ontop_ppc64_sysv_xcoff_gas.S b/lib/context_switcher/asm/fcontext/ontop_ppc64_sysv_xcoff_gas.S
new file mode 100644
index 0000000..93f8c23
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/ontop_ppc64_sysv_xcoff_gas.S
@@ -0,0 +1,92 @@
+# ontop_fcontext for 64-bit PowerPC, XCOFF object format.
+#
+# Saves R13-R31, the hidden return pointer (R3), CR and LR in a 184-byte
+# frame on the current stack, then switches the stack pointer to the
+# context passed in R4 and restores the same registers from that frame.
+# The suspended stack pointer (R7) and the value passed in R5 are stored
+# into the transfer_t addressed by the restored hidden pointer, and
+# control branches to the address passed in R6.
+
+.align 2
+.globl .ontop_fcontext
+.ontop_fcontext:
+    # reserve space on stack
+    subi  1, 1, 184
+
+    std  13, 0(1)  # save R13
+    std  14, 8(1)  # save R14
+    std  15, 16(1)  # save R15
+    std  16, 24(1)  # save R16
+    std  17, 32(1)  # save R17
+    std  18, 40(1)  # save R18
+    std  19, 48(1)  # save R19
+    std  20, 56(1)  # save R20
+    std  21, 64(1)  # save R21
+    std  22, 72(1)  # save R22
+    std  23, 80(1)  # save R23
+    std  24, 88(1)  # save R24
+    std  25, 96(1)  # save R25
+    std  26, 104(1)  # save R26
+    std  27, 112(1)  # save R27
+    std  28, 120(1)  # save R28
+    std  29, 128(1)  # save R29
+    std  30, 136(1)  # save R30
+    std  31, 144(1)  # save R31
+    std  3,  152(1)  # save hidden
+
+    # save CR
+    mfcr  0
+    std  0, 160(1)
+    # save LR
+    mflr  0
+    std  0, 168(1)
+    # save LR as PC
+    std  0, 176(1)
+
+    # store RSP (pointing to context-data) in R7
+    mr  7, 1
+
+    # restore RSP (pointing to context-data) from R4
+    mr  1, 4
+
+    ld  13, 0(1)  # restore R13
+    ld  14, 8(1)  # restore R14
+    ld  15, 16(1)  # restore R15
+    ld  16, 24(1)  # restore R16
+    ld  17, 32(1)  # restore R17
+    ld  18, 40(1)  # restore R18
+    ld  19, 48(1)  # restore R19
+    ld  20, 56(1)  # restore R20
+    ld  21, 64(1)  # restore R21
+    ld  22, 72(1)  # restore R22
+    ld  23, 80(1)  # restore R23
+    ld  24, 88(1)  # restore R24
+    ld  25, 96(1)  # restore R25
+    ld  26, 104(1)  # restore R26
+    ld  27, 112(1)  # restore R27
+    ld  28, 120(1)  # restore R28
+    ld  29, 128(1)  # restore R29
+    ld  30, 136(1)  # restore R30
+    ld  31, 144(1)  # restore R31
+    ld  4,  152(1)  # restore hidden
+
+    # restore CR
+    ld  0, 160(1)
+    mtcr  0
+    # restore LR
+    ld  0, 168(1)
+    mtlr  0
+    # ignore PC
+
+    # adjust stack
+    addi  1, 1, 184
+
+    # return transfer_t
+    std  7, 0(4)
+    std  5, 8(4)
+
+    # restore CTR
+    mtctr  6
+
+    # jump to context
+    bctr
diff --git a/lib/context_switcher/asm/fcontext/ontop_riscv64_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_riscv64_sysv_elf_gas.S
new file mode 100644
index 0000000..61ab46b
--- /dev/null
+++ b/lib/context_switcher/asm/fcontext/ontop_riscv64_sysv_elf_gas.S
@@ -0,0 +1,149 @@
+/*
+          Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | fs0 | fs1 | fs2 | fs3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | fs4 | fs5 | fs6 | fs7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | fs8 | fs9 | fs10 | fs11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | s0 | s1 | s2 | s3 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | s4 | s5 | s6 | s7 | * + * ------------------------------------------------- * + * ------------------------------------------------- * 
+ * | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | * + * ------------------------------------------------- * + * | 0xa0| 0xa4| 0xa8| 0xac| 0xb0| 0xb4| 0xb8| 0xbc| * + * ------------------------------------------------- * + * | s8 | s9 | s10 | s11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 48 | 49 | 50 | 51 | | | | | * + * ------------------------------------------------- * + * | 0xc0| 0xc4| 0xc8| 0xcc| | | | | * + * ------------------------------------------------- * + * | ra | pc | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + +.file "ontop_riscv64_sysv_elf_gas.S" +.text +.align 1 +.global ontop_fcontext +.type ontop_fcontext, %function +ontop_fcontext: + # prepare stack for GP + FPU + addi sp, sp, -0xd0 + + # save fs0 - fs11 + fsd fs0, 0x00(sp) + fsd fs1, 0x08(sp) + fsd fs2, 0x10(sp) + fsd fs3, 0x18(sp) + fsd fs4, 0x20(sp) + fsd fs5, 0x28(sp) + fsd fs6, 0x30(sp) + fsd fs7, 0x38(sp) + fsd fs8, 0x40(sp) + fsd fs9, 0x48(sp) + fsd fs10, 0x50(sp) + fsd fs11, 0x58(sp) + + # save s0-s11, ra + sd s0, 0x60(sp) + sd s1, 0x68(sp) + sd s2, 0x70(sp) + sd s3, 0x78(sp) + sd s4, 0x80(sp) + sd s5, 0x88(sp) + sd s6, 0x90(sp) + sd s7, 0x98(sp) + sd s8, 0xa0(sp) + sd s9, 0xa8(sp) + sd s10, 0xb0(sp) + sd s11, 0xb8(sp) + sd ra, 0xc0(sp) + + # save RA as PC + sd ra, 0xc8(sp) + + # store SP (pointing to context-data) in A3 + mv a3, sp + + # restore SP (pointing to context-data) from A0 + mv sp, a0 + + # load fs0 - fs11 + fld fs0, 0x00(sp) + fld fs1, 0x08(sp) + fld fs2, 0x10(sp) + fld fs3, 0x18(sp) + fld fs4, 0x20(sp) + fld fs5, 0x28(sp) + fld fs6, 0x30(sp) + fld fs7, 0x38(sp) + fld fs8, 0x40(sp) + fld fs9, 0x48(sp) + fld fs10, 0x50(sp) + fld fs11, 0x58(sp) + + # load s0-s11,ra + ld s0, 0x60(sp) + ld s1, 0x68(sp) + ld s2, 0x70(sp) + ld s3, 0x78(sp) + ld s4, 0x80(sp) + ld s5, 0x88(sp) + ld s6, 0x90(sp) + ld s7, 0x98(sp) + ld s8, 
0xa0(sp) + ld s9, 0xa8(sp) + ld s10, 0xb0(sp) + ld s11, 0xb8(sp) + ld ra, 0xc0(sp) + + # return transfer_t from jump + # pass transfer_t as first arg in context function + # a0 == FCTX, a1 == DATA + mv a0, a3 + + # skip pc + # restore stack from GP + FPU + addi sp, sp, 0xd0 + + # jump to ontop-function + jr a2 +.size ontop_fcontext,.-ontop_fcontext +# Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_s390x_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_s390x_sysv_elf_gas.S new file mode 100644 index 0000000..4488654 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_s390x_sysv_elf_gas.S @@ -0,0 +1,112 @@ +/******************************************************* +* * +* ------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* ------------------------------------------------- * +* | 0 | 8 | 16 | 24 | * +* ------------------------------------------------- * +* | R6 | R7 | R8 | R9 | * +* ------------------------------------------------- * +* ------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* ------------------------------------------------- * +* | 32 | 40 | 48 | 56 | * +* ------------------------------------------------- * +* | R10 | R11 | R12 | R13 | * +* ------------------------------------------------- * +* ------------------------------------------------- * +* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * +* ------------------------------------------------- * +* | 64 | 72 | 80 | 88 | * +* ------------------------------------------------- * +* | R14/LR | R15 | F1 | F3 | * +* ------------------------------------------------- * +* ------------------------------------------------- * +* | 24 | 25 | 26 | 27 | 28 | 29 | | * +* ------------------------------------------------- * +* | 96 | 104 | 112 | 120 | * +* ------------------------------------------------- * +* | F5 | F7 | PC | | * +* 
------------------------------------------------- * +* *****************************************************/ + +.file "ontop_s390x_sysv_elf_gas.S" +.text +.align 4 # According to the sample code in the ELF ABI docs +.global ontop_fcontext +.type ontop_fcontext, @function + +#define GR_OFFSET 0 +#define LR_OFFSET 64 +#define SP_OFFSET 72 +#define FP_OFFSET 80 +#define PC_OFFSET 112 +#define L_CTX 120 + +ontop_fcontext: + + # Reserve space on the stack to store the data of the current context + # before we jump to the new context. + aghi %r15,-L_CTX + + # save the registers to the stack + stmg %r6, %r15, GR_OFFSET(%r15) + + # save the floating point registers (F1/F3/F5/F7, the same set that is restored below) + std %f1,FP_OFFSET(%r15) + std %f3,FP_OFFSET+8(%r15) + std %f5,FP_OFFSET+16(%r15) + std %f7,FP_OFFSET+24(%r15) + # Save LR as PC + stg %r14,PC_OFFSET(%r15) + + # Store the SP pointing to the old context-data into R0 + lgr %r0,%r15 + + # Get the SP pointing to the new context-data + # Note: Since the return type of the jump_fcontext is struct whose + # size is more than 8. The compiler automatically passes the + # address of the transfer_t where the data needs to store into R2.
+ + # Hence the first param passed to the jump_fcontext which represent + # the fctx we want to switch to is present in R3 + # R2 --> Address of the return transfer_t struct + # R3 --> Context we want to switch to + # R4 --> Data + lgr %r15,%r3 + + # Load the registers with the data present in context-data of the + # context we are going to switch to + lmg %r6,%r15,GR_OFFSET(%r15) + + # Restore Floating point registers + ld %f1,FP_OFFSET(%r15) + ld %f3,FP_OFFSET+8(%r15) + ld %f5,FP_OFFSET+16(%r15) + ld %f7,FP_OFFSET+24(%r15) + + # Skip PC + + # Adjust the stack + aghi %r15,L_CTX + + # R2 --> Address where the return transfer_t is stored + # R0 --> FCTX (old context; NOTE(review): the store below writes R15, not R0 - confirm) + # R4 --> DATA + # R5 --> Context function + + # Store the elements to return transfer_t + stg %r15, 0(%r2) + stg %r4, 8(%r2) + + # Note: The address in R2 points to the place where the return + # transfer_t is stored. Since context_function take transfer_t + # as first parameter. And R2 is the register which holds the + # first parameter value. + + #jump to context function + br %r5 + +.size ontop_fcontext,.-ontop_fcontext +# Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_x86_64_ms_pe_gas.asm b/lib/context_switcher/asm/fcontext/ontop_x86_64_ms_pe_gas.asm new file mode 100644 index 0000000..02e040c --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_x86_64_ms_pe_gas.asm @@ -0,0 +1,211 @@ +/* + Copyright Oliver Kowalke 2009. + Copyright Thomas Sailer 2013. + Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/************************************************************************************* +* ---------------------------------------------------------------------------------- * +* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * +* ---------------------------------------------------------------------------------- * +* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * +* ---------------------------------------------------------------------------------- * +* | SSE registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * +* ---------------------------------------------------------------------------------- * +* | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * +* ---------------------------------------------------------------------------------- * +* | SSE registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * +* ---------------------------------------------------------------------------------- * +* | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | * +* ---------------------------------------------------------------------------------- * +* | SSE registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * +* ---------------------------------------------------------------------------------- * +* | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | * +*
---------------------------------------------------------------------------------- * +* | SSE registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 32 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | * +* ---------------------------------------------------------------------------------- * +* | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | * +* ---------------------------------------------------------------------------------- * +* | SSE registers (XMM6-XMM15) | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | * +* ---------------------------------------------------------------------------------- * +* | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | * +* ---------------------------------------------------------------------------------- * +* | fc_mxcsr|fc_x87_cw| | fbr_strg | fc_dealloc | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | * +* ---------------------------------------------------------------------------------- * +* | 0xc0 | 0xc4 | 0xc8 | 0xcc | 0xd0 | 0xd4 | 0xd8 | 0xdc | * +* ---------------------------------------------------------------------------------- * +* | limit | base | R12 | R13 | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | * +* ---------------------------------------------------------------------------------- * +* | 0xe0 | 0xe4 | 0xe8 | 0xec | 0xf0 | 0xf4 | 0xf8 | 0xfc
| * +* ---------------------------------------------------------------------------------- * +* | R14 | R15 | RDI | RSI | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | * +* ---------------------------------------------------------------------------------- * +* | 0x100 | 0x104 | 0x108 | 0x10c | 0x110 | 0x114 | 0x118 | 0x11c | * +* ---------------------------------------------------------------------------------- * +* | RBX | RBP | hidden | RIP | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | * +* ---------------------------------------------------------------------------------- * +* | 0x120 | 0x124 | 0x128 | 0x12c | 0x130 | 0x134 | 0x138 | 0x13c | * +* ---------------------------------------------------------------------------------- * +* | parameter area | * +* ---------------------------------------------------------------------------------- * +* ---------------------------------------------------------------------------------- * +* | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | * +* ---------------------------------------------------------------------------------- * +* | 0x140 | 0x144 | 0x148 | 0x14c | 0x150 | 0x154 | 0x158 | 0x15c | * +* ---------------------------------------------------------------------------------- * +* | FCTX | DATA | | * +* ---------------------------------------------------------------------------------- * +**************************************************************************************/ + +.file "ontop_x86_64_ms_pe_gas.asm" +.text +.p2align 4,,15 +.globl ontop_fcontext +.def ontop_fcontext; .scl 2; .type 32; .endef +.seh_proc ontop_fcontext +ontop_fcontext: +.seh_endprologue + + leaq 
-0x118(%rsp), %rsp /* prepare stack */ + +#if !defined(BOOST_USE_TSX) + /* save XMM storage */ + movaps %xmm6, 0x0(%rsp) + movaps %xmm7, 0x10(%rsp) + movaps %xmm8, 0x20(%rsp) + movaps %xmm9, 0x30(%rsp) + movaps %xmm10, 0x40(%rsp) + movaps %xmm11, 0x50(%rsp) + movaps %xmm12, 0x60(%rsp) + movaps %xmm13, 0x70(%rsp) + movaps %xmm14, 0x80(%rsp) + movaps %xmm15, 0x90(%rsp) + stmxcsr 0xa0(%rsp) /* save MMX control- and status-word */ + fnstcw 0xa4(%rsp) /* save x87 control-word */ +#endif + + /* load NT_TIB */ + movq %gs:(0x30), %r10 + /* save fiber local storage */ + movq 0x20(%r10), %rax + movq %rax, 0xb0(%rsp) + /* save current deallocation stack */ + movq 0x1478(%r10), %rax + movq %rax, 0xb8(%rsp) + /* save current stack limit */ + movq 0x10(%r10), %rax + movq %rax, 0xc0(%rsp) + /* save current stack base */ + movq 0x08(%r10), %rax + movq %rax, 0xc8(%rsp) + + movq %r12, 0xd0(%rsp) /* save R12 */ + movq %r13, 0xd8(%rsp) /* save R13 */ + movq %r14, 0xe0(%rsp) /* save R14 */ + movq %r15, 0xe8(%rsp) /* save R15 */ + movq %rdi, 0xf0(%rsp) /* save RDI */ + movq %rsi, 0xf8(%rsp) /* save RSI */ + movq %rbx, 0x100(%rsp) /* save RBX */ + movq %rbp, 0x108(%rsp) /* save RBP */ + + movq %rcx, 0x110(%rsp) /* save hidden address of transport_t */ + + /* preserve RSP (pointing to context-data) in RCX */ + movq %rsp, %rcx + + /* restore RSP (pointing to context-data) from RDX */ + movq %rdx, %rsp + +#if !defined(BOOST_USE_TSX) + /* restore XMM storage */ + movaps 0x0(%rsp), %xmm6 + movaps 0x10(%rsp), %xmm7 + movaps 0x20(%rsp), %xmm8 + movaps 0x30(%rsp), %xmm9 + movaps 0x40(%rsp), %xmm10 + movaps 0x50(%rsp), %xmm11 + movaps 0x60(%rsp), %xmm12 + movaps 0x70(%rsp), %xmm13 + movaps 0x80(%rsp), %xmm14 + movaps 0x90(%rsp), %xmm15 + ldmxcsr 0xa0(%rsp) /* restore MMX control- and status-word */ + fldcw 0xa4(%rsp) /* restore x87 control-word */ +#endif + + /* load NT_TIB */ + movq %gs:(0x30), %r10 + /* restore fiber local storage */ + movq 0xb0(%rsp), %rax + movq %rax, 0x20(%r10) + /* restore 
current deallocation stack */ + movq 0xb8(%rsp), %rax + movq %rax, 0x1478(%r10) + /* restore current stack limit */ + movq 0xc0(%rsp), %rax + movq %rax, 0x10(%r10) + /* restore current stack base */ + movq 0xc8(%rsp), %rax + movq %rax, 0x08(%r10) + + movq 0xd0(%rsp), %r12 /* restore R12 */ + movq 0xd8(%rsp), %r13 /* restore R13 */ + movq 0xe0(%rsp), %r14 /* restore R14 */ + movq 0xe8(%rsp), %r15 /* restore R15 */ + movq 0xf0(%rsp), %rdi /* restore RDI */ + movq 0xf8(%rsp), %rsi /* restore RSI */ + movq 0x100(%rsp), %rbx /* restore RBX */ + movq 0x108(%rsp), %rbp /* restore RBP */ + + movq 0x110(%rsp), %rax /* restore hidden address of transport_t */ + + leaq 0x118(%rsp), %rsp /* prepare stack */ + + /* keep return-address on stack */ + + /* transport_t returned in RAX */ + /* return parent fcontext_t */ + movq %rcx, 0x0(%rax) + /* return data */ + movq %r8, 0x8(%rax) + + /* transport_t as 1.arg of context-function */ + /* RCX contains address of returned (hidden) transfer_t */ + movq %rax, %rcx + /* RDX contains address of passed transfer_t */ + movq %rax, %rdx + + /* indirect jump to context */ + jmp *%r9 +.seh_endproc + +.section .drectve +.ascii " -export:\"ontop_fcontext\"" diff --git a/lib/context_switcher/asm/fcontext/ontop_x86_64_ms_pe_masm.asm b/lib/context_switcher/asm/fcontext/ontop_x86_64_ms_pe_masm.asm new file mode 100644 index 0000000..b57dd15 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_x86_64_ms_pe_masm.asm @@ -0,0 +1,207 @@ + +; Copyright Oliver Kowalke 2009. +; Distributed under the Boost Software License, Version 1.0. 
+; (See accompanying file LICENSE_1_0.txt or copy at +; http://www.boost.org/LICENSE_1_0.txt) + +; ---------------------------------------------------------------------------------- +; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +; ---------------------------------------------------------------------------------- +; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +; ---------------------------------------------------------------------------------- +; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | +; ---------------------------------------------------------------------------------- +; | 0x40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +; ---------------------------------------------------------------------------------- +; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +;
---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 32 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | +; ---------------------------------------------------------------------------------- +; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | +; ---------------------------------------------------------------------------------- +; | SSE registers (XMM6-XMM15) | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | +; ---------------------------------------------------------------------------------- +; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | +; ---------------------------------------------------------------------------------- +; | fc_mxcsr|fc_x87_cw| | fbr_strg | fc_dealloc | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | +; ---------------------------------------------------------------------------------- +; | 0xc0 | 0xc4 | 0xc8 | 0xcc | 0xd0 | 0xd4 | 0xd8 | 0xdc | +; ---------------------------------------------------------------------------------- +; | limit | base | R12 | R13 | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | +; ---------------------------------------------------------------------------------- +; | 0xe0 | 0xe4 | 0xe8 | 0xec | 0xf0 | 0xf4 | 0xf8 | 0xfc | +; ---------------------------------------------------------------------------------- +; | R14 | R15 | RDI | RSI | +;
---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | +; ---------------------------------------------------------------------------------- +; | 0x100 | 0x104 | 0x108 | 0x10c | 0x110 | 0x114 | 0x118 | 0x11c | +; ---------------------------------------------------------------------------------- +; | RBX | RBP | hidden | RIP | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | +; ---------------------------------------------------------------------------------- +; | 0x120 | 0x124 | 0x128 | 0x12c | 0x130 | 0x134 | 0x138 | 0x13c | +; ---------------------------------------------------------------------------------- +; | parameter area | +; ---------------------------------------------------------------------------------- +; ---------------------------------------------------------------------------------- +; | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | +; ---------------------------------------------------------------------------------- +; | 0x140 | 0x144 | 0x148 | 0x14c | 0x150 | 0x154 | 0x158 | 0x15c | +; ---------------------------------------------------------------------------------- +; | FCTX | DATA | | +; ---------------------------------------------------------------------------------- + +.code + +ontop_fcontext PROC BOOST_CONTEXT_EXPORT FRAME + .endprolog + + ; prepare stack + lea rsp, [rsp-0118h] + +IFNDEF BOOST_USE_TSX + ; save XMM storage + movaps [rsp], xmm6 + movaps [rsp+010h], xmm7 + movaps [rsp+020h], xmm8 + movaps [rsp+030h], xmm9 + movaps [rsp+040h], xmm10 + movaps [rsp+050h], xmm11 + movaps [rsp+060h], xmm12 + movaps [rsp+070h], xmm13 + movaps [rsp+080h], xmm14 + movaps [rsp+090h], xmm15 + ; save MXCSR (SSE control- and status-word) +
stmxcsr [rsp+0a0h] + ; save x87 control-word + fnstcw [rsp+0a4h] +ENDIF + + ; load NT_TIB + mov r10, gs:[030h] + ; save fiber local storage + mov rax, [r10+020h] + mov [rsp+0b0h], rax + ; save current deallocation stack + mov rax, [r10+01478h] + mov [rsp+0b8h], rax + ; save current stack limit + mov rax, [r10+010h] + mov [rsp+0c0h], rax + ; save current stack base + mov rax, [r10+08h] + mov [rsp+0c8h], rax + + mov [rsp+0d0h], r12 ; save R12 + mov [rsp+0d8h], r13 ; save R13 + mov [rsp+0e0h], r14 ; save R14 + mov [rsp+0e8h], r15 ; save R15 + mov [rsp+0f0h], rdi ; save RDI + mov [rsp+0f8h], rsi ; save RSI + mov [rsp+0100h], rbx ; save RBX + mov [rsp+0108h], rbp ; save RBP + + mov [rsp+0110h], rcx ; save hidden address of transfer_t + + ; preserve RSP (pointing to context-data) in RCX + mov rcx, rsp + + ; restore RSP (pointing to context-data) from RDX + mov rsp, rdx + +IFNDEF BOOST_USE_TSX + ; restore XMM storage + movaps xmm6, [rsp] + movaps xmm7, [rsp+010h] + movaps xmm8, [rsp+020h] + movaps xmm9, [rsp+030h] + movaps xmm10, [rsp+040h] + movaps xmm11, [rsp+050h] + movaps xmm12, [rsp+060h] + movaps xmm13, [rsp+070h] + movaps xmm14, [rsp+080h] + movaps xmm15, [rsp+090h] + ; restore MXCSR (SSE control- and status-word) + ldmxcsr [rsp+0a0h] + ; restore x87 control-word + fldcw [rsp+0a4h] +ENDIF + + ; load NT_TIB + mov r10, gs:[030h] + ; restore fiber local storage + mov rax, [rsp+0b0h] + mov [r10+020h], rax + ; restore current deallocation stack + mov rax, [rsp+0b8h] + mov [r10+01478h], rax + ; restore current stack limit + mov rax, [rsp+0c0h] + mov [r10+010h], rax + ; restore current stack base + mov rax, [rsp+0c8h] + mov [r10+08h], rax + + mov r12, [rsp+0d0h] ; restore R12 + mov r13, [rsp+0d8h] ; restore R13 + mov r14, [rsp+0e0h] ; restore R14 + mov r15, [rsp+0e8h] ; restore R15 + mov rdi, [rsp+0f0h] ; restore RDI + mov rsi, [rsp+0f8h] ; restore RSI + mov rbx, [rsp+0100h] ; restore RBX + mov rbp, [rsp+0108h] ; restore RBP + + mov rax, [rsp+0110h] ; restore hidden address of
transfer_t + + ; prepare stack + lea rsp, [rsp+0118h] + + ; keep return-address on stack + + ; transport_t returned in RAX + ; return parent fcontext_t + mov [rax], rcx + ; return data + mov [rax+08h], r8 + + ; transport_t as 1.arg of context-function + ; RCX contains address of returned (hidden) transfer_t + mov rcx, rax + ; RDX contains address of passed transfer_t + mov rdx, rax + + ; indirect jump to context + jmp r9 +ontop_fcontext ENDP +END diff --git a/lib/context_switcher/asm/fcontext/ontop_x86_64_sysv_elf_gas.S b/lib/context_switcher/asm/fcontext/ontop_x86_64_sysv_elf_gas.S new file mode 100644 index 0000000..447a1ec --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_x86_64_sysv_elf_gas.S @@ -0,0 +1,84 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| R12 | R13 | R14 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | R15 | RBX | RBP | RIP | * + *
---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.file "ontop_x86_64_sysv_elf_gas.S" +.text +.globl ontop_fcontext +.type ontop_fcontext,@function +.align 16 +ontop_fcontext: + /* preserve ontop-function in R8 */ + movq %rdx, %r8 + + leaq -0x38(%rsp), %rsp /* prepare stack */ + +#if !defined(BOOST_USE_TSX) + stmxcsr (%rsp) /* save MMX control- and status-word */ + fnstcw 0x4(%rsp) /* save x87 control-word */ +#endif + + movq %r12, 0x8(%rsp) /* save R12 */ + movq %r13, 0x10(%rsp) /* save R13 */ + movq %r14, 0x18(%rsp) /* save R14 */ + movq %r15, 0x20(%rsp) /* save R15 */ + movq %rbx, 0x28(%rsp) /* save RBX */ + movq %rbp, 0x30(%rsp) /* save RBP */ + + /* store RSP (pointing to context-data) in RAX */ + movq %rsp, %rax + + /* restore RSP (pointing to context-data) from RDI */ + movq %rdi, %rsp + +#if !defined(BOOST_USE_TSX) + ldmxcsr (%rsp) /* restore MMX control- and status-word */ + fldcw 0x4(%rsp) /* restore x87 control-word */ +#endif + + movq 0x8(%rsp), %r12 /* restore R12 */ + movq 0x10(%rsp), %r13 /* restore R13 */ + movq 0x18(%rsp), %r14 /* restore R14 */ + movq 0x20(%rsp), %r15 /* restore R15 */ + movq 0x28(%rsp), %rbx /* restore RBX */ + movq 0x30(%rsp), %rbp /* restore RBP */ + + leaq 0x38(%rsp), %rsp /* prepare stack */ + + /* return transfer_t from jump */ + /* RAX == fctx, RDX == data */ + movq %rsi, %rdx + /* pass transfer_t as first arg in context function */ + /* RDI == fctx, RSI == data */ + movq %rax, %rdi + + /* keep return-address on stack */ + + /* indirect jump to context */ + jmp *%r8 +.size ontop_fcontext,.-ontop_fcontext + +/* Mark that we don't need executable stack. 
*/ +.section .note.GNU-stack,"",%progbits diff --git a/lib/context_switcher/asm/fcontext/ontop_x86_64_sysv_macho_gas.S b/lib/context_switcher/asm/fcontext/ontop_x86_64_sysv_macho_gas.S new file mode 100644 index 0000000..49755c6 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/ontop_x86_64_sysv_macho_gas.S @@ -0,0 +1,78 @@ +/* + Copyright Oliver Kowalke 2009. + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ + +/**************************************************************************************** + * * + * ---------------------------------------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ---------------------------------------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | * + * ---------------------------------------------------------------------------------- * + * | fc_mxcsr|fc_x87_cw| R12 | R13 | R14 | * + * ---------------------------------------------------------------------------------- * + * ---------------------------------------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ---------------------------------------------------------------------------------- * + * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | * + * ---------------------------------------------------------------------------------- * + * | R15 | RBX | RBP | RIP | * + * ---------------------------------------------------------------------------------- * + * * + ****************************************************************************************/ + +.text +.globl _ontop_fcontext +.align 8 +_ontop_fcontext: + /* preserve ontop-function in R8 */ + movq %rdx, %r8 + + leaq -0x38(%rsp), %rsp /* prepare stack */ + +#if !defined(BOOST_USE_TSX) + stmxcsr (%rsp) /* save MMX control- and status-word */ + fnstcw 0x4(%rsp) /* save 
x87 control-word */ +#endif + + movq %r12, 0x8(%rsp) /* save R12 */ + movq %r13, 0x10(%rsp) /* save R13 */ + movq %r14, 0x18(%rsp) /* save R14 */ + movq %r15, 0x20(%rsp) /* save R15 */ + movq %rbx, 0x28(%rsp) /* save RBX */ + movq %rbp, 0x30(%rsp) /* save RBP */ + + /* store RSP (pointing to context-data) in RAX */ + movq %rsp, %rax + + /* restore RSP (pointing to context-data) from RDI */ + movq %rdi, %rsp + +#if !defined(BOOST_USE_TSX) + ldmxcsr (%rsp) /* restore MMX control- and status-word */ + fldcw 0x4(%rsp) /* restore x87 control-word */ +#endif + + movq 0x8(%rsp), %r12 /* restore R12 */ + movq 0x10(%rsp), %r13 /* restore R13 */ + movq 0x18(%rsp), %r14 /* restore R14 */ + movq 0x20(%rsp), %r15 /* restore R15 */ + movq 0x28(%rsp), %rbx /* restore RBX */ + movq 0x30(%rsp), %rbp /* restore RBP */ + + leaq 0x38(%rsp), %rsp /* prepare stack */ + + /* return transfer_t from jump */ + /* RAX == fctx, RDX == data */ + movq %rsi, %rdx + /* pass transfer_t as first arg in context function */ + /* RDI == fctx, RSI == data */ + movq %rax, %rdi + + /* keep return-address on stack */ + + /* indirect jump to context */ + jmp *%r8 diff --git a/lib/context_switcher/asm/fcontext/tail_ppc32_sysv_elf_gas.cpp b/lib/context_switcher/asm/fcontext/tail_ppc32_sysv_elf_gas.cpp new file mode 100644 index 0000000..3486084 --- /dev/null +++ b/lib/context_switcher/asm/fcontext/tail_ppc32_sysv_elf_gas.cpp @@ -0,0 +1,18 @@ + +// Copyright Oliver Kowalke 2009. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#include + +using boost::context::detail::fcontext_t; +using boost::context::detail::transfer_t; + +// This C++ tail of ontop_fcontext() allocates transfer_t{ from, vp } +// on the stack. If fn() throws a C++ exception, then the C++ runtime +// must remove this tail's stack frame. 
+extern "C" transfer_t +ontop_fcontext_tail( int ignore, void * vp, transfer_t (* fn)(transfer_t), fcontext_t const from) { + return fn( transfer_t{ from, vp }); +} diff --git a/lib/context_switcher/context_switcher-config.cmake b/lib/context_switcher/context_switcher-config.cmake new file mode 100644 index 0000000..25d3a1f --- /dev/null +++ b/lib/context_switcher/context_switcher-config.cmake @@ -0,0 +1,2 @@ +get_filename_component(SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +include(${SELF_DIR}/plsTargets.cmake) diff --git a/lib/context_switcher/include/context_switcher/context.h b/lib/context_switcher/include/context_switcher/context.h new file mode 100644 index 0000000..5f637af --- /dev/null +++ b/lib/context_switcher/include/context_switcher/context.h @@ -0,0 +1,29 @@ + +#ifndef CONTEXT_SWITCHER_CONTEXT_H_ +#define CONTEXT_SWITCHER_CONTEXT_H_ + +#ifdef CS_USE_CSCONTEXT +#include "cscontext.h" +#else +#include "fcontext.h" +#endif + +/** + * Picks the correct context switch implementation + * selected by the build system/configuration. 
+ */ +namespace context_switcher { + +#ifdef CS_USE_CSCONTEXT +using continuation_t = cscontext::continuation_t; +using stack_pointer_t = cscontext::stack_pointer_t; +using callback_t = cscontext::callback_t; +#else +using continuation_t = fcontext::continuation_t; +using stack_pointer_t = fcontext::stack_pointer_t; +using callback_t = fcontext::callback_t; +#endif + +} + +#endif //CONTEXT_SWITCHER_CONTEXT_H_ diff --git a/lib/context_switcher/include/context_switcher/context_switcher.h b/lib/context_switcher/include/context_switcher/context_switcher.h new file mode 100644 index 0000000..73465d6 --- /dev/null +++ b/lib/context_switcher/include/context_switcher/context_switcher.h @@ -0,0 +1,218 @@ +#ifndef CONTEXT_SWITCHER_CONTEXT_SWITCHER_H_ +#define CONTEXT_SWITCHER_CONTEXT_SWITCHER_H_ + +#include +#include +#include +#include + +#include "context.h" +#include "continuation.h" + +#ifdef THREAD_SANITIZER +#include +#endif + +/** + * Exposes main interface to start a stackful coroutine and to + * switch between different context. + * + * Both cscontext and fcontext implementations require no global state + * (everything is passed over in the context transfer routines). + * + * Thread sanitizer support uses global state to cache coroutine handles, + * as recycling them frequently causes problems. To do this coroutines + * started with the same stack frame are 'recycled'. For the end user this + * means that the stack memory used by the coroutines MUST NOT be used by anything + * else between usages (you can pool your stack memory as long as you always use + * the same stacks to create coroutines). + */ +namespace context_switcher { +// cscontext implementation. +// Creates coroutines on the fly while entering them, making it very fast for +// extremely short lived routines (e.g. to implement a cactus stack like construct). 
+#ifdef CS_USE_CSCONTEXT +continuation switch_context(continuation &&cont); + +template +struct lambda_capture { + template + explicit lambda_capture(FARG &&lambda) : lambda_{std::forward(lambda)} {} + + continuation operator()(continuation_t cont) { + return lambda_(continuation{cont}); + } + + private: + F lambda_; +}; + +template +static lambda_capture::type> *place_lambda_capture(F &&lambda, char *memory) { + return new(memory) lambda_capture::type>(std::forward(lambda)); +} + +template +cscontext::continuation_t lambda_capture_callback(cscontext::continuation_t continuation_pointer, + void *lambda_capture_param) { + // Perform Call + T *lambda_capture = reinterpret_cast(lambda_capture_param); + continuation cont = (*lambda_capture)(continuation_pointer); + + // Free resources and switch to result_continuation (this execution thread is finished with the return) + lambda_capture->~T(); + return cont.consume(); +} + +template +continuation enter_context(stack_pointer_t stack_memory, size_t stack_size, F &&lambda) { + stack_pointer_t lambda_memory = stack_memory + stack_size - sizeof(lambda_capture); + auto *captured_lambda = place_lambda_capture(std::forward(lambda), lambda_memory); + + stack_pointer_t stack_base = lambda_memory - 64; + stack_pointer_t stack_limit = stack_memory; + + cscontext::callback_t callback = lambda_capture_callback>; + + void *result = cscontext::__cs_enter_context(stack_base, captured_lambda, callback, stack_limit); + return continuation{result}; +} +#endif + +// fcontext implementation (no thread sanitizer). +// Creates a new coroutine, then jumps control to it. +// This creation followed by a jump is slightly slower than the cscontext direct +// creation and jump, but the boost.context assembly is available for more platforms.
+#if defined(CS_USE_FCONTEXT) and not defined(THREAD_SANITIZER)
+// Resumes the paused execution state held by `cont` (defined in context_switcher.cpp).
+continuation switch_context(continuation &&cont);
+
+// Stores the callable that forms the body of a coroutine and adapts the raw
+// continuation_t received on entry into a `continuation` object.
+template
+struct lambda_capture {
+  template
+  explicit lambda_capture(FARG &&lambda) : lambda_{std::forward(lambda)} {}
+
+  // Wraps the raw handle of the context that entered the coroutine and invokes the callable.
+  continuation operator()(continuation_t cont) {
+    return lambda_(continuation{cont});
+  }
+
+ private:
+  F lambda_;
+};
+
+// Entry function of a freshly made context (passed to make_fcontext in enter_context).
+// `transfer.data` carries the capture object, `transfer.continuation` the context that jumped here.
+// When the callable is done, control jumps to the continuation it returned with a null data
+// pointer; the receiving side treats null data as "coroutine finished".
+template
+void lambda_capture_callback(fcontext::transfer_t transfer) {
+  // Perform Call
+  T *lambda_capture = reinterpret_cast(transfer.data);
+  continuation cont = (*lambda_capture)(transfer.continuation);
+
+  // Free resources and switch to result_continuation (this execution thread is finished with the return)
+  lambda_capture->~T();
+
+  continuation_t cont_pointer = cont.consume();
+  // NOTE(review): an empty continuation is only reported on stdout; the jump below is still
+  // executed with a null target - confirm whether this should terminate the program instead.
+  if (cont_pointer == nullptr) {
+    printf("Error!!!\n");
+  }
+  // Null data marks this jump as the final one of the coroutine.
+  fcontext::jump_fcontext(cont_pointer, (void *) 0);
+}
+
+// Constructs the lambda_capture with placement new inside the caller provided `memory`.
+// NOTE(review): assumes `memory` is suitably aligned for the capture type - confirm with callers.
+template
+static lambda_capture::type> *place_lambda_capture(F &&lambda, char *memory) {
+  return new(memory) lambda_capture::type>(std::forward(lambda));
+}
+
+// Runs `lambda` as a new coroutine on the given stack memory.
+// The capture object is placed at the upper end of the stack memory, the coroutine stack starts
+// 64 bytes below it and extends down to `stack_memory`.
+// Returns the continuation of the context that jumped back to us, or an empty continuation
+// when the jump came from a finished coroutine (null data).
+template
+continuation enter_context(stack_pointer_t stack_memory, size_t stack_size, F &&lambda) {
+  stack_pointer_t lambda_memory = stack_memory + stack_size - sizeof(lambda_capture);
+  auto *captured_lambda = place_lambda_capture(std::forward(lambda), lambda_memory);
+
+  stack_pointer_t stack_base = lambda_memory - 64;
+  stack_pointer_t stack_limit = stack_memory;
+
+  fcontext::callback_t callback = lambda_capture_callback>;
+
+  // The first jump hands the capture object over as data (see lambda_capture_callback).
+  fcontext::continuation_t new_context = fcontext::make_fcontext(stack_base, stack_base - stack_limit, callback);
+  fcontext::transfer_t transfer = fcontext::jump_fcontext(new_context, captured_lambda);
+  if (transfer.data) {
+    return continuation{transfer.continuation};
+  } else {
+    return continuation{nullptr};
+  }
+}
+#endif
+
+// thread sanitizer implementation.
+// Thread sanitizer, while having fiber support since mid 2019, has trouble
+// with the creation/destruction of a high amount of short lived coroutines.
+// To circumvent this we create one coroutine per stack base address and cache them in global variables.
+// Please use with care and only for testing purposes.
+// NOTE(review): the code in this section creates a new tsan fiber in every enter_context call and
+// destroys it once the coroutine reports `finished`; no global cache is visible here - confirm
+// that the description above still matches the implementation.
+#if defined(THREAD_SANITIZER)
+// Resumes the paused execution state held by `cont` (defined in context_switcher.cpp).
+continuation switch_context(continuation &&cont);
+
+// Stores the callable that forms the body of a coroutine. In this variant the callable
+// receives a complete `continuation` (raw pointer plus tsan fiber handle).
+template
+struct lambda_capture {
+  template
+  explicit lambda_capture(FARG &&lambda) : lambda_{std::forward(lambda)} {}
+
+  continuation operator()(continuation &&cont) {
+    return lambda_(std::move(cont));
+  }
+
+ private:
+  F lambda_;
+};
+
+// Entry function of a freshly made context (passed to make_fcontext in enter_context).
+// `transfer.data` points to a fiber_data record of the jumping side; it is copied first.
+template
+void lambda_capture_callback(fcontext::transfer_t transfer) {
+  fcontext::fiber_data received_fiber_data = *(fcontext::fiber_data *) transfer.data;
+
+  // Perform Call
+  T *lambda_capture = reinterpret_cast(received_fiber_data.data);
+  continuation cont = (*lambda_capture)({transfer.continuation, received_fiber_data.old_fiber});
+
+  // Free resources and switch to result_continuation (this execution thread is finished with the return)
+  lambda_capture->~T();
+
+  // consume() only clears the raw pointer, the tsan fiber handle is still read from `cont` below.
+  continuation_t cont_pointer = cont.consume();
+  void *last_fiber = __tsan_get_current_fiber();
+  // finished == true tells the receiver that this coroutine is done; the receiver then
+  // destroys the tsan fiber passed as old_fiber.
+  fcontext::fiber_data send_fiber_data{last_fiber, nullptr, true};
+
+  // Announce the fiber switch to tsan before the actual context jump.
+  __tsan_switch_to_fiber(cont.get_tsan_fiber(), 0);
+  fcontext::jump_fcontext(cont_pointer, &send_fiber_data);
+}
+
+// Constructs the lambda_capture with placement new inside the caller provided `memory`.
+// NOTE(review): assumes `memory` is suitably aligned for the capture type - confirm with callers.
+template
+static lambda_capture::type> *place_lambda_capture(F &&lambda, char *memory) {
+  return new(memory) lambda_capture::type>(std::forward(lambda));
+}
+
+// Runs `lambda` as a new coroutine on the given stack memory and keeps tsan informed about
+// the fiber that executes it. Returns the continuation (raw pointer plus tsan fiber) of the
+// context that jumped back, or an empty continuation when the coroutine has finished.
+template
+continuation enter_context(stack_pointer_t stack_memory, size_t stack_size, F &&lambda) {
+  stack_pointer_t lambda_memory = stack_memory + stack_size - sizeof(lambda_capture);
+  auto *captured_lambda = place_lambda_capture(std::forward(lambda), lambda_memory);
+
+  stack_pointer_t stack_base = lambda_memory - 64;
+  stack_pointer_t stack_limit = stack_memory;
+
+  fcontext::callback_t callback = lambda_capture_callback>;
+  fcontext::continuation_t new_context = fcontext::make_fcontext(stack_base, stack_base - stack_limit, callback);
+
+  void *next_fiber = __tsan_create_fiber(0);
+  void *last_fiber = __tsan_get_current_fiber();
+  // Record handed to the new context: our own fiber plus the capture object to run.
+  fcontext::fiber_data send_fiber_data{last_fiber, captured_lambda, false};
+
+  __tsan_switch_to_fiber(next_fiber, 0);
+  fcontext::transfer_t transfer = fcontext::jump_fcontext(new_context, &send_fiber_data);
+  // Copy the record right away, it points into the stack frame of the context that jumped to us.
+  fcontext::fiber_data received_fiber_data = *(fcontext::fiber_data *) transfer.data;
+
+  if (received_fiber_data.finished) {
+    __tsan_destroy_fiber(received_fiber_data.old_fiber);
+    return continuation{nullptr, nullptr};
+  } else {
+    return continuation{transfer.continuation, received_fiber_data.old_fiber};
+  }
+}
+#endif
+
+}
+
+#endif //CONTEXT_SWITCHER_CONTEXT_SWITCHER_H_
diff --git a/lib/context_switcher/include/context_switcher/continuation.h b/lib/context_switcher/include/context_switcher/continuation.h
new file mode 100644
index 0000000..3c196c0
--- /dev/null
+++ b/lib/context_switcher/include/context_switcher/continuation.h
@@ -0,0 +1,73 @@
+
+#ifndef CONTEXT_SWITCHER_CONTINUATION_H_
+#define CONTEXT_SWITCHER_CONTINUATION_H_
+
+#include "context.h"
+
+#include
+#include
+
+namespace context_switcher {
+
+/**
+ * One-Shot continuation. Represents the paused state of an execution thread.
+ * Can be used exactly once to jump back to that state.
+ *
+ * Move only to ensure semantics of one time use.
+ */
+struct continuation {
+ public:
+  continuation() = default;
+#ifdef THREAD_SANITIZER
+  // Thread sanitizer builds also carry the tsan fiber handle of the paused context.
+  continuation(continuation_t cont_pointer, void *tsan_fiber) : cont_pointer_{cont_pointer}, tsan_fiber_{tsan_fiber} {}
+#else
+  explicit continuation(continuation_t cont_pointer) : cont_pointer_{cont_pointer} {}
+#endif
+
+  // Move-Only Semantics
+  continuation(const continuation &other) = delete;
+  continuation(continuation &&other) noexcept {
+    cont_pointer_ = other.cont_pointer_;
+    other.cont_pointer_ = nullptr;
+#ifdef THREAD_SANITIZER
+    tsan_fiber_ = other.tsan_fiber_;
+#endif
+  }
+
+  continuation &operator=(const continuation &other) = delete;
+  continuation &operator=(continuation &&other) noexcept {
+    // Self-move must be a no-op: without this check the pointer would be copied onto
+    // itself and then nulled, silently losing the continuation.
+    if (this != &other) {
+      cont_pointer_ = other.cont_pointer_;
+      other.cont_pointer_ = nullptr;
+#ifdef THREAD_SANITIZER
+      tsan_fiber_ = other.tsan_fiber_;
+#endif
+    }
+    return *this;
+  }
+
+  // Semantics as 'one time use'
+  // True while the continuation has neither been consumed nor moved from.
+  bool valid() const {
+    return cont_pointer_ != nullptr;
+  }
+
+  // Hands out the raw pointer and leaves this object invalid.
+  // The tsan fiber handle is left untouched; switch_context reads it after consume().
+  continuation_t consume() {
+    auto tmp = cont_pointer_;
+    cont_pointer_ = nullptr;
+    return tmp;
+  }
+
+#ifdef THREAD_SANITIZER
+  void *get_tsan_fiber() {
+    return tsan_fiber_;
+  }
+#endif
+
+ private:
+  continuation_t cont_pointer_{nullptr};
+#ifdef THREAD_SANITIZER
+  void *tsan_fiber_{nullptr};
+#endif
+};
+
+}
+
+#endif //CONTEXT_SWITCHER_CONTINUATION_H_
diff --git a/lib/context_switcher/include/context_switcher/cscontext.h b/lib/context_switcher/include/context_switcher/cscontext.h
new file mode 100644
index 0000000..120b05c
--- /dev/null
+++ b/lib/context_switcher/include/context_switcher/cscontext.h
@@ -0,0 +1,37 @@
+
+#ifndef CONTEXT_SWITCHER_ASSEMBLY_BINDINGS_H_
+#define CONTEXT_SWITCHER_ASSEMBLY_BINDINGS_H_
+
+/**
+ * Low level bindings to the two assembly functions used to switch context.
+ * Can be used standalone to build new control structures, but should be handled with care.
+ * This is the API to the cscontext version (favoring fast creating of coroutines/short running contexts).
+ *
+ * Basic usage:
+ * // Switch control to a new stack.
+ * continuation_t cont = __cs_enter_context(my_stack, arg_pointer, callback_func, my_stack_limit);
+ * // Will only be run when 'jumped back' by either returning the callback or an explicit jump.
+ * // cont will then hold the context of the jump origin (or null if it was a return from a finished callback).
+ * cont = __cs_switch_context(cont);
+ * // Re-Enter the context that jumped to us.
+ */
+
+namespace context_switcher {
+namespace cscontext {
+
+// Raw handle to a paused execution context.
+using continuation_t = void *;
+// Pointer into raw stack memory.
+using stack_pointer_t = char *;
+// Coroutine entry function: receives the continuation of the context that entered it and the
+// user argument; the continuation it returns is the one switched to when it finishes.
+using callback_t = continuation_t (*)(continuation_t, void *);
+
+// The two functions below are the assembly routines mentioned in the header comment.
+extern "C" {
+// Starts `callback(origin, callback_arg)` on the stack described by stack_base/stack_limit.
+continuation_t __cs_enter_context(stack_pointer_t stack_base,
+                                  void *callback_arg,
+                                  callback_t callback,
+                                  stack_pointer_t stack_limit);
+// Jumps to `continuation`; returns the context of the jump origin once jumped back to.
+continuation_t __cs_switch_context(continuation_t continuation);
+}
+
+}
+}
+
+#endif //CONTEXT_SWITCHER_ASSEMBLY_BINDINGS_H_
diff --git a/lib/context_switcher/include/context_switcher/fcontext.h b/lib/context_switcher/include/context_switcher/fcontext.h
new file mode 100644
index 0000000..56f4772
--- /dev/null
+++ b/lib/context_switcher/include/context_switcher/fcontext.h
@@ -0,0 +1,44 @@
+#ifndef CONTEXT_SWITCHER_FCONTEXT_H_
+#define CONTEXT_SWITCHER_FCONTEXT_H_
+
+#include
+#include
+
+/**
+ * Low level bindings to the two assembly functions used to switch context.
+ * Can be used standalone to build new control structures, but should be handled with care.
+ * This is the API to the fcontext (boost.context assembly).
+ */
+
+namespace context_switcher {
+namespace fcontext {
+
+// Raw handle to a paused execution context.
+using continuation_t = void *;
+// Pointer into raw stack memory.
+using stack_pointer_t = char *;
+
+// Result of a context jump: the context that performed the jump and the data pointer it passed.
+struct transfer_t {
+  continuation_t continuation;
+  void *data;
+};
+
+#ifdef THREAD_SANITIZER
+// Record passed as jump data in thread sanitizer builds.
+struct fiber_data {
+  void *old_fiber;  // tsan fiber of the context that performs the jump
+  void *data;       // payload, the capture object on the first jump into a coroutine
+  bool finished;    // true when the jumping coroutine has run to completion
+};
+#endif
+
+// Entry function of a context created with make_fcontext.
+using callback_t = void (*)(transfer_t);
+
+extern "C" {
+// Jumps to `cont` and passes `data`; returns when another context jumps back.
+transfer_t jump_fcontext(continuation_t cont, void *data);
+// Prepares a context that runs `corofn` on the stack starting at `sp` with `size` bytes.
+continuation_t make_fcontext(stack_pointer_t sp, size_t size, callback_t corofn);
+
+transfer_t ontop_fcontext(continuation_t cont, void *data, transfer_t(*fn)(transfer_t));
+}
+
+}
+}
+
+#endif //CONTEXT_SWITCHER_FCONTEXT_H_
diff --git a/lib/context_switcher/src/context_switcher.cpp b/lib/context_switcher/src/context_switcher.cpp
new file mode 100644
index 0000000..1fc0c60
--- /dev/null
+++ b/lib/context_switcher/src/context_switcher.cpp
@@ -0,0 +1,48 @@
+#include "context_switcher/context_switcher.h"
+
+#ifdef THREAD_SANITIZER
+#include
+#endif
+
+namespace context_switcher {
+
+#ifdef CS_USE_CSCONTEXT
+// cscontext variant: consume the continuation and switch to it; the value returned by the
+// assembly routine becomes the continuation handed back to the caller.
+continuation switch_context(continuation &&cont) {
+  continuation_t cont_pointer = cont.consume();
+  continuation_t result = cscontext::__cs_switch_context(cont_pointer);
+  return continuation{result};
+}
+#endif
+#if defined(CS_USE_FCONTEXT) and not defined(THREAD_SANITIZER)
+// fcontext variant: jumps with a non-null data marker. A finishing coroutine jumps back with
+// null data, in which case an empty continuation is returned.
+continuation switch_context(continuation &&cont) {
+  continuation_t cont_pointer = cont.consume();
+  fcontext::transfer_t transfer = fcontext::jump_fcontext(cont_pointer, (void *) 1);
+
+  if (transfer.data) {
+    return continuation{transfer.continuation};
+  } else {
+    return continuation{nullptr};
+  }
+}
+#endif
+#ifdef THREAD_SANITIZER
+// Thread sanitizer variant: announces the fiber switch to tsan before jumping and destroys
+// the tsan fiber of a coroutine that reports `finished`.
+continuation switch_context(continuation &&cont) {
+  // consume() only clears the raw pointer, the tsan fiber handle can still be read afterwards.
+  continuation_t cont_pointer = cont.consume();
+  void *next_fiber = cont.get_tsan_fiber();
+  void *last_fiber = __tsan_get_current_fiber();
+  fcontext::fiber_data send_fiber_data{last_fiber, nullptr, false};
+
+  __tsan_switch_to_fiber(next_fiber, 0);
+  fcontext::transfer_t transfer = fcontext::jump_fcontext(cont_pointer, &send_fiber_data);
+  // Copy the record right away, it points into the stack frame of the context that jumped to us.
+  fcontext::fiber_data received_fiber_data = *(fcontext::fiber_data *) transfer.data;
+
+  if (received_fiber_data.finished) {
+    __tsan_destroy_fiber(received_fiber_data.old_fiber);
+    return continuation{nullptr, nullptr};
+  } else {
+    return continuation{transfer.continuation, received_fiber_data.old_fiber};
+  }
+}
+#endif
+
+}
diff --git a/lib/pls/CMakeLists.txt b/lib/pls/CMakeLists.txt index f199d07..3311e62 100644 --- a/lib/pls/CMakeLists.txt +++ b/lib/pls/CMakeLists.txt @@ -1,62 +1,49 @@ # List all required files here (cmake best practice to NOT automate this step!) add_library(pls STATIC - include/pls/pls.h src/pls.cpp - - include/pls/algorithms/invoke.h - include/pls/algorithms/invoke_impl.h - include/pls/algorithms/for_each.h - include/pls/algorithms/for_each_impl.h - include/pls/algorithms/scan.h - include/pls/algorithms/scan_impl.h - - include/pls/dataflow/dataflow.h - include/pls/dataflow/internal/inputs.h - include/pls/dataflow/internal/outputs.h - include/pls/dataflow/internal/token.h - include/pls/dataflow/internal/in_port.h - include/pls/dataflow/internal/out_port.h - include/pls/dataflow/internal/function_node.h - include/pls/dataflow/internal/node.h - include/pls/dataflow/internal/graph.h - include/pls/dataflow/internal/build_state.h - include/pls/dataflow/internal/function_node_impl.h - include/pls/dataflow/internal/graph_impl.h - include/pls/dataflow/internal/switch_node.h - include/pls/dataflow/internal/merge_node.h + include/pls/algorithms/for_each.h include/pls/algorithms/for_each_impl.h + include/pls/algorithms/invoke.h include/pls/algorithms/invoke_impl.h + include/pls/algorithms/loop_partition_strategy.h + include/pls/algorithms/reduce.h include/pls/algorithms/reduce_impl.h include/pls/internal/base/spin_lock.h include/pls/internal/base/tas_spin_lock.h src/internal/base/tas_spin_lock.cpp include/pls/internal/base/ttas_spin_lock.h src/internal/base/ttas_spin_lock.cpp 
include/pls/internal/base/swmr_spin_lock.h src/internal/base/swmr_spin_lock.cpp - include/pls/internal/base/thread.h src/internal/base/thread.cpp - include/pls/internal/base/thread_impl.h include/pls/internal/base/barrier.h src/internal/base/barrier.cpp - include/pls/internal/base/system_details.h - include/pls/internal/base/error_handling.h + include/pls/internal/base/system_details.h src/internal/base/system_details.cpp + include/pls/internal/base/error_handling.h src/internal/base/error_handling.cpp include/pls/internal/base/alignment.h src/internal/base/alignment.cpp + include/pls/internal/base/stack_allocator.h src/internal/base/stack_allocator.cpp - include/pls/internal/scheduling/data_structures/aligned_stack.h src/internal/scheduling/data_structures/aligned_stack.cpp - include/pls/internal/scheduling/data_structures/aligned_stack_impl.h - include/pls/internal/scheduling/data_structures/deque.h - include/pls/internal/scheduling/data_structures/locking_deque.h - include/pls/internal/scheduling/data_structures/locking_deque_impl.h - include/pls/internal/scheduling/data_structures/work_stealing_deque.h include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h - include/pls/internal/scheduling/data_structures/stamped_integer.h + include/pls/internal/data_structures/aligned_stack.h src/internal/data_structures/aligned_stack.cpp + include/pls/internal/data_structures/aligned_stack_impl.h + include/pls/internal/data_structures/stamped_integer.h + include/pls/internal/data_structures/delayed_initialization.h + include/pls/internal/data_structures/bounded_trading_deque.h + include/pls/internal/data_structures/bounded_ws_deque.h + include/pls/internal/data_structures/optional.h include/pls/internal/helpers/prohibit_new.h include/pls/internal/helpers/profiler.h - include/pls/internal/helpers/mini_benchmark.h include/pls/internal/helpers/unique_id.h include/pls/internal/helpers/range.h include/pls/internal/helpers/seqence.h + 
include/pls/internal/helpers/member_function.h - include/pls/internal/scheduling/thread_state.h - include/pls/internal/scheduling/scheduler.h src/internal/scheduling/scheduler.cpp - include/pls/internal/scheduling/scheduler_impl.h + include/pls/internal/scheduling/thread_state.h src/internal/scheduling/thread_state.cpp + include/pls/internal/scheduling/scheduler.h include/pls/internal/scheduling/scheduler_impl.h src/internal/scheduling/scheduler.cpp + include/pls/internal/scheduling/task_manager.h include/pls/internal/scheduling/task_manager_impl.h src/internal/scheduling/task_manager.cpp include/pls/internal/scheduling/task.h src/internal/scheduling/task.cpp - include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp - include/pls/internal/scheduling/lambda_task.h - include/pls/dataflow/internal/split_node.h include/pls/internal/helpers/member_function.h) + include/pls/internal/scheduling/external_trading_deque.h src/internal/scheduling/external_trading_deque.cpp + include/pls/internal/scheduling/traded_cas_field.h include/pls/algorithms/loop_partition_strategy.h) + +# Dependencies for pls +target_link_libraries(pls Threads::Threads) +target_link_libraries(pls context_switcher) +if (EASY_PROFILER) + target_link_libraries(pls easy_profiler) +endif () + # Add everything in `./include` to be in the include path of this project target_include_directories(pls PUBLIC @@ -66,22 +53,19 @@ target_include_directories(pls ${CMAKE_CURRENT_SOURCE_DIR}/src # TODO: Set this up when we require private headers ) -# Add cmake dependencies here if needed -target_link_libraries(pls - Threads::Threads # pthread support - ) -if (EASY_PROFILER) - target_link_libraries(pls easy_profiler) -endif () +# Enable warnings/tidy code checking from our compiler +target_compile_options(pls PRIVATE + $<$,$,$>: + -Wall> + $<$: + -W4>) -# Rules for istalling the library on a system +# Rules for installing the library on a system # ...binaries INSTALL(TARGETS pls 
EXPORT pls-targets - LIBRARY - DESTINATION lib/pls - ARCHIVE - DESTINATION lib/pls + LIBRARY DESTINATION lib/pls + ARCHIVE DESTINATION lib/pls ) # ...all headers in `include` INSTALL( @@ -100,17 +84,10 @@ INSTALl( FILES pls-config.cmake DESTINATION lib/pls ) -# ...add a custom target that will only build the library when istalling. +# ...add a custom target that will only build the library when installing. # This can allow us to speed up the installation on embedded devices. ADD_CUSTOM_TARGET(install.pls ${CMAKE_COMMAND} -DBUILD_TYPE=${CMAKE_BUILD_TYPE} -P ${CMAKE_BINARY_DIR}/cmake_install.cmake) -ADD_DEPENDENCIES(install.pls pls) - -# Enable warnings/tidy code checking from our compiler -target_compile_options(pls PRIVATE - $<$,$,$>: - -Wall> - $<$: - -W4>) +ADD_DEPENDENCIES(install.pls context_switcher pls) diff --git a/lib/pls/include/pls/algorithms/for_each.h b/lib/pls/include/pls/algorithms/for_each.h index 0cc11b1..7430ea5 100644 --- a/lib/pls/include/pls/algorithms/for_each.h +++ b/lib/pls/include/pls/algorithms/for_each.h @@ -2,28 +2,31 @@ #ifndef PLS_PARALLEL_FOR_H #define PLS_PARALLEL_FOR_H -namespace pls { -namespace algorithm { +#include "loop_partition_strategy.h" -class fixed_strategy; -class dynamic_strategy; +namespace pls::algorithm { template -void for_each_range(unsigned long first, - unsigned long last, - const Function &function, - ExecutionStrategy &execution_strategy); +static void for_each_range(unsigned long first, + unsigned long last, + const Function &function, + ExecutionStrategy &execution_strategy); template -void for_each_range(unsigned long first, unsigned long last, const Function &function); +static void for_each_range(unsigned long first, + unsigned long last, + const Function &function); template -void for_each(RandomIt first, RandomIt last, const Function &function, ExecutionStrategy execution_strategy); +static void for_each(RandomIt first, + RandomIt last, + const Function &function, + ExecutionStrategy execution_strategy); 
template -void for_each(RandomIt first, RandomIt last, const Function &function); - -} +static void for_each(RandomIt first, + RandomIt last, + const Function &function); } #include "for_each_impl.h" diff --git a/lib/pls/include/pls/algorithms/for_each_impl.h b/lib/pls/include/pls/algorithms/for_each_impl.h index 058ae9f..9d2bb8a 100644 --- a/lib/pls/include/pls/algorithms/for_each_impl.h +++ b/lib/pls/include/pls/algorithms/for_each_impl.h @@ -2,19 +2,18 @@ #ifndef PLS_PARALLEL_FOR_IMPL_H #define PLS_PARALLEL_FOR_IMPL_H -#include "pls/internal/scheduling/task.h" #include "pls/internal/scheduling/scheduler.h" -#include "pls/internal/scheduling/thread_state.h" - -#include "pls/internal/helpers/unique_id.h" #include "pls/internal/helpers/range.h" -namespace pls { -namespace algorithm { +namespace pls::algorithm { + namespace internal { template -void for_each(const RandomIt first, const RandomIt last, const Function function, const long min_elements) { +static void for_each(const RandomIt first, + const RandomIt last, + const Function &function, + const long min_elements) { using namespace ::pls::internal::scheduling; const long num_elements = std::distance(first, last); @@ -27,80 +26,58 @@ void for_each(const RandomIt first, const RandomIt last, const Function function // Cut in half recursively const long middle_index = num_elements / 2; - auto second_half_body = - [first, middle_index, last, &function, min_elements] { - internal::for_each(first + middle_index, - last, - function, - min_elements); - }; - using second_half_t = lambda_task_by_reference; - scheduler::spawn_child(std::move(second_half_body)); - - auto first_half_body = - [first, middle_index, last, &function, min_elements] { - internal::for_each(first, - first + middle_index, - function, - min_elements); - }; - using first_half_t = lambda_task_by_reference; - scheduler::spawn_child_and_wait(std::move(first_half_body)); + scheduler::spawn([first, middle_index, last, &function, min_elements] { + 
internal::for_each(first, + first + middle_index, + function, + min_elements); + }); + scheduler::spawn([first, middle_index, last, &function, min_elements] { + internal::for_each(first + middle_index, + last, + function, + min_elements); + }); + scheduler::sync(); } } } -class dynamic_strategy { - public: - explicit dynamic_strategy(const unsigned int tasks_per_thread = 4) : tasks_per_thread_{tasks_per_thread} {}; - - long calculate_min_elements(long num_elements) const { - const long num_threads = pls::internal::scheduling::thread_state::get()->scheduler_->num_threads(); - return num_elements / (num_threads * tasks_per_thread_); - } - private: - unsigned const int tasks_per_thread_; -}; - -class fixed_strategy { - public: - explicit fixed_strategy(const long min_elements_per_task) : min_elements_per_task_{min_elements_per_task} {}; - - long calculate_min_elements(long /*num_elements*/) const { - return min_elements_per_task_; - } - private: - const long min_elements_per_task_; -}; - template -void for_each(RandomIt first, RandomIt last, const Function &function, ExecutionStrategy execution_strategy) { +static void for_each(RandomIt + first, + RandomIt last, + const Function &function, + ExecutionStrategy + execution_strategy) { long num_elements = std::distance(first, last); - internal::for_each(first, last, function, execution_strategy.calculate_min_elements(num_elements)); + return + internal::for_each(first, last, function, execution_strategy.calculate_min_elements(num_elements)); } template -void for_each(RandomIt first, RandomIt last, const Function &function) { - for_each(first, last, function, dynamic_strategy{4}); +static void for_each(RandomIt first, RandomIt last, const Function &function) { + return for_each(first, last, function, dynamic_strategy{4}); } template -void for_each_range(unsigned long first, - unsigned long last, - const Function &function, - ExecutionStrategy execution_strategy) { +static void for_each_range(unsigned long first, + 
unsigned long last, + const Function &function, + ExecutionStrategy execution_strategy) { auto range = pls::internal::helpers::range(first, last); - for_each(range.begin(), range.end(), function, execution_strategy); + return for_each(range.begin(), range.end(), function, execution_strategy); } template -void for_each_range(unsigned long first, unsigned long last, const Function &function) { +static void for_each_range(unsigned long first, + unsigned long last, + const Function &function) { auto range = pls::internal::helpers::range(first, last); - for_each(range.begin(), range.end(), function); + return for_each(range.begin(), range.end(), function); } } -} #endif //PLS_INVOKE_PARALLEL_IMPL_H
diff --git a/lib/pls/include/pls/algorithms/invoke.h b/lib/pls/include/pls/algorithms/invoke.h
index 3197a97..a2829e7 100644
--- a/lib/pls/include/pls/algorithms/invoke.h
+++ b/lib/pls/include/pls/algorithms/invoke.h
@@ -2,22 +2,22 @@
 #ifndef PLS_PARALLEL_INVOKE_H
 #define PLS_PARALLEL_INVOKE_H
 
-#include "pls/internal/scheduling/task.h"
 #include "pls/internal/scheduling/scheduler.h"
 
-namespace pls {
-namespace algorithm {
+namespace pls::algorithm {
 
+// Spawns every given callable via scheduler::spawn and then calls scheduler::sync()
+// (see invoke_impl.h). The parameters are forwarding references so that these
+// declarations match the definitions in invoke_impl.h instead of declaring extra overloads.
 template
-void invoke(const Function1 &function1, const Function2 &function2);
+void invoke(Function1 &&function1, Function2 &&function2);
 
 template
-void invoke(const Function1 &function1, const Function2 &function2, const Function3 &function3);
+void invoke(Function1 &&function1, Function2 &&function2, Function3 &&function3);
 
 // ...and so on, add more if we decide to keep this design
 
 }
-}
 #include "invoke_impl.h"
 
 #endif //PLS_PARALLEL_INVOKE_H
diff --git a/lib/pls/include/pls/algorithms/invoke_impl.h b/lib/pls/include/pls/algorithms/invoke_impl.h index fe64cd7..83c4126 100644 --- a/lib/pls/include/pls/algorithms/invoke_impl.h +++ b/lib/pls/include/pls/algorithms/invoke_impl.h @@ -2,39 +2,29 @@ #ifndef PLS_INVOKE_PARALLEL_IMPL_H #define PLS_INVOKE_PARALLEL_IMPL_H -#include "pls/internal/scheduling/task.h" -#include "pls/internal/scheduling/lambda_task.h" #include "pls/internal/scheduling/scheduler.h" -#include "pls/internal/scheduling/thread_state.h" -namespace pls { -namespace algorithm { +namespace pls::algorithm { template void invoke(Function1 &&function1, Function2 &&function2) { using namespace ::pls::internal::scheduling; - using task_1_t = lambda_task_by_value; - using task_2_t = lambda_task_by_value; - - scheduler::spawn_child(std::forward(function2)); - scheduler::spawn_child_and_wait(std::forward(function1)); + scheduler::spawn(std::forward(function1)); + scheduler::spawn(std::forward(function2)); + scheduler::sync(); } template void invoke(Function1 &&function1, Function2 &&function2, Function3 &&function3) { using namespace ::pls::internal::scheduling; - using task_1_t = lambda_task_by_value; - using task_2_t = lambda_task_by_value; - using task_3_t = lambda_task_by_value; - - scheduler::spawn_child(std::forward(function3)); - scheduler::spawn_child(std::forward(function2)); - scheduler::spawn_child_and_wait(std::forward(function1)); + scheduler::spawn(std::forward(function1)); + scheduler::spawn(std::forward(function2)); + scheduler::spawn(std::forward(function3)); + scheduler::sync(); } } -} #endif //PLS_INVOKE_PARALLEL_IMPL_H
diff --git a/lib/pls/include/pls/algorithms/loop_partition_strategy.h b/lib/pls/include/pls/algorithms/loop_partition_strategy.h
new file mode 100644
index 0000000..315a5cd
--- /dev/null
+++ b/lib/pls/include/pls/algorithms/loop_partition_strategy.h
@@ -0,0 +1,54 @@
+
+#ifndef PLS_ALGO_LOOP_PARTITION_STRATEGY_H_
+#define PLS_ALGO_LOOP_PARTITION_STRATEGY_H_
+
+#include "pls/internal/scheduling/scheduler.h"
+#include "pls/internal/scheduling/thread_state.h"
+
+namespace pls::algorithm {
+/**
+ * Partition strategy that derives the sequential cut-off from the number of scheduler threads.
+ *
+ * calculate_min_elements() yields the range size at or below which the recursive algorithms
+ * stop splitting and process the elements in a plain loop. The result is always >= 1:
+ * with a cut-off of 0 a one element range would be split into an empty and a one element
+ * half over and over again and the recursion would never terminate.
+ */
+class dynamic_strategy {
+ public:
+  explicit dynamic_strategy(const unsigned int tasks_per_thread = 4) : tasks_per_thread_{tasks_per_thread} {}
+
+  [[nodiscard]] long calculate_min_elements(long num_elements) const {
+    const long num_threads = pls::internal::scheduling::thread_state::get().get_scheduler().num_threads();
+    const long tasks_per_thread = tasks_per_thread_;
+    const long num_tasks = num_threads * tasks_per_thread;
+    if (num_tasks <= 0) {
+      // Nothing sensible to divide by (e.g. tasks_per_thread == 0): do not split at all.
+      return num_elements > 0 ? num_elements : 1;
+    }
+
+    // Ranges smaller than num_tasks divide down to 0, clamp to keep the recursion finite.
+    const long min_elements = num_elements / num_tasks;
+    return min_elements > 0 ? min_elements : 1;
+  }
+ private:
+  unsigned const int tasks_per_thread_;
+};
+
+/**
+ * Partition strategy with a fixed, caller supplied sequential cut-off.
+ * Values below 1 are raised to 1 so that the recursive splitting always terminates.
+ */
+class fixed_strategy {
+ public:
+  explicit fixed_strategy(const long min_elements_per_task) : min_elements_per_task_{min_elements_per_task} {}
+
+  [[nodiscard]] long calculate_min_elements(long /*num_elements*/) const {
+    return min_elements_per_task_ > 0 ? min_elements_per_task_ : 1;
+  }
+ private:
+  const long min_elements_per_task_;
+};
+}
+
+#endif //PLS_ALGO_LOOP_PARTITION_STRATEGY_H_
diff --git a/lib/pls/include/pls/algorithms/reduce.h b/lib/pls/include/pls/algorithms/reduce.h new file mode 100644 index 0000000..8dd18ee --- /dev/null +++ b/lib/pls/include/pls/algorithms/reduce.h @@ -0,0 +1,24 @@ + +#ifndef PLS_PARALLEL_REDUCE_H +#define PLS_PARALLEL_REDUCE_H + +#include "loop_partition_strategy.h" + +namespace pls::algorithm { + +template +static auto reduce(RandomIt first, + RandomIt last, + decltype(*first) neutral, + const Function &reducer, + ExecutionStrategy execution_strategy); + +template +static auto reduce(RandomIt first, + RandomIt last, + decltype(*first) neutral, + const Function &reducer); +} +#include "reduce_impl.h" + +#endif //PLS_PARALLEL_REDUCE_H diff --git a/lib/pls/include/pls/algorithms/reduce_impl.h b/lib/pls/include/pls/algorithms/reduce_impl.h new file mode 100644 index 0000000..5022c9c --- /dev/null +++ b/lib/pls/include/pls/algorithms/reduce_impl.h @@ -0,0 +1,79 @@ + +#ifndef PLS_PARALLEL_REDUCE_IMPL_H +#define PLS_PARALLEL_REDUCE_IMPL_H + +#include "pls/internal/scheduling/scheduler.h" +#include "pls/algorithms/loop_partition_strategy.h" + +namespace pls::algorithm { + +namespace internal { + +template +static Element reduce(const RandomIt first, + const RandomIt last, + Element neutral, + const Function &reducer, + const long min_elements) { + using namespace ::pls::internal::scheduling; + + const long num_elements = 
std::distance(first, last); + if (num_elements <= min_elements) { + // calculate last elements in loop to avoid overhead + Element acc = neutral; + for (auto current = first; current != last; current++) { + acc = reducer(acc, *current); + } + return acc; + } else { + // Cut in half recursively + const long middle_index = num_elements / 2; + + Element left, right; + scheduler::spawn([first, middle_index, last, neutral, &reducer, min_elements, &left] { + left = internal::reduce(first, + first + middle_index, + neutral, + reducer, + min_elements); + }); + scheduler::spawn([first, middle_index, last, neutral, &reducer, min_elements, &right] { + right = internal::reduce(first + middle_index, + last, + neutral, + reducer, + min_elements); + }); + scheduler::sync(); + return reducer(left, right); + } +} + +} + +template +static auto reduce(RandomIt first, + RandomIt last, + decltype(*first) neutral, + const Function &reducer, + ExecutionStrategy execution_strategy) { + long num_elements = std::distance(first, last); + return internal::reduce(first, + last, + neutral, + reducer, + execution_strategy.calculate_min_elements(num_elements)); +} + +template +static auto reduce(RandomIt first, + RandomIt last, + decltype(*first) neutral, + const Function &reducer) { + return reduce(first, last, neutral, reducer, dynamic_strategy{4}); +} + +} + +#endif //PLS_PARALLEL_REDUCE_IMPL_H diff --git a/lib/pls/include/pls/algorithms/scan.h b/lib/pls/include/pls/algorithms/scan.h deleted file mode 100644 index 1db358b..0000000 --- a/lib/pls/include/pls/algorithms/scan.h +++ /dev/null @@ -1,15 +0,0 @@ - -#ifndef PLS_PARALLEL_SCAN_H_ -#define PLS_PARALLEL_SCAN_H_ - -namespace pls { -namespace algorithm { - -template -void scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem); - -} -} -#include "scan_impl.h" - -#endif //PLS_PARALLEL_SCAN_H_ diff --git a/lib/pls/include/pls/algorithms/scan_impl.h b/lib/pls/include/pls/algorithms/scan_impl.h deleted file mode 
100644 index 1af0418..0000000 --- a/lib/pls/include/pls/algorithms/scan_impl.h +++ /dev/null @@ -1,115 +0,0 @@ - -#ifndef PLS_PARALLEL_SCAN_IMPL_H_ -#define PLS_PARALLEL_SCAN_IMPL_H_ - -#include -#include - -#include "pls/pls.h" -#include "pls/internal/scheduling/thread_state.h" -#include "pls/internal/scheduling/task.h" - -namespace pls { -namespace algorithm { -namespace internal { - -using namespace pls::internal::scheduling; - -template -void serial_scan(InIter input_start, const InIter input_end, OutIter output, BinaryOp op, Type neutral_element) { - - auto current_output = output; - auto last_value = neutral_element; - - for (auto current_input = input_start; current_input != input_end; current_input++) { - last_value = op(last_value, *current_input); - *current_output = last_value; - current_output++; - } -} - -template -class scan_task : public pls::internal::scheduling::task { - const InIter in_start_; - const InIter in_end_; - const OutIter out_; - const BinaryOp op_; - const Type neutral_elem_; - - long size_, chunks_; - long items_per_chunk_; - Type *chunk_sums_; - - public: - scan_task(const InIter in_start, const InIter in_end, const OutIter out, const BinaryOp op, const Type neutral_elem) : - in_start_{in_start}, - in_end_{in_end}, - out_{out}, - op_{op}, - neutral_elem_{neutral_elem} { - constexpr auto chunks_per_thread = 1; - - size_ = std::distance(in_start, in_end); - auto num_threads = thread_state::get()->scheduler_->num_threads(); - chunks_ = num_threads * chunks_per_thread + 1; - items_per_chunk_ = size_ / chunks_ + 1; - - chunk_sums_ = reinterpret_cast(allocate_memory(sizeof(Type) * chunks_ - 1)); - }; - - void execute_internal() override { - // First Pass = calculate each chunks individual prefix sum - for_each_range(0, chunks_ - 1, [&](int i) { - auto chunk_start = in_start_ + items_per_chunk_ * i; - auto chunk_end = std::min(in_end_, chunk_start + items_per_chunk_); - auto chunk_size = std::distance(chunk_start, chunk_end); - auto 
chunk_output = out_ + items_per_chunk_ * i; - - internal::serial_scan(chunk_start, chunk_end, chunk_output, op_, neutral_elem_); - auto last_chunk_value = *(chunk_output + chunk_size - 1); - chunk_sums_[i] = last_chunk_value; - }, fixed_strategy{1}); - - // Calculate prefix sums of each chunks sum - // (effectively the prefix sum at the end of each chunk, then used to correct the following chunk). - internal::serial_scan(chunk_sums_, chunk_sums_ + chunks_ - 1, chunk_sums_, op_, neutral_elem_); - - // Second Pass = Use results from first pass to correct each chunks sum - auto output_start = out_; - auto output_end = out_ + size_; - for_each_range(1, chunks_, [&](int i) { - if (i == chunks_ - 1) { - auto chunk_start = in_start_ + items_per_chunk_ * i; - auto chunk_end = std::min(in_end_, chunk_start + items_per_chunk_); - auto chunk_output = output_start + items_per_chunk_ * i; - - *chunk_start += chunk_sums_[i - 1]; - internal::serial_scan(chunk_start, chunk_end, chunk_output, op_, neutral_elem_); - } else { - auto chunk_start = output_start + items_per_chunk_ * i; - auto chunk_end = std::min(output_end, chunk_start + items_per_chunk_); - for (; chunk_start != chunk_end; chunk_start++) { - *chunk_start = op_(*chunk_start, chunk_sums_[i - 1]); - } - } - }, fixed_strategy{1}); - - wait_for_all(); - this->~scan_task(); - } -}; - -} - -template -void scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem) { - using namespace pls::internal::scheduling; - - using scan_task_type = internal::scan_task; - scheduler::spawn_child_and_wait(in_start, in_end, out, op, neutral_elem); -} - -} -} - -#endif //PLS_PARALLEL_SCAN_IMPL_H_ diff --git a/lib/pls/include/pls/internal/base/alignment.h b/lib/pls/include/pls/internal/base/alignment.h index 70aac52..dcaf894 100644 --- a/lib/pls/include/pls/internal/base/alignment.h +++ b/lib/pls/include/pls/internal/base/alignment.h @@ -4,28 +4,70 @@ #include #include +#include #include "system_details.h" 
-namespace pls { -namespace internal { -namespace base { -namespace alignment { +namespace pls::internal::base::alignment { -template -struct aligned_wrapper { - alignas(system_details::CACHE_LINE_SIZE) unsigned char data[sizeof(T)]; - T *pointer() { return reinterpret_cast(data); } +constexpr system_details::pointer_t next_alignment(system_details::pointer_t size, + size_t alignment = system_details::CACHE_LINE_SIZE) { + return (size % alignment) == 0 ? + size : + size + (alignment - (size % alignment)); +} + +constexpr system_details::pointer_t previous_alignment(system_details::pointer_t size, + size_t alignment = system_details::CACHE_LINE_SIZE) { + return (size % alignment) == 0 ? + size : + size - (size % alignment); +} + +char *next_alignment(char *pointer, size_t alignment = system_details::CACHE_LINE_SIZE); + +/** + * Forces alignment requirements on a type equal to the given size. + * This can be useful to store the elements aligned in an array or to allocate them using new. + * + * The object is constructed using perfect forwarding. Thus initialization looks identical to the + * wrapped object (can be used in containers), access to the elements has to be done through + * the pointer returned by pointer() or moved out using object(). + * + * (This is required, as C++11 does not allow 'over_aligned' types, meaning alignments higher + * than the max_align_t (most times 16 bytes) wont be respected properly.) 
+ */ +template +struct alignment_wrapper { + static_assert(alignof(T) <= ALIGNMENT, "Must not wrap an type to an alignment smaller than required by the type!"); + + private: + std::array memory_; + T *data_; + public: + template + explicit alignment_wrapper(ARGS &&...args) + : memory_{}, data_{reinterpret_cast(next_alignment(memory_.data(), ALIGNMENT))} { + new(data_) T(std::forward(args)...); + } + ~alignment_wrapper() { + data_->~T(); + } + + T &object() { return *data_; } + T *pointer() { return data_; } }; -void *allocate_aligned(size_t size); -system_details::pointer_t next_alignment(system_details::pointer_t size); -system_details::pointer_t previous_alignment(system_details::pointer_t size); +/** + * Our common case or over aligning types is when we want to hit cache lines. + * The most portable way is to use the above alignment wrapper, removing any alignment + * requirements from the wrapped type itself (here the cache line) and manually filling + * up padding/fill bytes as needed. 
+ */ +template +using cache_alignment_wrapper = alignment_wrapper; -} } -} -} #endif //PLS_ALIGNMENT_H diff --git a/lib/pls/include/pls/internal/base/backoff.h b/lib/pls/include/pls/internal/base/backoff.h index 45ca814..f0625b2 100644 --- a/lib/pls/include/pls/internal/base/backoff.h +++ b/lib/pls/include/pls/internal/base/backoff.h @@ -4,14 +4,13 @@ #include "pls/internal/base/system_details.h" #include "pls/internal/helpers/profiler.h" -#include "pls/internal/base/thread.h" #include -#include +#include +#include +#include -namespace pls { -namespace internal { -namespace base { +namespace pls::internal::base { class backoff { const unsigned long INITIAL_SPIN_ITERS = 2u << 1u; @@ -36,7 +35,8 @@ class backoff { if (current_ >= YELD_ITERS) { PROFILE_LOCK("Yield") - this_thread::sleep(5); + using namespace std::chrono_literals; + std::this_thread::sleep_for(5us); } current_ = std::min(current_ * 2, MAX_ITERS); @@ -48,7 +48,5 @@ class backoff { }; } -} -} #endif //PLS_BACKOFF_H_ diff --git a/lib/pls/include/pls/internal/base/error_handling.h b/lib/pls/include/pls/internal/base/error_handling.h index 15a2df6..8704cc8 100644 --- a/lib/pls/include/pls/internal/base/error_handling.h +++ b/lib/pls/include/pls/internal/base/error_handling.h @@ -12,6 +12,10 @@ * (or its inclusion adds too much overhead). */ #define PLS_ERROR(msg) printf("%s\n", msg); exit(1); -#define PLS_ASSERT(cond, msg) if (!(cond)) { PLS_ERROR(msg) } + +void pls_error(const char *msg); + +// TODO: Distinguish between debug/internal asserts and production asserts. 
+#define PLS_ASSERT(cond, msg) if (!(cond)) { pls_error(msg); } #endif //PLS_ERROR_HANDLING_H diff --git a/lib/pls/include/pls/internal/base/stack_allocator.h b/lib/pls/include/pls/internal/base/stack_allocator.h new file mode 100644 index 0000000..ec99bf5 --- /dev/null +++ b/lib/pls/include/pls/internal/base/stack_allocator.h @@ -0,0 +1,28 @@ + +#ifndef PLS_INCLUDE_PLS_INTERNAL_BASE_STACK_ALLOCATOR_H_ +#define PLS_INCLUDE_PLS_INTERNAL_BASE_STACK_ALLOCATOR_H_ + +#include + +namespace pls::internal::base { +class stack_allocator { + public: + virtual char *allocate_stack(size_t size) = 0; + virtual void free_stack(size_t size, char *stack) = 0; +}; + +class heap_stack_allocator : public stack_allocator { + public: + char *allocate_stack(size_t size) override { return new char[size]; } + void free_stack(size_t, char *stack) override { delete[] stack; } +}; + +class mmap_stack_allocator : public stack_allocator { + public: + char *allocate_stack(size_t size) override; + void free_stack(size_t, char *stack) override; +}; + +} + +#endif //PLS_INCLUDE_PLS_INTERNAL_BASE_STACK_ALLOCATOR_H_ diff --git a/lib/pls/include/pls/internal/base/system_details.h b/lib/pls/include/pls/internal/base/system_details.h index 01543c0..68821a8 100644 --- a/lib/pls/include/pls/internal/base/system_details.h +++ b/lib/pls/include/pls/internal/base/system_details.h @@ -14,19 +14,17 @@ #endif #endif -#include +#include "error_handling.h" -namespace pls { -namespace internal { -namespace base { +#include /** * Collection of system details, e.g. hardware cache line size. * * PORTABILITY: - * Currently sane default values for x86. + * Currently sane default values for x86. Must be changed for any port. */ -namespace system_details { +namespace pls::internal::base::system_details { /** * Pointer Types needed for ABA protection mixed into addresses. @@ -39,19 +37,53 @@ using pointer_t = std::uintptr_t; * Usually it is sane to assume a pointer can be swapped in a single CAS operation. 
*/ using cas_integer = std::uintptr_t; -constexpr unsigned long CAS_SIZE = sizeof(cas_integer); +constexpr unsigned long CAS_SIZE = sizeof(cas_integer) * 8; /** * Most processors have 64 byte cache lines (last 6 bit of the address are zero at line beginnings). */ -constexpr pointer_t CACHE_LINE_SIZE = 64; +constexpr size_t CACHE_LINE_SIZE = 64; + +/** + * Helper to align types/values on cache lines. + */ +#define PLS_CACHE_ALIGN alignas(base::system_details::CACHE_LINE_SIZE) + +/** + * Helper to find mmap page size. Either set constant or rely on system specific getter function. + */ +size_t get_page_size(); /** - * Choose one of the following ways to store thread specific data. - * Try to choose the fastest available on this processor/system. + * Wrapper to create a new memory mapping. + * Currently implemented for linux/posix systems. + * + * @param size The page_size aligned size of the new mapping + * @return The newly created mapping or the error returned by the sytem call + */ +void *memory_map_range(size_t size); + +/** + * Helper to revert a previous made memory mapping. + * currently implemented for linux/posix systems. + * + * @param addr The start address of the mapping. + * @param size The size of the mapping. + * + * @return status code from system call */ -//#define PLS_THREAD_SPECIFIC_PTHREAD -#define PLS_THREAD_SPECIFIC_COMPILER +int memory_unmap_range(void *addr, size_t size); + +/** + * Helper to protect a specific address range (must be mapped in the application) + * from any access. Later accesses to this address range should result in a system error. + * + * @param addr The start address of the to be protected block. + * @param size The size of the protected block. + * + * @return status code from system call + */ +int memory_protect_range(void *addr, size_t size); /** * When spinning one wants to 'relax' the CPU from some task, @@ -67,9 +99,24 @@ inline void relax_cpu() { #endif } -} -} -} +/** + * Prevent inlining of functions. 
This is a compiler specific setting and + * it is seen as an error to not properly declare this. + * (Some functions in the codebase MUST be re-evaluated after fiber switches, + * by preventing inlining them we prevent the compiler caching their results) + */ +#if defined(_MSC_VER) +#define PLS_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) && __GNUC__ > 3 +#if defined(__CUDACC__) +#define PLS_NOINLINE __attribute__ ((noinline)) +#else +#define PLS_NOINLINE __attribute__ ((__noinline__)) +#endif +#else +#error "PLS requires inline prevention for certain functions." +#endif + } #endif //PLS_SYSTEM_DETAILS_H diff --git a/lib/pls/include/pls/internal/base/tas_spin_lock.h b/lib/pls/include/pls/internal/base/tas_spin_lock.h index 74a11a5..e4438e6 100644 --- a/lib/pls/include/pls/internal/base/tas_spin_lock.h +++ b/lib/pls/include/pls/internal/base/tas_spin_lock.h @@ -7,8 +7,6 @@ #include #include -#include "pls/internal/base/thread.h" - namespace pls { namespace internal { namespace base { diff --git a/lib/pls/include/pls/internal/base/thread.h b/lib/pls/include/pls/internal/base/thread.h deleted file mode 100644 index 74e38ec..0000000 --- a/lib/pls/include/pls/internal/base/thread.h +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Abstraction for threading to allow porting. - * Currently using either pthread or C++ 11 threads. - */ - -#ifndef PLS_THREAD_H -#define PLS_THREAD_H - -#include -#include -#include -#include - -#include "system_details.h" - -namespace pls { -namespace internal { -namespace base { - -using thread_entrypoint = void(); - -/** - * Static methods than can be performed on the current thread. - * - * usage: - * this_thread::yield(); - * T* state = this_thread::state(); - * - * PORTABILITY: - * Current implementation is based on pthreads. 
- */ -class this_thread { - friend - class thread; -#ifdef PLS_THREAD_SPECIFIC_PTHREAD - static pthread_key_t local_storage_key_; - static bool local_storage_key_initialized_; -#endif -#ifdef PLS_THREAD_SPECIFIC_COMPILER - static __thread void *local_state_; -#endif - public: - static void yield() { - pthread_yield(); - } - - static void sleep(long microseconds) { - timespec time{0, 1000 * microseconds}; - nanosleep(&time, nullptr); - } - - /** - * Retrieves the local state pointer. - * - * @tparam T The type of the state that is stored. - * @return The state pointer hold for this thread. - */ - template - static T *state(); - - /** - * Stores a pointer to the thread local state object. - * The memory management for this has to be done by the user, - * we only keep the pointer. - * - * @tparam T The type of the state that is stored. - * @param state_pointer A pointer to the threads state object. - */ - template - static void set_state(T *state_pointer); -}; - -/** - * Abstraction for starting a function in a separate thread. - * Offers only threading functionality needed in this project, - * underlying implementation can be changed. - * Uses NO heap memory allocation. - * - * PORTABILITY: - * Current implementation is based on pthreads. 
- */ -class thread { - friend class this_thread; - // Keep handle to native implementation - pthread_t pthread_thread_; - - template - static void *start_pthread_internal(void *thread_pointer); - - public: - template - explicit thread(const Function &function, State *state_pointer); - - template - explicit thread(const Function &function); - - public: - void join(); - - // make object move only - thread(thread &&) noexcept = default; - thread &operator=(thread &&) noexcept = default; - - thread(const thread &) = delete; - thread &operator=(const thread &) = delete; -}; - -} -} -} -#include "thread_impl.h" - -#endif //PLS_THREAD_H diff --git a/lib/pls/include/pls/internal/base/thread_impl.h b/lib/pls/include/pls/internal/base/thread_impl.h deleted file mode 100644 index 498ed2b..0000000 --- a/lib/pls/include/pls/internal/base/thread_impl.h +++ /dev/null @@ -1,84 +0,0 @@ - -#ifndef PLS_THREAD_IMPL_H -#define PLS_THREAD_IMPL_H - -namespace pls { -namespace internal { -namespace base { - -template -T *this_thread::state() { -#ifdef PLS_THREAD_SPECIFIC_PTHREAD - return reinterpret_cast(pthread_getspecific(local_storage_key_)); -#endif -#ifdef PLS_THREAD_SPECIFIC_COMPILER - return reinterpret_cast(local_state_); -#endif -} - -template -void this_thread::set_state(T *state_pointer) { -#ifdef PLS_THREAD_SPECIFIC_PTHREAD - pthread_setspecific(this_thread::local_storage_key_, (void*)state_pointer); -#endif -#ifdef PLS_THREAD_SPECIFIC_COMPILER - local_state_ = state_pointer; -#endif -} - -template -struct thread_arguments { - Function function_; - State *state_; - std::atomic_flag *startup_flag_; -}; - -template -void *thread::start_pthread_internal(void *thread_pointer) { - // Actively copy all arguments into stack memory. - thread_arguments - arguments_copy = *reinterpret_cast *>(thread_pointer); - - // Now we have copies of everything we need on the stack. - // The original thread object can be moved freely (no more - // references to its memory location). 
- arguments_copy.startup_flag_->clear(); - - this_thread::set_state(arguments_copy.state_); - arguments_copy.function_(); - - // Finished executing the user function - pthread_exit(nullptr); -} - -template -thread::thread(const Function &function, State *state_pointer): - pthread_thread_{} { - -#ifdef PLS_THREAD_SPECIFIC_PTHREAD - if (!this_thread::local_storage_key_initialized_) { - pthread_key_create(&this_thread::local_storage_key_, nullptr); - this_thread::local_storage_key_initialized_ = true; - } -#endif - - // Wee need to wait for the started function to read - // the function_ and state_pointer_ property before returning - // from the constructor, as the object might be moved after this. - std::atomic_flag startup_flag{ATOMIC_FLAG_INIT}; - - thread_arguments arguments{function, state_pointer, &startup_flag}; - - startup_flag.test_and_set(); // Set the flag, pthread will clear it when it is safe to return - pthread_create(&pthread_thread_, nullptr, start_pthread_internal < Function, State > , (void *) (&arguments)); - while (startup_flag.test_and_set()); // Busy waiting for the starting flag to clear -} - -template -thread::thread(const Function &function): thread{function, (void *) nullptr} {} - -} -} -} - -#endif //PLS_THREAD_IMPL_H diff --git a/lib/pls/include/pls/internal/base/ttas_spin_lock.h b/lib/pls/include/pls/internal/base/ttas_spin_lock.h index 787f772..abd67fd 100644 --- a/lib/pls/include/pls/internal/base/ttas_spin_lock.h +++ b/lib/pls/include/pls/internal/base/ttas_spin_lock.h @@ -5,7 +5,6 @@ #include #include -#include "pls/internal/base/thread.h" #include "pls/internal/base/backoff.h" namespace pls { diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack.h b/lib/pls/include/pls/internal/data_structures/aligned_stack.h similarity index 60% rename from lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack.h rename to lib/pls/include/pls/internal/data_structures/aligned_stack.h index e3f63fb..d3ebe0e 
100644 --- a/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack.h +++ b/lib/pls/include/pls/internal/data_structures/aligned_stack.h @@ -4,13 +4,15 @@ #include #include +#include +#include #include "pls/internal/base/error_handling.h" #include "pls/internal/base/alignment.h" +#include "pls/internal/base/system_details.h" namespace pls { namespace internal { -namespace scheduling { namespace data_structures { using base::system_details::pointer_t; @@ -20,44 +22,75 @@ using base::system_details::pointer_t; * The objects will be stored aligned in the stack, making the storage cache friendly and very fast * (as long as one can live with the stack restrictions). * - * IMPORTANT: Does not call destructors on stored objects! Do not allocate resources in the objects! + * IMPORTANT: Does only call the deconstructor when explicitly using pop(). + * In this case you have to be sure that push() and pop() calls + * match up through out your program. * * Usage: - * aligned_stack stack{pointer_to_memory, size_of_memory}; + * aligned_static_stack stack; or heap_aligned_stack stack(size); * T* pointer = stack.push(constructor_arguments); // Perfect-Forward-Construct the object on top of stack - * stack.pop(); // Remove the top object of type T + * stack.pop(); // Remove the top object of type T and deconstruct it */ class aligned_stack { public: typedef size_t stack_offset; - aligned_stack() : aligned_memory_start_{0}, aligned_memory_end_{0}, max_offset_{0}, current_offset_{0} {}; - aligned_stack(pointer_t memory_region, std::size_t size); - aligned_stack(char *memory_region, std::size_t size); + aligned_stack(char *memory_pointer, size_t size); + aligned_stack(char *memory_pointer, size_t size, size_t original_size); template T *push(ARGS &&... 
args); template - void *push_bytes(); - void *push_bytes(size_t size); + char *push_bytes(); + char *push_bytes(size_t size); + template - T pop(); + void pop(); - void *memory_at_offset(stack_offset offset) const; + char *memory_at_offset(stack_offset offset) const; stack_offset save_offset() const { return current_offset_; } void reset_offset(stack_offset new_offset) { current_offset_ = new_offset; } - private: + protected: // Keep bounds of our memory block - pointer_t aligned_memory_start_; - pointer_t aligned_memory_end_; + char *unaligned_memory_pointer_; + char *memory_pointer_; stack_offset max_offset_; stack_offset current_offset_; }; -} +template +class static_aligned_stack { + public: + static_aligned_stack(); + aligned_stack &get_stack() { return aligned_stack_; } + + private: + alignas(base::system_details::CACHE_LINE_SIZE) std::array memory_; + aligned_stack aligned_stack_; +}; + +class heap_aligned_stack { + public: + explicit heap_aligned_stack(size_t size) : + unaligned_memory_size_{base::alignment::next_alignment(size)}, + unaligned_memory_pointer_{new char[unaligned_memory_size_]}, + aligned_stack_{unaligned_memory_pointer_, size, unaligned_memory_size_} {} + + ~heap_aligned_stack() { + delete[] unaligned_memory_pointer_; + } + + aligned_stack &get_stack() { return aligned_stack_; } + + private: + size_t unaligned_memory_size_; + char *unaligned_memory_pointer_; + aligned_stack aligned_stack_; +}; + } } } diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack_impl.h b/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h similarity index 77% rename from lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack_impl.h rename to lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h index f04e9e1..60fb0b8 100644 --- a/lib/pls/include/pls/internal/scheduling/data_structures/aligned_stack_impl.h +++ b/lib/pls/include/pls/internal/data_structures/aligned_stack_impl.h @@ -6,29 +6,30 @@ 
namespace pls { namespace internal { -namespace scheduling { namespace data_structures { template T *aligned_stack::push(ARGS &&... args) { - // Perfect-Forward construct return new(push_bytes())T(std::forward(args)...); } template -void *aligned_stack::push_bytes() { +char *aligned_stack::push_bytes() { return push_bytes(sizeof(T)); } template -T aligned_stack::pop() { +void aligned_stack::pop() { auto num_cache_lines = base::alignment::next_alignment(sizeof(T)) / base::system_details::CACHE_LINE_SIZE; current_offset_ -= num_cache_lines; - return *reinterpret_cast(memory_at_offset(current_offset_)); + auto result = *reinterpret_cast(memory_at_offset(current_offset_)); + result.~T(); } -} +template +static_aligned_stack::static_aligned_stack(): memory_{}, aligned_stack_{memory_.data(), SIZE} {}; + } } } diff --git a/lib/pls/include/pls/internal/data_structures/bounded_trading_deque.h b/lib/pls/include/pls/internal/data_structures/bounded_trading_deque.h new file mode 100644 index 0000000..e54d9d7 --- /dev/null +++ b/lib/pls/include/pls/internal/data_structures/bounded_trading_deque.h @@ -0,0 +1,271 @@ + +#ifndef PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_TRADING_DEQUE_H_ +#define PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_TRADING_DEQUE_H_ + +#include +#include + +#include "pls/internal/base/error_handling.h" +#include "pls/internal/base/system_details.h" + +#include "pls/internal/data_structures/optional.h" +#include "pls/internal/data_structures/stamped_integer.h" + +namespace pls { +namespace internal { +namespace data_structures { + +template +class traded_field { + static_assert(base::system_details::CACHE_LINE_SIZE >= 4, + "Traded objects must not use their last address bits, as we use them for status flags." + "As traded objects are usually cache aligned, we need big enough cache lines."); + // TODO: Replace unsigned long with a portable sized integer + // (some systems might have different pointer sizes to long sizes). 
+ static constexpr unsigned long SHIFT = 0x2lu; + static constexpr unsigned long TAG_BITS = 0x3lu; + static constexpr unsigned long RELEVANT_BITS = ~TAG_BITS; + static constexpr unsigned long EMPTY_TAG = 0x0lu; + static constexpr unsigned long STAMP_TAG = 0x1lu; + static constexpr unsigned long TRADE_TAG = 0x2lu; + + public: + void fill_with_stamp(unsigned long stamp) { + pointer_ = (void *) ((stamp << SHIFT) | STAMP_TAG); + } + unsigned long get_stamp() { + PLS_ASSERT(is_filled_with_tag(), "Must only read out the tag when the traded field contains one."); + return ((unsigned long) pointer_) >> SHIFT; + } + bool is_filled_with_tag() { + return (((unsigned long) pointer_) & TAG_BITS) == STAMP_TAG; + } + + void fill_with_trade_object(TradedType *trade_object) { + PLS_ASSERT((((unsigned long) trade_object) & TAG_BITS) == 0, + "Must only store aligned objects in this data structure (last bits are needed for tag bit)"); + pointer_ = reinterpret_cast(((unsigned long) trade_object) | TRADE_TAG); + } + TradedType *get_trade_object() { + PLS_ASSERT(is_filled_with_object(), "Must only read out the object when the traded field contains one."); + return reinterpret_cast(((unsigned long) pointer_) & RELEVANT_BITS); + } + bool is_filled_with_object() { + return (((unsigned long) pointer_) & TAG_BITS) == TRADE_TAG; + } + + bool is_empty() { + return (((unsigned long) pointer_) & TAG_BITS) == EMPTY_TAG; + } + + private: + void *pointer_{nullptr}; +}; + +template +class alignas(base::system_details::CACHE_LINE_SIZE) trading_deque_entry { + public: + /* + * Fill the slot with its initial values, making it ready for being stolen. + * Performs no synchronization/memory ordering constraints. + * + * Method is called to init a field on pushBot. 
+ */ + void fill_slots(EntryType *entry_item, unsigned long expected_stamp) { + entry_slot_.store(entry_item, std::memory_order_relaxed); + forwarding_stamp_.store(expected_stamp, std::memory_order_relaxed); + + // Relaxed is fine for this, as adding elements is synced over the bot pointer + auto old = trade_slot_.load(std::memory_order_relaxed); + old.fill_with_stamp(expected_stamp); + trade_slot_.store(old, std::memory_order_relaxed); + } + + /** + * Tries to atomically read out the object traded in by thieves. + * Either returns the traded in field (the slot was stolen) or no result (the slot is still owned locally). + * + * Method is used to pop a field on popBot. + */ + optional acquire_traded_type() { + traded_field empty_field; + traded_field old_field_value = trade_slot_.exchange(empty_field, std::memory_order_acq_rel); + + if (old_field_value.is_filled_with_tag()) { + return optional(); + } else { + return optional(old_field_value.get_trade_object()); + } + } + + EntryType *get_object() { + return entry_slot_; + } + + bool is_empty() { + return trade_slot_.load(std::memory_order_seq_cst).is_empty(); + } + + optional trade_object(TradedType *offered_object, unsigned long &expected_stamp) { + // Read our potential result + EntryType *result = entry_slot_.load(std::memory_order_relaxed); + unsigned long forwarding_stamp = forwarding_stamp_.load(std::memory_order_relaxed); + + // Try to get it by CAS with the expected field entry, giving up our offered_object for it + traded_field expected_field; + expected_field.fill_with_stamp(expected_stamp); + traded_field offered_field; + offered_field.fill_with_trade_object(offered_object); + + if (trade_slot_.compare_exchange_strong(expected_field, offered_field, std::memory_order_acq_rel)) { + return optional{result}; + } else { + if (expected_field.is_empty()) { + expected_stamp = forwarding_stamp; + } + return optional{}; + } + } + + private: + std::atomic entry_slot_{nullptr}; + std::atomic forwarding_stamp_{}; + 
std::atomic> trade_slot_{}; +}; + +/** + * A work stealing deque (single produces/consumer at the end, multiple consumers at the start). + * A pointer to an OfferedType object can only be acquired by stealing consumers (from the start), + * when they also offer a pointer to a TradeType object. + * + * The exchange of 'goods' (OfferedType and TradedType) happens atomically at a linearization point. + * This means that the owning thread always gets a TradedType for each and every OfferedType that was + * successfully stolen. + * + * The owner of the deque must pop ALL elements, even the stolen ones (to get the traded goods instead). + * + * @tparam EntryType The type of objects stored in the deque + * @tparam TradedType The type of objects traded in for acquiring a deque element. + */ +template +class bounded_trading_deque { + using deque_entry = trading_deque_entry; + + public: + bounded_trading_deque(deque_entry *entries, size_t num_entries) : + entries_{entries}, num_entries_{num_entries} {}; + + void push_bot(EntryType *offered_object) { + auto expected_stamp = bot_internal_.stamp; + auto ¤t_entry = entries_[bot_internal_.value]; + + current_entry.fill_slots(offered_object, expected_stamp); + bot_internal_.stamp++; + bot_internal_.value++; + + bot_.store(bot_internal_.value, std::memory_order_release); + } + + struct pop_result { + explicit pop_result(optional entry, optional traded) : entry_{entry}, + traded_{traded} {}; + pop_result() : entry_{}, traded_{} {}; + + optional entry_; + optional traded_; + }; + pop_result pop_bot() { + if (bot_internal_.value == 0) { + return pop_result{}; // Empty, nothing to return... 
+ } + + // Go one step back + bot_internal_.value--; + + auto ¤t_entry = entries_[bot_internal_.value]; + optional traded_object = current_entry.acquire_traded_type(); + optional queue_entry; + if (traded_object) { + // We do not return an entry, but the traded object + queue_entry = optional{}; + } else { + // We still got it locally, grab the object + queue_entry = optional{current_entry.get_object()}; + } + + bot_.store(bot_internal_.value, std::memory_order_relaxed); + if (bot_internal_.value == 0) { + bot_internal_.stamp++; + top_.store({bot_internal_.stamp, 0}, std::memory_order_release); + } + + return pop_result{queue_entry, traded_object}; + } + + std::tuple, stamped_integer> peek_top() { + auto local_top = top_.load(); + auto local_bot = bot_.load(); + if (local_top.value >= local_bot) { + return std::make_tuple(optional{}, local_top); + } else { + return std::make_tuple(optional{entries_[local_top.value].get_object()}, local_top); + } + } + + optional pop_top(TradedType *trade_offer) { + auto local_top = top_.load(); + return pop_top(trade_offer, local_top); + } + + optional pop_top(TradedType *trade_offer, stamped_integer local_top) { + auto local_bot = bot_.load(); + if (local_top.value >= local_bot) { + return optional{}; + } + + unsigned long expected_top_stamp = local_top.stamp; + optional entry = entries_[local_top.value].trade_object(trade_offer, expected_top_stamp); + if (entry) { + // We got it, for sure move the top pointer forward. + top_.compare_exchange_strong(local_top, {local_top.stamp + 1, local_top.value + 1}); + } else { + // We did not get it.... + if (entries_[local_top.value].is_empty()) { + // ...update the top stamp, so the next call can get it (we still make system progress, as the owner + // must have popped off the element) + top_.compare_exchange_strong(local_top, {expected_top_stamp, local_top.value}); + } else { + // ...move the pointer forward if someone else put a valid trade object in there. 
+ top_.compare_exchange_strong(local_top, {local_top.stamp + 1, local_top.value + 1}); + } + } + + return entry; + } + + private: + deque_entry *entries_; + size_t num_entries_; + + alignas(base::system_details::CACHE_LINE_SIZE) std::atomic top_{{0, 0}}; + alignas(base::system_details::CACHE_LINE_SIZE) std::atomic bot_{0}; + + stamped_integer bot_internal_{0, 0}; +}; + +template +class static_bounded_trading_deque { + public: + static_bounded_trading_deque() : items_{}, deque_{items_.data(), SIZE} {} + + bounded_trading_deque &get_deque() { return deque_; } + private: + std::array, SIZE> items_; + bounded_trading_deque deque_; +}; + +} +} +} + +#endif //PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_TRADING_DEQUE_H_ diff --git a/lib/pls/include/pls/internal/data_structures/bounded_ws_deque.h b/lib/pls/include/pls/internal/data_structures/bounded_ws_deque.h new file mode 100644 index 0000000..194a94c --- /dev/null +++ b/lib/pls/include/pls/internal/data_structures/bounded_ws_deque.h @@ -0,0 +1,117 @@ + +#ifndef PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_WS_DEQUE_H_ +#define PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_WS_DEQUE_H_ + +#include +#include +#include + +#include "pls/internal/base/system_details.h" +#include "pls/internal/data_structures/stamped_integer.h" +#include "pls/internal/data_structures/optional.h" + +namespace pls { +namespace internal { +namespace data_structures { + +/** + * Classic, text book ws bounded deque based on arrays. + * Stores a fixed amount of fixed size objects in an array, + * allowing or local push/pop on the bottom and remote + * pop on the top. + * + * The local operations are cheap as long as head and tail are + * far enough apart, making it ideal to avoid cache problems. + * + * Depends on overaligned datatypes to be cache line friendly. + * This does not concern C++14 and upwards, but hinders you to properly + * allocate it on the heap in C++11 (see base::alignment::alignment_wrapper for a solution). 
+ */ +// TODO: Relax memory orderings in here... +template +class bounded_ws_deque { + public: + bounded_ws_deque(T *item_array, size_t size) : size_{size}, item_array_{item_array} {} + + void push_bottom(T item) { + item_array_[local_bottom_] = item; + local_bottom_++; + bottom_.store(local_bottom_, std::memory_order_release); + } + + bool is_empty() { + return top_.load().value < bottom_.load(); + } + + optional pop_top() { + stamped_integer old_top = top_.load(); + unsigned int new_stamp = old_top.stamp + 1; + unsigned int new_value = old_top.value + 1; + + if (bottom_.load() <= old_top.value) { + return optional(); + } + + optional result(item_array_[old_top.value]); + if (top_.compare_exchange_strong(old_top, {new_stamp, new_value})) { + return result; + } + + return optional(); + } + + optional pop_bottom() { + if (local_bottom_ == 0) { + return optional(); + } + + local_bottom_--; + bottom_.store(local_bottom_, std::memory_order_seq_cst); + + optional result(item_array_[local_bottom_]); + + stamped_integer old_top = top_.load(std::memory_order_acquire); + if (local_bottom_ > old_top.value) { + // Enough distance to just return the value + return result; + } + if (local_bottom_ == old_top.value) { + local_bottom_ = 0; + bottom_.store(local_bottom_); + if (top_.compare_exchange_strong(old_top, {old_top.stamp + 1, 0})) { + // We won the competition and the queue is empty + return result; + } + } + + // The queue is empty and we lost the competition + local_bottom_ = 0; + bottom_.store(local_bottom_); + top_.store({old_top.stamp + 1, 0}); + return optional(); + } + + private: + alignas(base::system_details::CACHE_LINE_SIZE) std::atomic top_{stamped_integer{0, 0}}; + alignas(base::system_details::CACHE_LINE_SIZE) std::atomic bottom_{0}; + unsigned int local_bottom_{0}; + size_t size_; + T *item_array_; +}; + +template +class static_bounded_ws_deque { + public: + static_bounded_ws_deque() : items_{}, deque_{items_.data(), SIZE} {} + + bounded_ws_deque &get_deque() 
{ return deque_; } + private: + std::array items_; + bounded_ws_deque deque_; +}; + +} +} +} + +#endif //PLS_INTERNAL_DATA_STRUCTURES_BOUNDED_WS_DEQUE_H_ diff --git a/lib/pls/include/pls/internal/data_structures/delayed_initialization.h b/lib/pls/include/pls/internal/data_structures/delayed_initialization.h new file mode 100644 index 0000000..4dc1eaf --- /dev/null +++ b/lib/pls/include/pls/internal/data_structures/delayed_initialization.h @@ -0,0 +1,113 @@ + +#ifndef PLS_INTERNAL_DATA_STRUCTURES_DELAYED_INITIALIZATION_H_ +#define PLS_INTERNAL_DATA_STRUCTURES_DELAYED_INITIALIZATION_H_ + +#include +#include + +#include "pls/internal/base/error_handling.h" + +namespace pls { +namespace internal { +namespace data_structures { + +/** + * Allows to reserve space for an uninitialized member variable. + * The member must be initialized before usage using the provided + * perfect forwarding constructor method. + * + * Takes care of the de-construction the contained object if one is active. + */ +template +class alignas(alignof(T)) delayed_initialization { + public: + delayed_initialization() : memory_{}, initialized_{false} {} + delayed_initialization(const delayed_initialization &) = delete; + delayed_initialization(delayed_initialization &&other) noexcept { + if (other.initialized()) { + new((void *) memory_.data()) T(std::move(other.object())); + other.initialized_ = false; + initialized_ = true; + } + } + delayed_initialization &operator=(const delayed_initialization &) = delete; + delayed_initialization &operator=(delayed_initialization &&other) noexcept { + if (&other == this) { + return *this; + } + + if (initialized() && other.initialized()) { + object() = std::move(other.object()); + other.initialized_ = false; + initialized_ = true; + return *this; + } + + if (!initialized() && other.initialized_) { + new((void *) memory_.data()) T(std::move(other.object())); + other.initialized_ = false; + initialized_ = true; + return *this; + } + + return *this; + } + + 
template + explicit delayed_initialization(ARGS &&...args): memory_{}, initialized_{true} { + new(memory_.data()) T(std::forward(args)...); + } + + ~delayed_initialization() { + if (initialized_) { + object().~T(); + } + } + + template + void initialize(ARGS &&...args) { + PLS_ASSERT(!initialized_, "Can only initialize delayed wrapper object once!"); + + new((void *) memory_.data()) T(std::forward(args)...); + initialized_ = true; + } + + void destroy() { + PLS_ASSERT(initialized_, "Can only destroy initialized objects!") + + object().~T(); + initialized_ = false; + } + + T &object() { + PLS_ASSERT(initialized_, "Can not use an uninitialized delayed wrapper object!"); + + return *reinterpret_cast(memory_.data()); + } + + const T &object() const { + PLS_ASSERT(initialized_, "Can not use an uninitialized delayed wrapper object!"); + + return *reinterpret_cast(memory_.data()); + } + + T &operator*() { + return object(); + } + + const T &operator*() const { + return object(); + } + + bool initialized() const { return initialized_; } + + private: + alignas(alignof(T)) std::array memory_; + bool initialized_; +}; + +} +} +} + +#endif // PLS_INTERNAL_DATA_STRUCTURES_DELAYED_INITIALIZATION_H_ diff --git a/lib/pls/include/pls/internal/data_structures/optional.h b/lib/pls/include/pls/internal/data_structures/optional.h new file mode 100644 index 0000000..2a5d287 --- /dev/null +++ b/lib/pls/include/pls/internal/data_structures/optional.h @@ -0,0 +1,70 @@ + +#ifndef PLS_INTERNAL_DATA_STRUCTURES_OPTIONAL_H_ +#define PLS_INTERNAL_DATA_STRUCTURES_OPTIONAL_H_ + +#include +#include + +#include "pls/internal/data_structures/delayed_initialization.h" + +namespace pls { +namespace internal { +namespace data_structures { + +template +class optional { + public: + optional() = default; + optional(optional &other) noexcept : optional(const_cast(other)) {}; + optional(const optional &other) noexcept { + if (other) { + data_.initialize(other.data_.object()); + } + } + optional(optional 
&&other) noexcept { + data_ = std::move(other.data_); + } + optional &operator=(const optional &other) { + if (&other == this) { + return *this; + } + + if (data_.initialized()) { + data_.destroy(); + } + if (other) { + data_.initialize(other.data_.object()); + } + + return *this; + } + optional &operator=(optional &&other) noexcept { + if (&other == this) { + return *this; + } + + data_ = std::move(other.data_); + + return *this; + } + + template + explicit optional(ARGS &&...args): data_{std::forward(args)...} {} + + operator bool() const { + return data_.initialized(); + } + + T &operator*() { + return *data_; + } + + private: + delayed_initialization data_; +}; + +} +} +} + +#endif //PLS_INTERNAL_DATA_STRUCTURES_OPTIONAL_H_ diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/stamped_integer.h b/lib/pls/include/pls/internal/data_structures/stamped_integer.h similarity index 71% rename from lib/pls/include/pls/internal/scheduling/data_structures/stamped_integer.h rename to lib/pls/include/pls/internal/data_structures/stamped_integer.h index 04fea63..511f774 100644 --- a/lib/pls/include/pls/internal/scheduling/data_structures/stamped_integer.h +++ b/lib/pls/include/pls/internal/data_structures/stamped_integer.h @@ -1,12 +1,11 @@ -#ifndef PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_DATA_STRUCTURES_STAMPED_INTEGER_H_ -#define PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_DATA_STRUCTURES_STAMPED_INTEGER_H_ +#ifndef PLS_STAMPED_INTEGER_H_ +#define PLS_STAMPED_INTEGER_H_ #include "pls/internal/base/system_details.h" namespace pls { namespace internal { -namespace scheduling { namespace data_structures { constexpr unsigned long HALF_CACHE_LINE = base::system_details::CACHE_LINE_SIZE / 2; @@ -19,11 +18,18 @@ struct stamped_integer { stamped_integer() : stamp{0}, value{0} {}; stamped_integer(member_t new_value) : stamp{0}, value{new_value} {}; stamped_integer(member_t new_stamp, member_t new_value) : stamp{new_stamp}, 
value{new_value} {}; + + bool operator==(const stamped_integer &other) const noexcept { + return stamp == other.stamp && value == other.value; + } + + bool operator!=(const stamped_integer &other) const noexcept { + return !(*this == other); + } }; } } } -} -#endif //PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_DATA_STRUCTURES_STAMPED_INTEGER_H_ +#endif //PLS_STAMPED_INTEGER_H_ diff --git a/lib/pls/include/pls/internal/helpers/mini_benchmark.h b/lib/pls/include/pls/internal/helpers/mini_benchmark.h deleted file mode 100644 index 0b7fa63..0000000 --- a/lib/pls/include/pls/internal/helpers/mini_benchmark.h +++ /dev/null @@ -1,70 +0,0 @@ - -#ifndef PLS_MINI_BENCHMARK_H -#define PLS_MINI_BENCHMARK_H - -#include "pls/internal/scheduling/scheduler_memory.h" -#include "pls/internal/scheduling/scheduler.h" - -#include -#include - -namespace pls { -namespace internal { -namespace helpers { - -// TODO: Clean up (separate into small functions and .cpp file) -template -void run_mini_benchmark(const Function &lambda, - size_t max_threads, - unsigned long max_runtime_ms = 1000, - unsigned long warmup_time_ms = 100) { - using namespace std; - using namespace pls::internal::scheduling; - - malloc_scheduler_memory scheduler_memory{max_threads, 2u << 17u}; - for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) { - scheduler local_scheduler{&scheduler_memory, num_threads}; - - chrono::high_resolution_clock::time_point start_time; - chrono::high_resolution_clock::time_point end_time; - long max_local_time = 0; - long total_time = 0; - long iterations = 0; - - local_scheduler.perform_work([&] { - start_time = chrono::high_resolution_clock::now(); - end_time = start_time; - chrono::high_resolution_clock::time_point planned_end_time = start_time + chrono::milliseconds(max_runtime_ms); - chrono::high_resolution_clock::time_point planned_warmup_time = start_time + chrono::milliseconds(warmup_time_ms); - - while (end_time < planned_end_time) { - if 
(end_time < planned_warmup_time) { - lambda(); - } else { - auto local_start_time = chrono::high_resolution_clock::now(); - lambda(); - auto local_end_time = chrono::high_resolution_clock::now(); - long local_time = chrono::duration_cast(local_end_time - local_start_time).count(); - - total_time += local_time; - max_local_time = std::max(local_time, max_local_time); - iterations++; - } - end_time = chrono::high_resolution_clock::now(); - } - }); - double time_per_iteration = (double) total_time / iterations; - - std::cout << (long) time_per_iteration << " (" << max_local_time << ")"; - if (num_threads < max_threads) { - std::cout << "\t\t"; - } - } - std::cout << std::endl; -} - -} -} -} - -#endif //PLS_MINI_BENCHMARK_H diff --git a/lib/pls/include/pls/internal/helpers/profiler.h b/lib/pls/include/pls/internal/helpers/profiler.h index 2902344..e0fe3ad 100644 --- a/lib/pls/include/pls/internal/helpers/profiler.h +++ b/lib/pls/include/pls/internal/helpers/profiler.h @@ -6,9 +6,10 @@ #include #include -#define PROFILE_WORK_BLOCK(msg) EASY_BLOCK(msg, profiler::colors::LightGreen) -#define PROFILE_FORK_JOIN_STEALING(msg) EASY_BLOCK(msg, profiler::colors::LightBlue) -#define PROFILE_STEALING(msg) EASY_BLOCK(msg, profiler::colors::Blue) +#define PROFILE_TASK(msg) EASY_BLOCK(msg, profiler::colors::LightBlue) +#define PROFILE_CONTINUATION(msg) EASY_BLOCK(msg, profiler::colors::LightBlue) +#define PROFILE_FAST_PATH(msg) EASY_BLOCK(msg, profiler::colors::Green) +#define PROFILE_STEALING(msg) EASY_BLOCK(msg, profiler::colors::Orange) #define PROFILE_LOCK(msg) EASY_BLOCK(msg, profiler::colors::Red) #define PROFILE_END_BLOCK EASY_END_BLOCK @@ -21,8 +22,9 @@ #else //ENABLE_EASY_PROFILER -#define PROFILE_WORK_BLOCK(msg) -#define PROFILE_FORK_JOIN_STEALING(msg) +#define PROFILE_TASK(msg) +#define PROFILE_CONTINUATION(msg) +#define PROFILE_FAST_PATH(msg) #define PROFILE_STEALING(msg) #define PROFILE_LOCK(msg) diff --git a/lib/pls/include/pls/internal/helpers/range.h 
b/lib/pls/include/pls/internal/helpers/range.h index d83cee8..0bb677d 100644 --- a/lib/pls/include/pls/internal/helpers/range.h +++ b/lib/pls/include/pls/internal/helpers/range.h @@ -1,608 +1,608 @@ -/* - Range - ===== - - Copyright (c) 2009-2011 Khaled Alshaya - - Distributed under the Boost Software License, version 1.0 - (See the license at: http://www.boost.org/license_1_0.txt). -*/ - -/* - Rationale - ========= - - In Python, there is a beautiful function called "range". - "range" allows the programmer to iterate over a range elegantly. - This concept is not as general as "for-loops" in C++, - but non the less, it expresses the intent of the programmer - clearer than the general "for-loops" in many cases. - - - Design - ====== - - Range is made to be STL-like library. In fact, it is - built on top of the concepts of STL. The library is designed to - work with STL algorithms as well. Range is more flexible - than the Python "range", because: - - Range is an "immutable ordered random access container" - - - Specifications - ============== - - Range satisfies the following requirements: - - * Immutable. - * Random Access Container. - * Random Access Iterator Interface. - * Constant Time Complexity Operations. - - - Range models an ordered sequence of elements, - where a range is defined by: - - [begin, end) - - * begin: the first element in the range. (Inclusive) - * end : the last element in the range. (Exclusive) - * step : the distance between two consecutive elements in a range. - - where each element in the range is defined by: - - element = begin + step * i - - * i: is the index of the element in range. - - The following precondition must be met for the sequence - to be a valid range: - - step != 0 - && - ( - begin <= end && step > 0 - || - begin >= end && step < 0 - ) - - - Portability - =========== - - Range Generator is written in standard C++ (C++98). It depends - -only- on the standard C++ library. 
-*/ - -// TODO: See if we should swap this out for our own implementation, for now this is fine, as it is self contained. -/** - * Notes on Modification: - * The code was adpated to fit into our namespacing/naming scheme for simpler use. - * This includes ifdef's, namespace and code formatting style. - */ - -#ifndef PLS_range_h__ -#define PLS_range_h__ - -#include -#include -#include -#include - -namespace pls { -namespace internal { -namespace helpers { - -template -struct basic_range { - struct const_iterator_impl { - typedef IntegerType value_type; - typedef std::size_t size_type; - typedef IntegerType difference_type; - typedef value_type *pointer; - typedef value_type &reference; - typedef - std::random_access_iterator_tag - iterator_category; - - const_iterator_impl() : r(0), index(0) {} - - const_iterator_impl(const const_iterator_impl &rhs) - : r(rhs.r), index(rhs.index) {} - - const_iterator_impl(basic_range const *p_range, size_type p_index) - : r(p_range), index(p_index) {} - - const_iterator_impl &operator=(const const_iterator_impl &rhs) { - r = rhs.r; - index = rhs.index; - return *this; - } - - bool operator==(const const_iterator_impl &rhs) const { - return *r == *(rhs.r) && index == rhs.index; - } - - bool operator!=(const const_iterator_impl &rhs) const { - return !(*this == rhs); - } - - bool operator<(const const_iterator_impl &rhs) const { - return index < rhs.index; - } - - bool operator>(const const_iterator_impl &rhs) const { - return index > rhs.index; - } - - bool operator<=(const const_iterator_impl &rhs) const { - return index <= rhs.index; - } - - bool operator>=(const const_iterator_impl &rhs) const { - return index >= rhs.index; - } - - value_type operator*() const { - return r->m_first_element + r->m_step * index; - } - - // operator-> - // is not implemented because the value_type is an integer type - // and primitive types in C++ don't define member functions. 
- - const_iterator_impl &operator++() { - ++index; - return *this; - } - - const_iterator_impl operator++(int) { - const_iterator_impl temp = *this; - ++index; - return temp; - } - - const_iterator_impl &operator--() { - --index; - return *this; - } - - const_iterator_impl operator--(int) { - const_iterator_impl temp = *this; - --index; - return temp; - } - - const_iterator_impl &operator+=(difference_type increment) { - index += increment; - return *this; - } - - // operator+ - // is friend operator but operator- - // is not, because we want to allow the following for "+": - // iterator+5 - // 5+iterator - // For the "-" it is not correct to do so, because - // iterator-5 != 5-iterator - friend const_iterator_impl operator+ - (const const_iterator_impl &lhs, difference_type increment) { - const_iterator_impl sum; - sum.r = lhs.r; - sum.index = lhs.index + increment; - return sum; - } - - const_iterator_impl &operator-=(difference_type decrement) { - index -= decrement; - return *this; - } - - const_iterator_impl operator-(difference_type decrement) const { - const_iterator_impl shifted_iterator; - shifted_iterator.r = r; - shifted_iterator.index = index - decrement; - return shifted_iterator; - } - - difference_type operator-(const const_iterator_impl &rhs) const { - return index - rhs.index; - } - - value_type operator[](difference_type offset) const { - size_type new_index = index + offset; - return r->m_first_element + r->m_step * new_index; - } - - private: - basic_range const *r; - size_type index; - }; - - struct const_reverse_iterator_impl { - typedef IntegerType value_type; - typedef std::size_t size_type; - typedef IntegerType difference_type; - typedef value_type *pointer; - typedef value_type &reference; - typedef - std::random_access_iterator_tag - iterator_category; - - const_reverse_iterator_impl() : r(0), index(0) {} - - const_reverse_iterator_impl(const const_reverse_iterator_impl &rhs) - : r(rhs.r), index(rhs.index) {} - - 
const_reverse_iterator_impl(basic_range const *p_range, size_type p_index) - : r(p_range), index(p_index) {} - - const_reverse_iterator_impl &operator=(const const_reverse_iterator_impl &rhs) { - r = rhs.r; - index = rhs.index; - return *this; - } - - bool operator==(const const_reverse_iterator_impl &rhs) const { - return *r == *(rhs.r) && index == rhs.index; - } - - bool operator!=(const const_reverse_iterator_impl &rhs) const { - return !(*this == rhs); - } - - bool operator<(const const_reverse_iterator_impl &rhs) const { - return index < rhs.index; - } - - bool operator>(const const_reverse_iterator_impl &rhs) const { - return index > rhs.index; - } - - bool operator<=(const const_reverse_iterator_impl &rhs) const { - return index <= rhs.index; - } - - bool operator>=(const const_reverse_iterator_impl &rhs) const { - return index >= rhs.index; - } - - value_type operator*() const { - size_type reverse_index - = (r->m_element_count - 1) - index; - return r->m_first_element + r->m_step * reverse_index; - } - - // operator-> - // is not implemented because the value_type is integer type - // and primitive types in C++ don't define member functions. 
- - const_reverse_iterator_impl &operator++() { - ++index; - return *this; - } - - const_reverse_iterator_impl operator++(int) { - const_reverse_iterator_impl temp = *this; - ++index; - return temp; - } - - const_reverse_iterator_impl &operator--() { - --index; - return *this; - } - - const_reverse_iterator_impl operator--(int) { - const_reverse_iterator_impl temp = *this; - --index; - return temp; - } - - const_reverse_iterator_impl &operator+=(difference_type increment) { - index += increment; - return *this; - } - - // operator+ - // is friend operator but operator- - // is not, because we want to allow the following for "+": - // iterator+5 - // 5+iterator - // For the "-" it is not correct to do so, because - // iterator-5 != 5-iterator - friend const_reverse_iterator_impl operator+ - (const const_reverse_iterator_impl &lhs, difference_type increment) { - const_reverse_iterator_impl sum; - sum.r = lhs.r; - sum.index = lhs.index + increment; - return sum; - } - - const_reverse_iterator_impl &operator-=(difference_type decrement) { - index -= decrement; - return *this; - } - - const_reverse_iterator_impl operator-(difference_type decrement) const { - const_reverse_iterator_impl shifted_iterator; - shifted_iterator.r = r; - shifted_iterator.index = index - decrement; - return shifted_iterator; - } - - difference_type operator-(const const_reverse_iterator_impl &rhs) const { - return index - rhs.index; - } - - value_type operator[](difference_type offset) const { - size_type new_reverse_index - = (r->m_element_count - 1) - (index + offset); - return r->m_first_element + r->m_step * new_reverse_index; - } - - private: - basic_range const *r; - size_type index; - }; - - typedef IntegerType value_type; - typedef const_iterator_impl iterator; - typedef const_iterator_impl const_iterator; - typedef const_reverse_iterator_impl reverse_iterator; - typedef const_reverse_iterator_impl const_reverse_iterator; - typedef value_type &reference; - typedef const value_type 
&const_reference; - typedef value_type *pointer; - typedef IntegerType difference_type; - typedef std::size_t size_type; - - // In the case of default construction, - // the range is considered as an empty range with no elements. - // step can be anything other than 0. 1 is - // an implementation convention, and it doesn't have - // a significance in this case because the range is empty. - basic_range() : m_first_element(0), m_element_count(0), m_step(1) {} - - // first_element: is begin in specifications. - // last_element: is end in specifications. - basic_range(value_type first_element, value_type last_element, value_type step) - : m_first_element(first_element), - m_step(step) { - // We need to count the number of elements. - // The only case where a range is invalid, - // when the step=0. It means that the range - // is infinite, because the number of elements - // in a range, is the length of that range - // divided by the difference between - // every two successive elements. - - if (step == 0) - throw std::out_of_range("Invalid Range: step can't be equal to zero!"); - if (first_element < last_element && step < 0) - throw std::out_of_range("Invalid Range: step can't be backward, while the range is forward!"); - if (first_element > last_element && step > 0) - throw std::out_of_range("Invalid Range: step can't be forward, while the range is backward!"); - - m_element_count = (last_element - first_element) / step; - if ((last_element - first_element) % step != 0) - ++m_element_count; - } - - // The following constructor, determines the step - // automatically. If the range is forward, then - // step will be one. If the range is backward, - // step will be minus one. If the begin is equal - // to end, then the step must not equal to zero - // and it is set to one as a convention. 
- basic_range(value_type first_element, value_type last_element) - : m_first_element(first_element) { - if (last_element >= first_element) *this = basic_range(first_element, last_element, 1); - else *this = basic_range(first_element, last_element, -1); - - } - - // The following constructor is a shortcut - // if you want the first element as zero. - // the step is determined automatically, based - // on the last element. If the last element is - // positive, then step is one, but if it is negative - // then step is minus one. - basic_range(value_type last_element) - : m_first_element(0) { - if (last_element >= m_first_element) *this = basic_range(m_first_element, last_element, 1); - else *this = basic_range(m_first_element, last_element, -1); - } - - basic_range(const basic_range &r) - : m_first_element(r.m_first_element), - m_element_count(r.m_element_count), - m_step(r.m_step) {} - - basic_range &operator=(const basic_range &r) { - m_first_element = r.m_first_element; - m_element_count = r.m_element_count; - m_step = r.m_step; - - return *this; - } - - bool operator==(const basic_range &r) const { - return m_first_element == r.m_first_element - && - m_element_count == r.m_element_count - && - m_step == r.m_step; - } - - bool operator!=(const basic_range &r) const { - return !(*this == r); - } - - // The following four functions enable the user to compare - // ranges using ( <, >, <=, >=). - // The comparison between two ranges is a simple lexicographical - // comparison(element by element). By convention, if two ranges - // R1, R2 where R1 has a smaller number of elements. Then if - // R1 contains more elements but all R1 elements are found in R2 - // R1 is considered less than R2. - bool operator<(const basic_range &r) const { - // ********** This function needs refactoring. 
- - if (m_element_count == 0 && r.m_element_count == 0) - return false; - if (m_element_count == 0 && r.m_element_count > 0) - return true; - if (m_element_count > 0 && r.m_element_count == 0) - return false; - - // At this point, both has at least one element. - if (m_first_element < r.m_first_element) - return true; - if (m_first_element > r.m_first_element) - return false; - - // At this point, the first element of both are equal. - if (m_element_count == 1 && r.m_element_count == 1) - return false; - if (m_element_count == 1 && r.m_element_count > 1) - return true; - if (m_element_count > 1 && r.m_element_count == 1) - return false; - - // At this point, both have at least two elements with - // a similar first element. Note than the final answer - // in this case depends on the second element only, because - // we don't need to compare the elements further. - // Note that the second element is at (index == 1), because - // the first element is at (index == 0). - if (m_first_element + m_step * 1 < r.m_first_element + r.m_step * 1) - return true; - if (m_first_element + m_step * 1 > r.m_first_element + r.m_step * 1) - return false; - - // if the first two elements of both ranges are equal, then - // they are co-linear ranges(because the step is constant). - // In that case, they comparison depends only on - // the size of the ranges by convention. - return m_element_count < r.m_element_count; - } - - bool operator>(const basic_range &r) const { - // ********** This function needs refactoring. - - if (m_element_count == 0 && r.m_element_count == 0) - return false; - if (m_element_count == 0 && r.m_element_count > 0) - return false; - if (m_element_count > 0 && r.m_element_count == 0) - return true; - - // At this point, both has at least one element. - if (m_first_element < r.m_first_element) - return false; - if (m_first_element > r.m_first_element) - return true; - - // At this point, the first element of both are equal. 
- if (m_element_count == 1 && r.m_element_count == 1) - return false; - if (m_element_count == 1 && r.m_element_count > 1) - return false; - if (m_element_count > 1 && r.m_element_count == 1) - return true; - - // At this point, both have at least two elements with - // a similar first element. Note than the final answer - // in this case depends on the second element only, because - // we don't need to compare the elements further. - // Note that the second element is at (index == 1), because - // the first element is at (index == 0). - if (m_first_element + m_step * 1 < r.m_first_element + r.m_step * 1) - return false; - if (m_first_element + m_step * 1 > r.m_first_element + r.m_step * 1) - return true; - - // if the first two elements of both ranges are equal, then - // they are co-linear ranges(because the step is constant). - // In that case, they comparison depends only on - // the size of the ranges by convention. - return m_element_count > r.m_element_count; - } - - bool operator<=(const basic_range &r) const { - return !(*this > r); - } - - bool operator>=(const basic_range &r) const { - return !(*this < r); - } - - const_iterator begin() const { - return const_iterator(this, 0); - } - - const_iterator end() const { - return const_iterator(this, m_element_count); - } - - const_reverse_iterator rbegin() const { - return const_reverse_iterator(this, 0); - } - - const_reverse_iterator rend() const { - return const_reverse_iterator(this, m_element_count); - } - - size_type size() const { - return m_element_count; - } - - size_type max_size() const { - // Because this is an immutable container, - // max_size() == size() - return m_element_count; - } - - bool empty() const { - return m_element_count == 0; - } - - // exist() and find() are similar except that - // find() returns the index of the element. 
- iterator find(value_type element) const { - value_type element_index = (element - m_first_element) / m_step; - bool in_range = element_index >= 0 && element_index < m_element_count && - (element - m_first_element) % m_step == 0; - if (in_range) - return begin() + element_index; - return end(); - } - - bool exist(value_type element) const { - return find(element) != end(); - } - - // In the standard, the operator[] - // should return a const reference. - // Because Range Generator doesn't store its elements - // internally, we return a copy of the value. - // In any case, this doesn't affect the semantics of the operator. - value_type operator[](size_type index) const { - return m_first_element + m_step * index; - } - - private: - // m_first_element: begin (see specifications). - // m_element_count: (end - begin) / step - value_type m_first_element, m_element_count, m_step; -}; - -// This is the default type of range! -typedef basic_range range; -} -} -} - -#endif // range_h__ +/* + Range + ===== + + Copyright (c) 2009-2011 Khaled Alshaya + + Distributed under the Boost Software License, version 1.0 + (See the license at: http://www.boost.org/license_1_0.txt). +*/ + +/* + Rationale + ========= + + In Python, there is a beautiful function called "range". + "range" allows the programmer to iterate over a range elegantly. + This concept is not as general as "for-loops" in C++, + but non the less, it expresses the intent of the programmer + clearer than the general "for-loops" in many cases. + + + Design + ====== + + Range is made to be STL-like library. In fact, it is + built on top of the concepts of STL. The library is designed to + work with STL algorithms as well. Range is more flexible + than the Python "range", because: + + Range is an "immutable ordered random access container" + + + Specifications + ============== + + Range satisfies the following requirements: + + * Immutable. + * Random Access Container. + * Random Access Iterator Interface. 
+ * Constant Time Complexity Operations. + + + Range models an ordered sequence of elements, + where a range is defined by: + + [begin, end) + + * begin: the first element in the range. (Inclusive) + * end : the last element in the range. (Exclusive) + * step : the distance between two consecutive elements in a range. + + where each element in the range is defined by: + + element = begin + step * i + + * i: is the index of the element in range. + + The following precondition must be met for the sequence + to be a valid range: + + step != 0 + && + ( + begin <= end && step > 0 + || + begin >= end && step < 0 + ) + + + Portability + =========== + + Range Generator is written in standard C++ (C++98). It depends + -only- on the standard C++ library. +*/ + +// TODO: See if we should swap this out for our own implementation, for now this is fine, as it is self contained. +/** + * Notes on Modification: + * The code was adpated to fit into our namespacing/naming scheme for simpler use. + * This includes ifdef's, namespace and code formatting style. 
+ */ + +#ifndef PLS_range_h__ +#define PLS_range_h__ + +#include +#include +#include +#include + +namespace pls { +namespace internal { +namespace helpers { + +template +struct basic_range { + struct const_iterator_impl { + typedef IntegerType value_type; + typedef std::size_t size_type; + typedef IntegerType difference_type; + typedef value_type *pointer; + typedef value_type &reference; + typedef + std::random_access_iterator_tag + iterator_category; + + const_iterator_impl() : r(0), index(0) {} + + const_iterator_impl(const const_iterator_impl &rhs) + : r(rhs.r), index(rhs.index) {} + + const_iterator_impl(basic_range const *p_range, size_type p_index) + : r(p_range), index(p_index) {} + + const_iterator_impl &operator=(const const_iterator_impl &rhs) { + r = rhs.r; + index = rhs.index; + return *this; + } + + bool operator==(const const_iterator_impl &rhs) const { + return *r == *(rhs.r) && index == rhs.index; + } + + bool operator!=(const const_iterator_impl &rhs) const { + return !(*this == rhs); + } + + bool operator<(const const_iterator_impl &rhs) const { + return index < rhs.index; + } + + bool operator>(const const_iterator_impl &rhs) const { + return index > rhs.index; + } + + bool operator<=(const const_iterator_impl &rhs) const { + return index <= rhs.index; + } + + bool operator>=(const const_iterator_impl &rhs) const { + return index >= rhs.index; + } + + value_type operator*() const { + return r->m_first_element + r->m_step * index; + } + + // operator-> + // is not implemented because the value_type is an integer type + // and primitive types in C++ don't define member functions. 
+ + const_iterator_impl &operator++() { + ++index; + return *this; + } + + const_iterator_impl operator++(int) { + const_iterator_impl temp = *this; + ++index; + return temp; + } + + const_iterator_impl &operator--() { + --index; + return *this; + } + + const_iterator_impl operator--(int) { + const_iterator_impl temp = *this; + --index; + return temp; + } + + const_iterator_impl &operator+=(difference_type increment) { + index += increment; + return *this; + } + + // operator+ + // is friend operator but operator- + // is not, because we want to allow the following for "+": + // iterator+5 + // 5+iterator + // For the "-" it is not correct to do so, because + // iterator-5 != 5-iterator + friend const_iterator_impl operator+ + (const const_iterator_impl &lhs, difference_type increment) { + const_iterator_impl sum; + sum.r = lhs.r; + sum.index = lhs.index + increment; + return sum; + } + + const_iterator_impl &operator-=(difference_type decrement) { + index -= decrement; + return *this; + } + + const_iterator_impl operator-(difference_type decrement) const { + const_iterator_impl shifted_iterator; + shifted_iterator.r = r; + shifted_iterator.index = index - decrement; + return shifted_iterator; + } + + difference_type operator-(const const_iterator_impl &rhs) const { + return index - rhs.index; + } + + value_type operator[](difference_type offset) const { + size_type new_index = index + offset; + return r->m_first_element + r->m_step * new_index; + } + + private: + basic_range const *r; + size_type index; + }; + + struct const_reverse_iterator_impl { + typedef IntegerType value_type; + typedef std::size_t size_type; + typedef IntegerType difference_type; + typedef value_type *pointer; + typedef value_type &reference; + typedef + std::random_access_iterator_tag + iterator_category; + + const_reverse_iterator_impl() : r(0), index(0) {} + + const_reverse_iterator_impl(const const_reverse_iterator_impl &rhs) + : r(rhs.r), index(rhs.index) {} + + 
const_reverse_iterator_impl(basic_range const *p_range, size_type p_index) + : r(p_range), index(p_index) {} + + const_reverse_iterator_impl &operator=(const const_reverse_iterator_impl &rhs) { + r = rhs.r; + index = rhs.index; + return *this; + } + + bool operator==(const const_reverse_iterator_impl &rhs) const { + return *r == *(rhs.r) && index == rhs.index; + } + + bool operator!=(const const_reverse_iterator_impl &rhs) const { + return !(*this == rhs); + } + + bool operator<(const const_reverse_iterator_impl &rhs) const { + return index < rhs.index; + } + + bool operator>(const const_reverse_iterator_impl &rhs) const { + return index > rhs.index; + } + + bool operator<=(const const_reverse_iterator_impl &rhs) const { + return index <= rhs.index; + } + + bool operator>=(const const_reverse_iterator_impl &rhs) const { + return index >= rhs.index; + } + + value_type operator*() const { + size_type reverse_index + = (r->m_element_count - 1) - index; + return r->m_first_element + r->m_step * reverse_index; + } + + // operator-> + // is not implemented because the value_type is integer type + // and primitive types in C++ don't define member functions. 
+ + const_reverse_iterator_impl &operator++() { + ++index; + return *this; + } + + const_reverse_iterator_impl operator++(int) { + const_reverse_iterator_impl temp = *this; + ++index; + return temp; + } + + const_reverse_iterator_impl &operator--() { + --index; + return *this; + } + + const_reverse_iterator_impl operator--(int) { + const_reverse_iterator_impl temp = *this; + --index; + return temp; + } + + const_reverse_iterator_impl &operator+=(difference_type increment) { + index += increment; + return *this; + } + + // operator+ + // is friend operator but operator- + // is not, because we want to allow the following for "+": + // iterator+5 + // 5+iterator + // For the "-" it is not correct to do so, because + // iterator-5 != 5-iterator + friend const_reverse_iterator_impl operator+ + (const const_reverse_iterator_impl &lhs, difference_type increment) { + const_reverse_iterator_impl sum; + sum.r = lhs.r; + sum.index = lhs.index + increment; + return sum; + } + + const_reverse_iterator_impl &operator-=(difference_type decrement) { + index -= decrement; + return *this; + } + + const_reverse_iterator_impl operator-(difference_type decrement) const { + const_reverse_iterator_impl shifted_iterator; + shifted_iterator.r = r; + shifted_iterator.index = index - decrement; + return shifted_iterator; + } + + difference_type operator-(const const_reverse_iterator_impl &rhs) const { + return index - rhs.index; + } + + value_type operator[](difference_type offset) const { + size_type new_reverse_index + = (r->m_element_count - 1) - (index + offset); + return r->m_first_element + r->m_step * new_reverse_index; + } + + private: + basic_range const *r; + size_type index; + }; + + typedef IntegerType value_type; + typedef const_iterator_impl iterator; + typedef const_iterator_impl const_iterator; + typedef const_reverse_iterator_impl reverse_iterator; + typedef const_reverse_iterator_impl const_reverse_iterator; + typedef value_type &reference; + typedef const value_type 
&const_reference; + typedef value_type *pointer; + typedef IntegerType difference_type; + typedef std::size_t size_type; + + // In the case of default construction, + // the range is considered as an empty range with no elements. + // step can be anything other than 0. 1 is + // an implementation convention, and it doesn't have + // a significance in this case because the range is empty. + basic_range() : m_first_element(0), m_element_count(0), m_step(1) {} + + // first_element: is begin in specifications. + // last_element: is end in specifications. + basic_range(value_type first_element, value_type last_element, value_type step) + : m_first_element(first_element), + m_step(step) { + // We need to count the number of elements. + // The only case where a range is invalid, + // when the step=0. It means that the range + // is infinite, because the number of elements + // in a range, is the length of that range + // divided by the difference between + // every two successive elements. + + if (step == 0) + throw std::out_of_range("Invalid Range: step can't be equal to zero!"); + if (first_element < last_element && step < 0) + throw std::out_of_range("Invalid Range: step can't be backward, while the range is forward!"); + if (first_element > last_element && step > 0) + throw std::out_of_range("Invalid Range: step can't be forward, while the range is backward!"); + + m_element_count = (last_element - first_element) / step; + if ((last_element - first_element) % step != 0) + ++m_element_count; + } + + // The following constructor, determines the step + // automatically. If the range is forward, then + // step will be one. If the range is backward, + // step will be minus one. If the begin is equal + // to end, then the step must not equal to zero + // and it is set to one as a convention. 
+ basic_range(value_type first_element, value_type last_element) + : m_first_element(first_element) { + if (last_element >= first_element) *this = basic_range(first_element, last_element, 1); + else *this = basic_range(first_element, last_element, -1); + + } + + // The following constructor is a shortcut + // if you want the first element as zero. + // the step is determined automatically, based + // on the last element. If the last element is + // positive, then step is one, but if it is negative + // then step is minus one. + basic_range(value_type last_element) + : m_first_element(0) { + if (last_element >= m_first_element) *this = basic_range(m_first_element, last_element, 1); + else *this = basic_range(m_first_element, last_element, -1); + } + + basic_range(const basic_range &r) + : m_first_element(r.m_first_element), + m_element_count(r.m_element_count), + m_step(r.m_step) {} + + basic_range &operator=(const basic_range &r) { + m_first_element = r.m_first_element; + m_element_count = r.m_element_count; + m_step = r.m_step; + + return *this; + } + + bool operator==(const basic_range &r) const { + return m_first_element == r.m_first_element + && + m_element_count == r.m_element_count + && + m_step == r.m_step; + } + + bool operator!=(const basic_range &r) const { + return !(*this == r); + } + + // The following four functions enable the user to compare + // ranges using ( <, >, <=, >=). + // The comparison between two ranges is a simple lexicographical + // comparison(element by element). By convention, if two ranges + // R1, R2 where R1 has a smaller number of elements. Then if + // R1 contains more elements but all R1 elements are found in R2 + // R1 is considered less than R2. + bool operator<(const basic_range &r) const { + // ********** This function needs refactoring. 
+ + if (m_element_count == 0 && r.m_element_count == 0) + return false; + if (m_element_count == 0 && r.m_element_count > 0) + return true; + if (m_element_count > 0 && r.m_element_count == 0) + return false; + + // At this point, both has at least one element. + if (m_first_element < r.m_first_element) + return true; + if (m_first_element > r.m_first_element) + return false; + + // At this point, the first element of both are equal. + if (m_element_count == 1 && r.m_element_count == 1) + return false; + if (m_element_count == 1 && r.m_element_count > 1) + return true; + if (m_element_count > 1 && r.m_element_count == 1) + return false; + + // At this point, both have at least two elements with + // a similar first element. Note than the final answer + // in this case depends on the second element only, because + // we don't need to compare the elements further. + // Note that the second element is at (index == 1), because + // the first element is at (index == 0). + if (m_first_element + m_step * 1 < r.m_first_element + r.m_step * 1) + return true; + if (m_first_element + m_step * 1 > r.m_first_element + r.m_step * 1) + return false; + + // if the first two elements of both ranges are equal, then + // they are co-linear ranges(because the step is constant). + // In that case, they comparison depends only on + // the size of the ranges by convention. + return m_element_count < r.m_element_count; + } + + bool operator>(const basic_range &r) const { + // ********** This function needs refactoring. + + if (m_element_count == 0 && r.m_element_count == 0) + return false; + if (m_element_count == 0 && r.m_element_count > 0) + return false; + if (m_element_count > 0 && r.m_element_count == 0) + return true; + + // At this point, both has at least one element. + if (m_first_element < r.m_first_element) + return false; + if (m_first_element > r.m_first_element) + return true; + + // At this point, the first element of both are equal. 
+ if (m_element_count == 1 && r.m_element_count == 1) + return false; + if (m_element_count == 1 && r.m_element_count > 1) + return false; + if (m_element_count > 1 && r.m_element_count == 1) + return true; + + // At this point, both have at least two elements with + // a similar first element. Note than the final answer + // in this case depends on the second element only, because + // we don't need to compare the elements further. + // Note that the second element is at (index == 1), because + // the first element is at (index == 0). + if (m_first_element + m_step * 1 < r.m_first_element + r.m_step * 1) + return false; + if (m_first_element + m_step * 1 > r.m_first_element + r.m_step * 1) + return true; + + // if the first two elements of both ranges are equal, then + // they are co-linear ranges(because the step is constant). + // In that case, they comparison depends only on + // the size of the ranges by convention. + return m_element_count > r.m_element_count; + } + + bool operator<=(const basic_range &r) const { + return !(*this > r); + } + + bool operator>=(const basic_range &r) const { + return !(*this < r); + } + + const_iterator begin() const { + return const_iterator(this, 0); + } + + const_iterator end() const { + return const_iterator(this, m_element_count); + } + + const_reverse_iterator rbegin() const { + return const_reverse_iterator(this, 0); + } + + const_reverse_iterator rend() const { + return const_reverse_iterator(this, m_element_count); + } + + size_type size() const { + return m_element_count; + } + + size_type max_size() const { + // Because this is an immutable container, + // max_size() == size() + return m_element_count; + } + + bool empty() const { + return m_element_count == 0; + } + + // exist() and find() are similar except that + // find() returns the index of the element. 
+ iterator find(value_type element) const { + value_type element_index = (element - m_first_element) / m_step; + bool in_range = element_index >= 0 && element_index < m_element_count && + (element - m_first_element) % m_step == 0; + if (in_range) + return begin() + element_index; + return end(); + } + + bool exist(value_type element) const { + return find(element) != end(); + } + + // In the standard, the operator[] + // should return a const reference. + // Because Range Generator doesn't store its elements + // internally, we return a copy of the value. + // In any case, this doesn't affect the semantics of the operator. + value_type operator[](size_type index) const { + return m_first_element + m_step * index; + } + + private: + // m_first_element: begin (see specifications). + // m_element_count: (end - begin) / step + value_type m_first_element, m_element_count, m_step; +}; + +// This is the default type of range! +typedef basic_range range; +} +} +} + +#endif // range_h__ diff --git a/lib/pls/include/pls/internal/helpers/tsan_fiber_api.h b/lib/pls/include/pls/internal/helpers/tsan_fiber_api.h new file mode 100644 index 0000000..04df6f0 --- /dev/null +++ b/lib/pls/include/pls/internal/helpers/tsan_fiber_api.h @@ -0,0 +1,21 @@ + +#ifndef PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_HELPERS_TSAN_FIBER_API_H_ +#define PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_HELPERS_TSAN_FIBER_API_H_ + +extern "C" { +// Fiber switching API. +// - TSAN context for fiber can be created by __tsan_create_fiber +// and freed by __tsan_destroy_fiber. +// - TSAN context of current fiber or thread can be obtained +// by calling __tsan_get_current_fiber. +// - __tsan_switch_to_fiber should be called immediatly before switch +// to fiber, such as call of swapcontext. +// - Fiber name can be set by __tsan_set_fiber_name. 
+void *__tsan_get_current_fiber(void); +void *__tsan_create_fiber(unsigned flags); +void __tsan_destroy_fiber(void *fiber); +void __tsan_switch_to_fiber(void *fiber, unsigned flags); +void __tsan_set_fiber_name(void *fiber, const char *name); +}; + +#endif //PREDICTABLE_PARALLEL_PATTERNS_LIB_PLS_INCLUDE_PLS_INTERNAL_HELPERS_TSAN_FIBER_API_H_ diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/deque.h b/lib/pls/include/pls/internal/scheduling/data_structures/deque.h deleted file mode 100644 index 729bbf9..0000000 --- a/lib/pls/include/pls/internal/scheduling/data_structures/deque.h +++ /dev/null @@ -1,21 +0,0 @@ - -#ifndef PLS_DEQUE_H_ -#define PLS_DEQUE_H_ - -#include "work_stealing_deque.h" -#include "locking_deque.h" - -namespace pls { -namespace internal { -namespace scheduling { -namespace data_structures { - -template -using deque = work_stealing_deque; - -} -} -} -} - -#endif //PLS_DEQUE_H_ diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque.h b/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque.h deleted file mode 100644 index e78acd0..0000000 --- a/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque.h +++ /dev/null @@ -1,81 +0,0 @@ - -#ifndef PLS_LOCKING_DEQUE_H -#define PLS_LOCKING_DEQUE_H - -#include - -#include "pls/internal/base/spin_lock.h" -#include "aligned_stack.h" - -namespace pls { -namespace internal { -namespace scheduling { -namespace data_structures { - -using deque_offset = aligned_stack::stack_offset; - -/** - * Wraps any object into a deque Task. - */ -template -struct locking_deque_task { - Task *item_; - - locking_deque_task *prev_; - locking_deque_task *next_; - -}; - -template -struct locking_deque_container : public locking_deque_task { - Content content_; - - public: - template - explicit locking_deque_container(ARGS &&... args) : content_{std::forward(args)...} {} -}; - -/** - * A double linked list based deque. 
- * Storage is therefore only needed for the individual items. - * - * @tparam Task The type of Tasks stored in this deque - */ -template -class locking_deque { - aligned_stack *stack_; - - locking_deque_task *head_; - locking_deque_task *tail_; - - locking_deque_task *last_inserted_; - - base::spin_lock lock_; - - public: - explicit locking_deque(aligned_stack *stack) - : stack_{stack}, head_{nullptr}, tail_{nullptr}, lock_{} {} - - template - T *push_task(ARGS &&... args); - template - T *push_object(ARGS &&... args); - void *push_bytes(size_t size); - void publish_last_task(); - - Task *pop_local_task(bool &cas_fail_out); - Task *pop_local_task(); - Task *pop_external_task(bool &cas_fail_out); - Task *pop_external_task(); - - void reset_offset(deque_offset state); - deque_offset save_offset(); -}; - -} -} -} -} -#include "locking_deque_impl.h" - -#endif //PLS_LOCKING_DEQUE_H diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque_impl.h b/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque_impl.h deleted file mode 100644 index 1723cd6..0000000 --- a/lib/pls/include/pls/internal/scheduling/data_structures/locking_deque_impl.h +++ /dev/null @@ -1,121 +0,0 @@ - -#ifndef PLS_LOCKING_DEQUE_IMPL_H_ -#define PLS_LOCKING_DEQUE_IMPL_H_ - -namespace pls { -namespace internal { -namespace scheduling { -namespace data_structures { - -template -template -T *locking_deque::push_task(ARGS &&...args) { - static_assert(std::is_same::value || std::is_base_of::value, - "Must only push types of onto work_stealing_deque"); - - // Allocate object - auto deque_item = stack_->push>(std::forward(args)...); - deque_item->item_ = &deque_item->content_; - - // Keep for later publishing - last_inserted_ = deque_item; - - // ...actual data reference - return &deque_item->content_; -} - -template -template -T *locking_deque::push_object(ARGS &&... 
args) { - // Simply add data to the stack, do not publish it in any way - return stack_->push(std::forward(args)...); -} - -template -void *locking_deque::push_bytes(size_t size) { - // Simply add data to the stack, do not publish it in any way - return stack_->push_bytes(size); -} - -template -void locking_deque::publish_last_task() { - std::lock_guard lock{lock_}; - - if (tail_ != nullptr) { - tail_->next_ = last_inserted_; - } else { - head_ = last_inserted_; - } - last_inserted_->prev_ = tail_; - last_inserted_->next_ = nullptr; - tail_ = last_inserted_; -} - -template -Task *locking_deque::pop_local_task() { - bool cas_fail_out; - return pop_local_task(cas_fail_out); -} - -template -Task *locking_deque::pop_local_task(bool &cas_fail_out) { - std::lock_guard lock{lock_}; - cas_fail_out = false; // Can not fail CAS in locking implementation - - if (tail_ == nullptr) { - return nullptr; - } - - auto result = tail_; - tail_ = tail_->prev_; - if (tail_ == nullptr) { - head_ = nullptr; - } else { - tail_->next_ = nullptr; - } - - return result->item_; -} - -template -Task *locking_deque::pop_external_task() { - bool cas_fail_out; - return pop_external_task(cas_fail_out); -} - -template -Task *locking_deque::pop_external_task(bool &cas_fail_out) { - std::lock_guard lock{lock_}; - cas_fail_out = false; // Can not fail CAS in locking implementation - - if (head_ == nullptr) { - return nullptr; - } - - auto result = head_; - head_ = head_->next_; - if (head_ == nullptr) { - tail_ = nullptr; - } else { - head_->prev_ = nullptr; - } - - return result->item_; -} - -template -void locking_deque::reset_offset(deque_offset state) { - stack_->reset_offset(state); -} - -template -deque_offset locking_deque::save_offset() { - return stack_->save_offset(); -} - -} -} -} -} - -#endif //PLS_LOCKING_DEQUE_IMPL_H_ diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque.h b/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque.h 
deleted file mode 100644 index e874942..0000000 --- a/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque.h +++ /dev/null @@ -1,101 +0,0 @@ - -#ifndef PLS_WORK_STEALING_DEQUE_H_ -#define PLS_WORK_STEALING_DEQUE_H_ - -#include - -#include "pls/internal/base/error_handling.h" -#include "stamped_integer.h" - -#include "aligned_stack.h" - -namespace pls { -namespace internal { -namespace scheduling { -namespace data_structures { - -using base::system_details::pointer_t; - -// Integer split into two halfs, can be used in CAS operations -using data_structures::stamped_integer; -using deque_offset = stamped_integer::member_t; - -// Single Item in the deque -class work_stealing_deque_item { - // TODO: In our opinion these atomic's are a pure formality to make the thread sanitizer happy, - // as the race occurs in 'pop_head', where ALL CASES reading a corrupt/old value are cases - // where the next CAS fails anywas, thus making these corrupted values have no influence on - // the overall program execution. - // ==> If we find performance problems in this queue, try removing the atomics again. - // Pointer to the actual data - std::atomic data_; - // Index (relative to stack base) to the next and previous element - std::atomic next_item_; - deque_offset previous_item_; - - public: - work_stealing_deque_item() : data_{0}, next_item_{}, previous_item_{} {} - - template - Item *data() { - return reinterpret_cast(data_.load()); - } - - template - void set_data(Item *data) { - data_ = reinterpret_cast(data); - } - - deque_offset next_item() const { return next_item_.load(); } - void set_next_item(deque_offset next_item) { next_item_ = next_item; } - - deque_offset previous_item() const { return previous_item_; } - void set_previous_item(deque_offset previous_item) { previous_item_ = previous_item; } -}; - -template -class work_stealing_deque { - // Deque 'takes over' stack and handles memory management while in use. 
- // At any point in time the deque can stop using more memory and the stack can be used by other entities. - aligned_stack *stack_; - - std::atomic head_; - std::atomic tail_; - deque_offset previous_tail_; - - Task *last_pushed_task_; - - public: - explicit work_stealing_deque(aligned_stack *stack) : stack_{stack}, - head_{stamped_integer{0, 0}}, - tail_{0}, - previous_tail_{0}, - last_pushed_task_{0} {} - - template - T *push_task(ARGS &&... args); - template - T *push_object(ARGS &&... args); - void *push_bytes(size_t size); - void publish_last_task(); - - Task *pop_local_task(bool &cas_fail_out); - Task *pop_local_task(); - Task *pop_external_task(bool &cas_fail_out); - Task *pop_external_task(); - - void reset_offset(deque_offset offset); - deque_offset save_offset(); - - private: - work_stealing_deque_item *item_at(deque_offset offset); - deque_offset current_stack_offset(); -}; - -} -} -} -} -#include "work_stealing_deque_impl.h" - -#endif //PLS_WORK_STEALING_DEQUE_H_ diff --git a/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h b/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h deleted file mode 100644 index 33046fd..0000000 --- a/lib/pls/include/pls/internal/scheduling/data_structures/work_stealing_deque_impl.h +++ /dev/null @@ -1,183 +0,0 @@ - -#ifndef PLS_WORK_STEALING_DEQUE_IMPL_H_ -#define PLS_WORK_STEALING_DEQUE_IMPL_H_ - -#include -#include - -namespace pls { -namespace internal { -namespace scheduling { -namespace data_structures { - -template -work_stealing_deque_item *work_stealing_deque::item_at(deque_offset offset) { - return reinterpret_cast(stack_->memory_at_offset(offset)); -} - -template -deque_offset work_stealing_deque::current_stack_offset() { - return stack_->save_offset(); -} - -template -template -T *work_stealing_deque::push_task(ARGS &&... 
args) { - static_assert(std::is_same::value || std::is_base_of::value, - "Must only push types of onto work_stealing_deque"); - - // 'Union' type to push both the task and the deque entry as one part onto the stack - using pair_t = std::pair; - // Allocate space on stack - auto new_pair = reinterpret_cast(stack_->push_bytes()); - // Initialize memory on stack - new((void *) &(new_pair->first)) work_stealing_deque_item(); - new((void *) &(new_pair->second)) T(std::forward(args)...); - - // Keep reference for later publishing - last_pushed_task_ = &new_pair->second; - - // Item is not publicly visible until it is published - return &(new_pair->second); -} - -template -template -T *work_stealing_deque::push_object(ARGS &&... args) { - // Simply add data to the stack, do not publish it in any way - return stack_->push(std::forward(args)...); -} - -template -void *work_stealing_deque::push_bytes(size_t size) { - // Simply add data to the stack, do not publish it in any way - return stack_->push_bytes(size); -} - -template -void work_stealing_deque::publish_last_task() { - deque_offset local_tail = tail_; - - // Prepare current tail to point to correct next task - auto tail_deque_item = item_at(local_tail); - tail_deque_item->set_data(last_pushed_task_); - tail_deque_item->set_next_item(current_stack_offset()); - tail_deque_item->set_previous_item(previous_tail_); - previous_tail_ = local_tail; - - // Linearization point, task appears after this write - deque_offset new_tail = current_stack_offset(); - tail_ = new_tail; -} - -template -Task *work_stealing_deque::pop_local_task() { - bool cas_fail_out; - return pop_local_task(cas_fail_out); -} - -template -Task *work_stealing_deque::pop_local_task(bool &cas_fail_out) { - deque_offset local_tail = tail_; - stamped_integer local_head = head_; - - if (local_tail <= local_head.value) { - cas_fail_out = false; - return nullptr; // EMPTY - } - - work_stealing_deque_item *previous_tail_item = item_at(previous_tail_); - 
deque_offset new_tail = previous_tail_; - previous_tail_ = previous_tail_item->previous_item(); - - // Publish our wish to set the tail back - tail_ = new_tail; - // Get the state of local head AFTER we published our wish - local_head = head_; // Linearization point, outside knows list is empty - - if (local_head.value < new_tail) { - cas_fail_out = false; - return previous_tail_item->data(); // Success, enough distance to other threads - } - - if (local_head.value == new_tail) { - stamped_integer new_head = stamped_integer{local_head.stamp + 1, new_tail}; - // Try competing with consumers by updating the head's stamp value - if (head_.compare_exchange_strong(local_head, new_head)) { - cas_fail_out = false; - return previous_tail_item->data(); // SUCCESS, we won the competition with other threads - } - } - - // Some other thread either won the competition or it already set the head further than we are - // before we even tried to compete with it. - // Reset the queue into an empty state => head_ = tail_ - tail_ = local_head.value; // ...we give up to the other winning thread - - cas_fail_out = false; // We failed the CAS race, but the queue is also empty for sure! - return nullptr; // EMPTY, we lost the competition with other threads -} - -template -Task *work_stealing_deque::pop_external_task() { - bool cas_fail_out; - return pop_external_task(cas_fail_out); -} - -template -Task *work_stealing_deque::pop_external_task(bool &cas_fail_out) { - stamped_integer local_head = head_; - deque_offset local_tail = tail_; - - if (local_tail <= local_head.value) { - cas_fail_out = false; - return nullptr; // EMPTY - } - // Load info on current deque item. - // In case we have a race with a new (aba) overwritten item at this position, - // there has to be a competition over the tail -> the stamp increased and our next - // operation will fail anyways! 
- work_stealing_deque_item *head_deque_item = item_at(local_head.value); - deque_offset next_item_offset = head_deque_item->next_item(); - Task *head_data_item = head_deque_item->data(); - - // We try to set the head to this new position. - // Possible outcomes: - // 1) no one interrupted us, we win this competition - // 2) other thread took the head, we lose to this - // 3) owning thread removed tail, we lose to this - stamped_integer new_head = stamped_integer{local_head.stamp + 1, next_item_offset}; - if (head_.compare_exchange_strong(local_head, new_head)) { - cas_fail_out = false; - return head_data_item; // SUCCESS, we won the competition - } - - cas_fail_out = true; - return nullptr; // EMPTY, we lost the competition -} - -template -void work_stealing_deque::reset_offset(deque_offset offset) { - stack_->reset_offset(offset); - - stamped_integer local_head = head_; - deque_offset local_tail = tail_; - if (offset < local_tail) { - tail_ = offset; - if (local_head.value >= local_tail) { - head_ = stamped_integer{local_head.stamp + 1, offset}; - } - } -} - -template -deque_offset work_stealing_deque::save_offset() { - return current_stack_offset(); -} - -} -} -} -} - -#endif //PLS_WORK_STEALING_DEQUE_IMPL_H_ diff --git a/lib/pls/include/pls/internal/scheduling/external_trading_deque.h b/lib/pls/include/pls/internal/scheduling/external_trading_deque.h new file mode 100644 index 0000000..e873fe1 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/external_trading_deque.h @@ -0,0 +1,106 @@ + +#ifndef PLS_INTERNAL_SCHEDULING_TASK_TRADING_DEQUE_H_ +#define PLS_INTERNAL_SCHEDULING_TASK_TRADING_DEQUE_H_ + +#include +#include +#include + +#include "pls/internal/base/error_handling.h" +#include "pls/internal/base/system_details.h" + +#include "pls/internal/data_structures/optional.h" +#include "pls/internal/data_structures/stamped_integer.h" + +#include "pls/internal/scheduling/traded_cas_field.h" +#include "pls/internal/scheduling/task.h" + +namespace 
pls::internal::scheduling { + +using namespace data_structures; + +struct trading_deque_entry { + std::atomic traded_task_{nullptr}; + std::atomic forwarding_stamp_{}; +}; + +/** + * A work stealing deque (single produces/consumer at the end, multiple consumers at the start). + * A task object can only be acquired by stealing consumers (from the start), + * when they also offer a task to trade in for it. + * + * The exchange of 'goods' (here tasks) happens atomically at a linearization point. + * This means that the owning thread always gets a tasks for each and every task that was + * successfully stolen. + * + * As each task is associated with memory this suffices to exchange memory blocks needed for execution. + */ +class external_trading_deque { + public: + external_trading_deque(unsigned thread_id, size_t num_entries) : thread_id_(thread_id), entries_(num_entries) {} + + static optional peek_traded_object(task *target_task); + static optional get_trade_object(task *target_task); + + /** + * Pushes a task on the bottom of the deque. + * The task itself wil be filled with the unique, synchronizing cas word. + * + * @param published_task The task to publish on the bottom of the deque. + */ + void push_bot(task *published_task); + + /** + * Tries to pop the last task on the deque. + * + * @return optional holding the popped task if successful. + */ + optional pop_bot(); + + struct peek_result { + peek_result(optional top_task, stamped_integer top_pointer) : top_task_{std::move(top_task)}, + top_pointer_{top_pointer} {}; + optional top_task_; + stamped_integer top_pointer_; + }; + + /** + * Peek at the current task on top of the deque. + * This is required, as we need to look at the task to figure out what we trade in for it. + * (Note: we could go without this by doing some tricks with top/bot pointers, but this + * is simpler and also more flexible if the traded objects are not as trivial as currently). 
+ * + * @return a peek result containing the optional top task (if present) and the current head pointer. + */ + peek_result peek_top(); + + /** + * Tries to pop the task on top of the deque that was + * previously observed by 'peeking' at the deque. + * + * Returns the task if successful, returns nothing if + * either the peeked task is no longer at the top of the deque + * or another thread interfered and 'won' the task. + * + * @return optional holding the popped task if successful. + */ + optional pop_top(task *offered_task, peek_result peek_result); + + private: + void reset_bot_and_top(); + void decrease_bot(); + + // info on this deque + unsigned thread_id_; + std::vector entries_; + + // fields for stealing/interacting + stamped_integer bot_internal_{0, 0}; + + PLS_CACHE_ALIGN std::atomic top_{{0, 0}}; + PLS_CACHE_ALIGN std::atomic bot_{0}; +}; + +} + +#endif //PLS_INTERNAL_SCHEDULING_TASK_TRADING_DEQUE_H_ diff --git a/lib/pls/include/pls/internal/scheduling/heap_scheduler_memory.h b/lib/pls/include/pls/internal/scheduling/heap_scheduler_memory.h new file mode 100644 index 0000000..07c44c2 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/heap_scheduler_memory.h @@ -0,0 +1,59 @@ +#ifndef PLS_HEAP_SCHEDULER_MEMORY_H +#define PLS_HEAP_SCHEDULER_MEMORY_H + +#include + +#include "pls/internal/base/thread.h" + +#include "pls/internal/scheduling/scheduler_memory.h" +#include "pls/internal/scheduling/thread_state.h" +#include "pls/internal/scheduling/thread_state_static.h" + +namespace pls { +namespace internal { +namespace scheduling { + +template +class heap_scheduler_memory : public scheduler_memory { + public: + explicit heap_scheduler_memory(size_t max_threads) : max_threads_{max_threads}, + thread_vector_{}, + thread_state_vector_{}, + thread_state_pointers_{} { + thread_vector_.reserve(max_threads); + thread_state_vector_.reserve(max_threads); + + for (size_t i = 0; i < max_threads; i++) { + thread_vector_.emplace_back(); + 
thread_state_vector_.emplace_back(); + thread_state_pointers_.emplace_back(&thread_state_vector_[i].get_thread_state()); + } + thread_states_array_ = thread_state_pointers_.data(); + } + + size_t max_threads() const override { + return max_threads_; + } + + base::thread &thread_for(size_t id) override { + return thread_vector_[id]; + } + private: + using thread_state_type = thread_state_static; + // thread_state_type is aligned at the cache line and therefore overaligned (C++ 11 does not require + // the new operator to obey alignments bigger than 16, cache lines are usually 64). + // To allow this object to be allocated using 'new' (which the vector does internally), + // we need to wrap it in an non aligned object. + using thread_state_wrapper = base::alignment::cache_alignment_wrapper; + + size_t max_threads_; + std::vector thread_vector_; + std::vector thread_state_vector_; + std::vector thread_state_pointers_; +}; + +} +} +} + +#endif // PLS_HEOP_SCHEDULER_MEMORY_H diff --git a/lib/pls/include/pls/internal/scheduling/lambda_task.h b/lib/pls/include/pls/internal/scheduling/lambda_task.h deleted file mode 100644 index 1d3d4b1..0000000 --- a/lib/pls/include/pls/internal/scheduling/lambda_task.h +++ /dev/null @@ -1,47 +0,0 @@ - -#ifndef PLS_LAMBDA_TASK_H_ -#define PLS_LAMBDA_TASK_H_ - -#include "pls/internal/scheduling/task.h" - -namespace pls { -namespace internal { -namespace scheduling { - -template -class lambda_task_by_reference : public task { - const Function &function_; - - public: - explicit lambda_task_by_reference(const Function &function) : task{}, function_{function} {}; - - protected: - void execute_internal() override { - function_(); - - wait_for_all(); - this->~lambda_task_by_reference(); - } -}; - -template -class lambda_task_by_value : public task { - const Function function_; - - public: - explicit lambda_task_by_value(const Function &function) : task{}, function_{function} {}; - - protected: - void execute_internal() override { - function_(); 
- - wait_for_all(); - this->~lambda_task_by_value(); - } -}; - -} -} -} - -#endif //PLS_LAMBDA_TASK_H_ diff --git a/lib/pls/include/pls/internal/scheduling/scheduler.h b/lib/pls/include/pls/internal/scheduling/scheduler.h index f93bab4..ec0e4fb 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler.h @@ -2,56 +2,58 @@ #ifndef PLS_SCHEDULER_H #define PLS_SCHEDULER_H -#include -#include +#include +#include +#include +#include #include "pls/internal/helpers/profiler.h" -#include "pls/internal/scheduling/data_structures/aligned_stack.h" - -#include "pls/internal/base/thread.h" #include "pls/internal/base/barrier.h" +#include "pls/internal/base/stack_allocator.h" -#include "pls/internal/scheduling/scheduler_memory.h" #include "pls/internal/scheduling/thread_state.h" -#include "pls/internal/scheduling/task.h" +#include "pls/internal/scheduling/task_manager.h" + +namespace pls::internal::scheduling { -namespace pls { -namespace internal { -namespace scheduling { +struct task; /** * The scheduler is the central part of the dispatching-framework. * It manages a pool of worker threads (creates, sleeps/wakes up, destroys) * and allows to execute parallel sections. * - * It works in close rellation with the 'task' class for scheduling. + * It works in close relation with the 'task' class for scheduling. */ class scheduler { - friend class task; - const unsigned int num_threads_; - const bool reuse_thread_; - scheduler_memory *memory_; - - base::barrier sync_barrier_; - - task *main_thread_root_task_; - std::atomic work_section_done_; - - bool terminated_; public: /** * Initializes a scheduler instance with the given number of threads. - * This will spawn the threads and put them to sleep, ready to process an - * upcoming parallel section. + * This will allocate ALL runtime resources, spawn the worker threads + * and put them to sleep, ready to process an upcoming parallel section. 
+ * + * The initialization should be seen as a heavy and not very predictable operation. + * After it is done the scheduler must (if configured correctly) never run out of resources + * and deliver tight time bounds of randomized work-stealing. * - * @param memory All memory is allocated statically, thus the user is required to provide the memory instance. * @param num_threads The number of worker threads to be created. */ - explicit scheduler(scheduler_memory *memory, unsigned int num_threads, bool reuse_thread = true); + explicit scheduler(unsigned int num_threads, + size_t computation_depth, + size_t stack_size, + bool reuse_thread = true); + + template + explicit scheduler(unsigned int num_threads, + size_t computation_depth, + size_t stack_size, + bool reuse_thread, + ALLOC &&stack_allocator); /** * The scheduler is implicitly terminated as soon as it leaves the scope. + * Resources follow a clean RAII style. */ ~scheduler(); @@ -67,51 +69,57 @@ class scheduler { void perform_work(Function work_section); /** - * Explicitly terminate the worker threads. Scheduler must not be used after this. - */ - void terminate(); - - /** - * Helper to spawn a child on the currently running task. + * Main parallelism construct, spawns a function for potential parallel execution. + * + * The result of the spawned function must not be relied on until sync() is called. + * Best see the lambda as if executed on a thread, e.g. it can cause race conditions + * and it is only finished after you join it back into the parent thread using sync(). * - * @tparam T type of the new task - * @tparam ARGS Constructor argument types - * @param args constructor arguments + * @param lambda the lambda to be executed in parallel. */ - template - static void spawn_child(ARGS &&... args); + template + static void spawn(Function &&lambda); /** - * Helper to spawn a child on the currently running task and waiting for it (skipping over the task-deque). 
- * - * @tparam T type of the new task - * @tparam ARGS Constructor argument types - * @param args constructor arguments + * Waits for all potentially parallel child tasks created with spawn(...). */ - template - static void spawn_child_and_wait(ARGS &&... args); + static void sync(); + + thread_state &thread_state_for(unsigned int thread_id) { return *thread_states_[thread_id]; } + task_manager &task_manager_for(unsigned int thread_id) { return *task_managers_[thread_id]; } /** - * Helper to wait for all children of the currently executing task. + * Explicitly terminate the worker threads. Scheduler must not be used after this. */ - static void wait_for_all(); + void terminate(); - unsigned int num_threads() const { return num_threads_; } + [[nodiscard]] unsigned int num_threads() const { return num_threads_; } private: - static void worker_routine(); - thread_state *thread_state_for(size_t id); + static void work_thread_main_loop(); + void work_thread_work_section(); + + const unsigned int num_threads_; + const bool reuse_thread_; + base::barrier sync_barrier_; + + std::vector worker_threads_; + std::vector> task_managers_; + std::vector> thread_states_; + + class init_function; + template + class init_function_impl; + + init_function *main_thread_starter_function_; + std::atomic work_section_done_; - task *get_local_task(); - task *steal_task(); + bool terminated_; - bool try_execute_local(); - bool try_execute_stolen(); + std::shared_ptr stack_allocator_; }; } -} -} #include "scheduler_impl.h" #endif //PLS_SCHEDULER_H diff --git a/lib/pls/include/pls/internal/scheduling/scheduler_impl.h b/lib/pls/include/pls/internal/scheduling/scheduler_impl.h index 386a32b..3c3a73b 100644 --- a/lib/pls/include/pls/internal/scheduling/scheduler_impl.h +++ b/lib/pls/include/pls/internal/scheduling/scheduler_impl.h @@ -2,36 +2,94 @@ #ifndef PLS_SCHEDULER_IMPL_H #define PLS_SCHEDULER_IMPL_H -#include "pls/internal/scheduling/lambda_task.h" +#include -namespace pls { -namespace 
internal { -namespace scheduling { +#include "context_switcher/context_switcher.h" +#include "context_switcher/continuation.h" -// TODO: generally look into the performance implications of using many thread_state::get() calls +#include "pls/internal/scheduling/task_manager.h" +#include "pls/internal/scheduling/task.h" + +#include "pls/internal/helpers/profiler.h" + +namespace pls::internal::scheduling { + +template +scheduler::scheduler(unsigned int num_threads, + size_t computation_depth, + size_t stack_size, + bool reuse_thread, + ALLOC &&stack_allocator) : + num_threads_{num_threads}, + reuse_thread_{reuse_thread}, + sync_barrier_{num_threads + 1 - reuse_thread}, + worker_threads_{}, + thread_states_{}, + main_thread_starter_function_{nullptr}, + work_section_done_{false}, + terminated_{false}, + stack_allocator_{std::make_shared(std::forward(stack_allocator))} { + + worker_threads_.reserve(num_threads); + task_managers_.reserve(num_threads); + thread_states_.reserve(num_threads); + for (unsigned int i = 0; i < num_threads_; i++) { + auto &this_task_manager = + task_managers_.emplace_back(std::make_unique(i, + computation_depth, + stack_size, + stack_allocator_)); + auto &this_thread_state = thread_states_.emplace_back(std::make_unique(*this, i, *this_task_manager)); + + if (reuse_thread && i == 0) { + worker_threads_.emplace_back(); + continue; // Skip over first/main thread when re-using the users thread, as this one will replace the first one. 
+ } + + auto *this_thread_state_pointer = this_thread_state.get(); + worker_threads_.emplace_back([this_thread_state_pointer] { + thread_state::set(this_thread_state_pointer); + work_thread_main_loop(); + }); + } +} + +class scheduler::init_function { + public: + virtual void run() = 0; +}; +template +class scheduler::init_function_impl : public init_function { + public: + explicit init_function_impl(F &function) : function_{function} {} + void run() override { + auto &root_task = thread_state::get().get_task_manager().get_active_task(); + root_task.run_as_task([&](context_switcher::continuation cont) { + thread_state::get().main_continuation() = std::move(cont); + function_(); + thread_state::get().get_scheduler().work_section_done_.store(true); + PLS_ASSERT(thread_state::get().main_continuation().valid(), "Must return valid continuation from main task."); + return std::move(thread_state::get().main_continuation()); + }); + + } + private: + F &function_; +}; template void scheduler::perform_work(Function work_section) { - PROFILE_WORK_BLOCK("scheduler::perform_work") - // Prepare main root task - lambda_task_by_reference root_task{work_section}; - main_thread_root_task_ = &root_task; - work_section_done_ = false; + init_function_impl starter_function{work_section}; + main_thread_starter_function_ = &starter_function; + work_section_done_ = false; if (reuse_thread_) { - // TODO: See if we should change thread-states to not make our state override the current thread state - auto my_state = memory_->thread_state_for(0); - base::this_thread::set_state(my_state); // Make THIS THREAD become the main worker + auto &my_state = thread_state_for(0); + thread_state::set(&my_state); // Make THIS THREAD become the main worker sync_barrier_.wait(); // Trigger threads to wake up - - // Do work (see if we can remove this duplicated code) - root_task.parent_ = nullptr; - root_task.deque_offset_ = my_state->deque_.save_offset(); - root_task.execute(); - work_section_done_ = true; - 
+ work_thread_work_section(); // Simply also perform the work section on the main loop sync_barrier_.wait(); // Wait for threads to finish } else { // Simply trigger the others to do the work, this thread will sleep/wait for the time being @@ -40,18 +98,11 @@ void scheduler::perform_work(Function work_section) { } } -template -void scheduler::spawn_child(ARGS &&... args) { - thread_state::get()->current_task_->spawn_child(std::forward(args)...); -} - -template -void scheduler::spawn_child_and_wait(ARGS &&... args) { - thread_state::get()->current_task_->spawn_child_and_wait(std::forward(args)...); +template +void scheduler::spawn(Function &&lambda) { + thread_state::get().get_task_manager().spawn_child(std::forward(lambda)); } } -} -} #endif //PLS_SCHEDULER_IMPL_H diff --git a/lib/pls/include/pls/internal/scheduling/scheduler_memory.h b/lib/pls/include/pls/internal/scheduling/scheduler_memory.h deleted file mode 100644 index 6552ad1..0000000 --- a/lib/pls/include/pls/internal/scheduling/scheduler_memory.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef PLS_SCHEDULER_MEMORY_H -#define PLS_SCHEDULER_MEMORY_H - -#include "pls/internal/scheduling/data_structures/aligned_stack.h" -#include "pls/internal/base/thread.h" - -#include "pls/internal/scheduling/thread_state.h" - -namespace pls { -namespace internal { -namespace scheduling { - -void worker_routine(); - -class scheduler_memory { - private: - size_t max_threads_; - thread_state **thread_states_; - base::thread **threads_; - data_structures::aligned_stack **task_stacks_; - - protected: - void init(size_t max_therads, - thread_state **thread_states, - base::thread **threads, - data_structures::aligned_stack **task_stacks) { - max_threads_ = max_therads; - thread_states_ = thread_states; - threads_ = threads; - task_stacks_ = task_stacks; - } - - public: - size_t max_threads() const { - return max_threads_; - } - thread_state *thread_state_for(size_t id) const { - return thread_states_[id]; - } - base::thread 
*thread_for(size_t id) const { - return threads_[id]; - } - data_structures::aligned_stack *task_stack_for(size_t id) const { - return task_stacks_[id]; - } -}; - -template -class static_scheduler_memory : public scheduler_memory { - // Everyone of these types has to live on its own cache line, - // as each thread uses one of them independently. - // Therefore it would be a major performance hit if we shared cache lines on these. - using aligned_thread = base::alignment::aligned_wrapper; - using aligned_thread_state = base::alignment::aligned_wrapper; - using aligned_thread_stack = base::alignment::aligned_wrapper>; - using aligned_aligned_stack = base::alignment::aligned_wrapper; - - // Actual Memory - std::array threads_; - std::array thread_states_; - std::array task_stacks_memory_; - std::array task_stacks_; - - // References for parent - std::array thread_refs_; - std::array thread_state_refs_; - std::array task_stack_refs_; - - public: - static_scheduler_memory() : scheduler_memory() { - for (size_t i = 0; i < MAX_THREADS; i++) { - new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i].pointer()->data(), - TASK_STACK_SIZE); - - thread_refs_[i] = threads_[i].pointer(); - thread_state_refs_[i] = thread_states_[i].pointer(); - task_stack_refs_[i] = task_stacks_[i].pointer(); - } - - init(MAX_THREADS, thread_state_refs_.data(), thread_refs_.data(), task_stack_refs_.data()); - } -}; - -class malloc_scheduler_memory : public scheduler_memory { - // Everyone of these types has to live on its own cache line, - // as each thread uses one of them independently. - // Therefore it would be a major performance hit if we shared cache lines on these. 
- using aligned_thread = base::alignment::aligned_wrapper; - using aligned_thread_state = base::alignment::aligned_wrapper; - using aligned_aligned_stack = base::alignment::aligned_wrapper; - - const size_t num_threads_; - - // Actual Memory - aligned_thread *threads_; - aligned_thread_state *thread_states_; - char **task_stacks_memory_; - aligned_aligned_stack *task_stacks_; - - // References for parent - base::thread **thread_refs_; - thread_state **thread_state_refs_; - data_structures::aligned_stack **task_stack_refs_; - - public: - explicit malloc_scheduler_memory(size_t num_threads, size_t memory_per_stack = 2 << 16); - ~malloc_scheduler_memory(); -}; - -} -} -} - -#endif //PLS_SCHEDULER_MEMORY_H diff --git a/lib/pls/include/pls/internal/scheduling/task.h b/lib/pls/include/pls/internal/scheduling/task.h index e7b23fb..32888fd 100644 --- a/lib/pls/include/pls/internal/scheduling/task.h +++ b/lib/pls/include/pls/internal/scheduling/task.h @@ -1,125 +1,74 @@ - #ifndef PLS_TASK_H #define PLS_TASK_H -#include "pls/internal/helpers/profiler.h" - -#include "pls/internal/scheduling/data_structures/aligned_stack.h" -#include "pls/internal/scheduling/data_structures/deque.h" +#include +#include -#include "pls/internal/scheduling/thread_state.h" +#include "context_switcher/continuation.h" +#include "context_switcher/context_switcher.h" -namespace pls { -namespace internal { -namespace scheduling { +#include "pls/internal/base/system_details.h" +#include "pls/internal/data_structures/stamped_integer.h" +#include "pls/internal/scheduling/traded_cas_field.h" +namespace pls::internal::scheduling { /** - * A task to be executed by the runtime system. - * Tasks are guaranteed to be executed exactly once. + * A task is the smallest unit of execution seen by the runtime system. * - * Override the execute_internal() method for your custom code. + * Tasks represent a action dispatched by a potentially parallel call. 
+ * Tasks have their own execution context (stack and register state), making them stackefull coroutines. + * Tasks can be suspended and resumed (stealing happens by resuming a task). * - * IMPORTANT: - * Tasks memory is re-used without calling the destructor. - * You must call it yourself at the end of execute_internal(). - * This is done to not introduce any overhead of virtual function calls - * if no clean up is required. + * Being coroutines tasks go through a very deliberate state machine: + * - initialized (no execution state) + * - running (currently executing user code) + * - suspended (suspended by switching to a different task). */ -class task { - friend class scheduler; - - // Memory-Management (allow to allocate memory blocks in constructor) - bool finished_construction_; - - // Coordinate finishing of sub_tasks - std::atomic ref_count_; - task *parent_; - - // Stack Management (reset stack pointer after wait_for_all() calls) - data_structures::deque_offset deque_offset_; - - protected: - /* - * Must call the parent constructor. - * - * IMPORTANT: - * Tasks memory is re-used without calling the destructor. - * You must call it yourself at the end of execute_internal(). - */ - explicit task(); - - /** - * Allow to allocate extra memory during run-time for this task. - * Memory will be pushed onto the stack (in aligned memory, thus avoid many small chunks). - * MUST be called in constructor, never afterwards. - * - * Memory is fully self managed. Calling e.g. deconstructors when not needing objects - * anymore is the users responsibility (memory is simply re-used after the life time of the task ends). - * - * @param size Number of bytes to be allocated - * @return The allocated memory region - */ - void *allocate_memory(long size); - - /** - * Overwrite this with the actual behaviour of concrete tasks. - */ - virtual void execute_internal() = 0; - - template - void spawn_child(ARGS &&... args); - template - void spawn_child_and_wait(ARGS &&... 
args); - void wait_for_all(); - - private: - void execute(); +struct PLS_CACHE_ALIGN task { + task(char *stack_memory, size_t stack_size, unsigned depth, unsigned thread_id) : + stack_memory_{stack_memory}, + stack_size_{stack_size}, + is_synchronized_{false}, + depth_{depth}, + thread_id_{thread_id}, + prev_{nullptr}, + next_{nullptr} {} + + // Do not allow accidental copy/move operations. + // The whole runtime relies on tasks never changing memory positions during execution. + // Create tasks ONCE and use them until the runtime is shut down. + task(const task &other) = delete; + task(task &&other) = delete; + task &operator=(const task &other) = delete; + task &operator=(task &&other) = delete; + + template + context_switcher::continuation run_as_task(F &&lambda) { + return context_switcher::enter_context(stack_memory_, stack_size_, std::forward(lambda)); + } + + // TODO: Proper access control and split it up into responsibilities + // Stack/Continuation Management + char *stack_memory_; + size_t stack_size_; + context_switcher::continuation continuation_; + bool is_synchronized_; + + // TODO: Clean up responsibilities + // Work-Stealing + std::atomic external_trading_deque_cas_{}; + std::atomic resource_stack_next_{}; + std::atomic resource_stack_root_{{0, 0}}; + + // Task Tree (we have a parent that we want to continue when we finish) + unsigned depth_; + unsigned thread_id_; + + // Memory Linked List + task *prev_; + task *next_; }; -template -void task::spawn_child(ARGS &&... 
args) { - PROFILE_FORK_JOIN_STEALING("spawn_child") - static_assert(std::is_base_of::type>::value, "Only pass task subclasses!"); - - // Keep our refcount up to date - ref_count_++; - - // Push on our deque - auto item = thread_state::get()->deque_.push_task(std::forward(args)...); - - // Assign forced values (for stack and parent management) - item->parent_ = this; - item->finished_construction_ = true; - item->deque_offset_ = thread_state::get()->deque_.save_offset(); - - // Make new task visible to others - thread_state::get()->deque_.publish_last_task(); -} - -template -void task::spawn_child_and_wait(ARGS &&... args) { - static_assert(std::is_base_of::type>::value, "Only pass task subclasses!"); - - spawn_child(std::forward(args)...); - // TODO: Check why 'direct spawn' (even when pushing it onto the tas queue) seems to be slower - // (Also check if it even is slower or if it only appears so on our laptop) -// // Push on our deque -// auto task = thread_state::get()->deque_.push_task(std::forward(args)...); -// -// // Assign forced values (for stack and parent management) -// task->parent_ = nullptr; // ...do not assign this to a parent => it will not notify our reference counter -// task->finished_construction_ = true; -// task->deque_offset_ = thread_state::get()->deque_.save_offset(); -// -// // Execute it -// task->execute(); - - // Wait for the rest of the tasks - wait_for_all(); -} - -} -} } #endif //PLS_TASK_H diff --git a/lib/pls/include/pls/internal/scheduling/task_manager.h b/lib/pls/include/pls/internal/scheduling/task_manager.h new file mode 100644 index 0000000..7ec2637 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/task_manager.h @@ -0,0 +1,86 @@ + +#ifndef PLS_TASK_MANAGER_H_ +#define PLS_TASK_MANAGER_H_ + +#include +#include +#include + +#include "pls/internal/scheduling/task.h" +#include "pls/internal/scheduling/external_trading_deque.h" + +#include "pls/internal/base/stack_allocator.h" + +namespace pls::internal::scheduling { + 
+/** + * Handles management of tasks in the system. Each thread has a local task manager, + * responsible for allocating, freeing and publishing tasks for stealing. + * + * All interactions for spawning, stealing and task trading are managed through this class. + */ +class task_manager { + using stack_allocator = pls::internal::base::stack_allocator; + + public: + explicit task_manager(unsigned thread_id, + size_t num_tasks, + size_t stack_size, + std::shared_ptr &stack_allocator); + ~task_manager(); + + void push_resource_on_task(task *target_task, task *spare_task_chain); + task *pop_resource_from_task(task *target_task); + + task &get_this_thread_task(size_t depth) { + return *tasks_[depth]; + } + + task &get_active_task() { + return *active_task_; + } + void set_active_task(task *active_task) { + active_task_ = active_task; + } + + template + void spawn_child(F &&lambda); + void sync(); + + task *steal_task(task_manager &stealing_task_manager); + + bool try_clean_return(context_switcher::continuation &result_cont); + + /** + * Helper to check if a task chain is correctly chained forward from the given starting task. + * + * @param start_task The start of the 'to be clean' chain + * @return true if the chain is clean/consistent. + */ + bool check_task_chain_forward(task *start_task); + /** + * Helper to check if a task chain is correctly chained backward from the given starting task. + * + * @param start_task The end of the 'to be clean' chain + * @return true if the chain is clean/consistent. + */ + bool check_task_chain_backward(task *start_task); + /** + * Check the task chain maintained by this task manager. + * + * @return true if the chain is in a clean/consistent state. 
+ */ + bool check_task_chain(); + + private: + std::shared_ptr stack_allocator_; + std::vector> tasks_; + task *active_task_; + + external_trading_deque deque_; +}; + +} +#include "task_manager_impl.h" + +#endif //PLS_TASK_MANAGER_H_ diff --git a/lib/pls/include/pls/internal/scheduling/task_manager_impl.h b/lib/pls/include/pls/internal/scheduling/task_manager_impl.h new file mode 100644 index 0000000..07d1d78 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/task_manager_impl.h @@ -0,0 +1,78 @@ + +#ifndef PLS_TASK_MANAGER_IMPL_H_ +#define PLS_TASK_MANAGER_IMPL_H_ + +#include +#include +#include + +#include "context_switcher/continuation.h" + +#include "pls/internal/scheduling/task.h" +#include "pls/internal/scheduling/thread_state.h" + +namespace pls::internal::scheduling { + +template +void task_manager::spawn_child(F &&lambda) { + auto *spawning_task_manager = this; + auto *last_task = spawning_task_manager->active_task_; + auto *spawned_task = spawning_task_manager->active_task_->next_; + + auto continuation = + spawned_task->run_as_task([=](context_switcher::continuation cont) { + // allow stealing threads to continue the last task. + last_task->continuation_ = std::move(cont); + + // we are now executing the new task, allow others to steal the last task continuation. + spawned_task->is_synchronized_ = true; + spawning_task_manager->active_task_ = spawned_task; + spawning_task_manager->deque_.push_bot(last_task); + + // execute the lambda itself, which could lead to a different thread returning. + lambda(); + auto *syncing_task_manager = &thread_state::get().get_task_manager(); + PLS_ASSERT(syncing_task_manager->active_task_ == spawned_task, + "Task manager must always point its active task onto whats executing."); + + // try to pop a task of the syncing task manager. 
+ // possible outcomes: + // - this is a different task manager, it must have an empty deque and fail + // - this is the same task manager and someone stole the last task, thus this will fail + // - this is the same task manager and no one stole the last task, thus this will succeed + auto pop_result = syncing_task_manager->deque_.pop_bot(); + if (pop_result) { + // Fast path, simply continue execution where we left off before spawn. + PLS_ASSERT(*pop_result == last_task, + "Fast path, nothing can have changed until here."); + PLS_ASSERT(spawning_task_manager == syncing_task_manager, + "Fast path, nothing can have changed here."); + PLS_ASSERT(last_task->continuation_.valid(), + "Fast path, no one can have continued working on the last task."); + + syncing_task_manager->active_task_ = last_task; + return std::move(last_task->continuation_); + } else { + // Slow path, the last task was stolen. Sync using the resource stack. + context_switcher::continuation result_cont; + if (syncing_task_manager->try_clean_return(result_cont)) { + // We return back to the main scheduling loop + PLS_ASSERT(result_cont.valid(), "Must only return valid continuations..."); + return result_cont; + } else { + // We finish up the last task and are the sole owner again + PLS_ASSERT(result_cont.valid(), "Must only return valid continuations..."); + return result_cont; + } + } + }); + + if (continuation.valid()) { + // We jumped in here from the main loop, keep track! 
+ thread_state::get().main_continuation() = std::move(continuation); + } +} + +} + +#endif //PLS_TASK_MANAGER_IMPL_H_ diff --git a/lib/pls/include/pls/internal/scheduling/thread_state.h b/lib/pls/include/pls/internal/scheduling/thread_state.h index 48a7b29..7f717fd 100644 --- a/lib/pls/include/pls/internal/scheduling/thread_state.h +++ b/lib/pls/include/pls/internal/scheduling/thread_state.h @@ -3,48 +3,74 @@ #define PLS_THREAD_STATE_H #include +#include +#include -#include "pls/internal/base/thread.h" +#include "context_switcher/continuation.h" -#include "pls/internal/scheduling/data_structures/aligned_stack.h" -#include "pls/internal/scheduling/data_structures/deque.h" +#include "pls/internal/base/system_details.h" -namespace pls { -namespace internal { -namespace scheduling { +namespace pls::internal::scheduling { -// forward declaration class scheduler; -class task; +class task_manager; -struct thread_state { - alignas(base::system_details::CACHE_LINE_SIZE) scheduler *scheduler_; - alignas(base::system_details::CACHE_LINE_SIZE) task *current_task_; - alignas(base::system_details::CACHE_LINE_SIZE) data_structures::aligned_stack *task_stack_; - alignas(base::system_details::CACHE_LINE_SIZE) data_structures::deque deque_; - alignas(base::system_details::CACHE_LINE_SIZE) size_t id_; - alignas(base::system_details::CACHE_LINE_SIZE) std::minstd_rand random_; +/** + * Proxy-Object for thread local state needed during scheduling. + * The main use is to perform thread_state::get() as a thread local + * memory to identify the current worker thread state. + * + * Holds only minimal data by itself and points to the appropriate scheduler + * and task manager objects associated with this thread. 
+ */ +struct PLS_CACHE_ALIGN thread_state { + private: + const unsigned thread_id_; + scheduler &scheduler_; + task_manager &task_manager_; - thread_state(scheduler *scheduler, data_structures::aligned_stack *task_stack, unsigned int id) : + PLS_CACHE_ALIGN context_switcher::continuation main_loop_continuation_; + PLS_CACHE_ALIGN std::minstd_rand random_; + + public: + explicit thread_state(scheduler &scheduler, + unsigned thread_id, + task_manager &task_manager) : + thread_id_{thread_id}, scheduler_{scheduler}, - current_task_{nullptr}, - task_stack_{task_stack}, - deque_{task_stack_}, - id_{id}, - random_{id_} {} + task_manager_{task_manager}, + random_{static_cast(std::chrono::steady_clock::now().time_since_epoch().count())} {}; + + // Do not allow accidental copy/move operations. + thread_state(const thread_state &) = delete; + thread_state(thread_state &&) = delete; + thread_state &operator=(const thread_state &) = delete; + thread_state &operator=(thread_state &&) = delete; /** * Convenience helper to get the thread_state instance associated with this thread. * Must only be called on threads that are associated with a thread_state, * this will most likely be threads created by the scheduler. * + * Each call is guaranteed to be a new lookup, i.e. it is not cached after fiber context switches. + * * @return The thread_state of this thread. 
*/ - static thread_state *get() { return base::this_thread::state(); } + [[nodiscard]] static thread_state &PLS_NOINLINE get(); + static void set(thread_state *); + + [[nodiscard]] unsigned get_thread_id() const { return thread_id_; } + [[nodiscard]] task_manager &get_task_manager() { return task_manager_; } + [[nodiscard]] scheduler &get_scheduler() { return scheduler_; } + [[nodiscard]] long get_rand() { + return random_(); + } + + [[nodiscard]] context_switcher::continuation &main_continuation() { + return main_loop_continuation_; + } }; } -} -} #endif //PLS_THREAD_STATE_H diff --git a/lib/pls/include/pls/internal/scheduling/traded_cas_field.h b/lib/pls/include/pls/internal/scheduling/traded_cas_field.h new file mode 100644 index 0000000..3d31528 --- /dev/null +++ b/lib/pls/include/pls/internal/scheduling/traded_cas_field.h @@ -0,0 +1,84 @@ + +#ifndef PLS_INTERNAL_SCHEDULING_TRADED_CAS_FIELD_H_ +#define PLS_INTERNAL_SCHEDULING_TRADED_CAS_FIELD_H_ + +#include + +#include "pls/internal/base/error_handling.h" +#include "pls/internal/base/system_details.h" + +namespace pls::internal::scheduling { + +struct task; +struct traded_cas_field { + static_assert(base::system_details::CACHE_LINE_SIZE >= 4, + "Traded objects must not use their last address bits, as we use them for status flags." 
+ "As traded objects are usually cache aligned, we need big enough cache lines."); + + // Base size of our CAS integer/pointer + static constexpr base::system_details::cas_integer CAS_SIZE = base::system_details::CAS_SIZE; + + // States of the integer (tag indicating current content) + static constexpr base::system_details::cas_integer EMPTY_TAG = 0x0lu; + static constexpr base::system_details::cas_integer STAMP_TAG = 0x1lu; + static constexpr base::system_details::cas_integer TRADE_TAG = 0x2lu; + + // Bitmasks and shifts for cas_integer_, two variants: + // cas_integer_ = traded object | tag + // cas_integer_ = stamp | id | tag + static constexpr base::system_details::cas_integer TAG_SIZE = 2ul; + static constexpr base::system_details::cas_integer TAG_BITS = ~((~0x0ul) << TAG_SIZE); + + static constexpr base::system_details::cas_integer TRADED_OBJECT_SIZE = CAS_SIZE - TAG_SIZE; + static constexpr base::system_details::cas_integer TRADED_OBJECT_SHIFT = TAG_SIZE; + static constexpr base::system_details::cas_integer + TRADE_OBJECT_BITS = ~((~0x0ul) << TRADED_OBJECT_SIZE) << TRADED_OBJECT_SHIFT; + + static constexpr base::system_details::cas_integer ID_SIZE = 10ul; // Up to 1024 cores + static constexpr base::system_details::cas_integer ID_SHIFT = TAG_SIZE; + static constexpr base::system_details::cas_integer ID_BITS = ~((~0x0ul) << ID_SIZE) << ID_SHIFT; + + static constexpr base::system_details::cas_integer STAMP_SIZE = CAS_SIZE - TAG_SIZE - ID_SIZE; + static constexpr base::system_details::cas_integer STAMP_SHIFT = TAG_SIZE + ID_SIZE; + static constexpr base::system_details::cas_integer STAMP_BITS = ~((~0x0ul) << STAMP_SIZE) << STAMP_SHIFT; + + public: + void fill_with_stamp(base::system_details::cas_integer stamp, base::system_details::cas_integer deque_id) { + cas_integer_ = (((stamp << STAMP_SHIFT) & STAMP_BITS) | ((deque_id << ID_SHIFT) & ID_BITS) | STAMP_TAG); + } + base::system_details::cas_integer get_stamp() { + PLS_ASSERT(is_filled_with_stamp(), "Must only 
read out the tag when the traded field contains one."); + return (((base::system_details::cas_integer) cas_integer_) & STAMP_BITS) >> STAMP_SHIFT; + } + base::system_details::cas_integer get_deque_id() { + PLS_ASSERT(is_filled_with_stamp(), "Must only read out the tag when the traded field contains one."); + return (((base::system_details::cas_integer) cas_integer_) & ID_BITS) >> ID_SHIFT; + } + bool is_filled_with_stamp() { + return (((base::system_details::cas_integer) cas_integer_) & TAG_BITS) == STAMP_TAG; + } + + void fill_with_trade_object(task *new_task) { + PLS_ASSERT((((base::system_details::cas_integer) new_task) & TAG_BITS) == 0, + "Must only store aligned objects in this data structure (last bits are needed for tag bit)"); + cas_integer_ = (((base::system_details::cas_integer) new_task) | TRADE_TAG); + } + task *get_trade_object() { + PLS_ASSERT(is_filled_with_object(), "Must only read out the object when the traded field contains one."); + return reinterpret_cast(((base::system_details::cas_integer) cas_integer_) & TRADE_OBJECT_BITS); + } + bool is_filled_with_object() { + return (((base::system_details::cas_integer) cas_integer_) & TAG_BITS) == TRADE_TAG; + } + + bool is_empty() { + return (((base::system_details::cas_integer) cas_integer_) & TAG_BITS) == EMPTY_TAG; + } + + private: + base::system_details::cas_integer cas_integer_{}; +}; + +} + +#endif //PLS_INTERNAL_SCHEDULING_TRADED_CAS_FIELD_H_ diff --git a/lib/pls/include/pls/pls.h b/lib/pls/include/pls/pls.h index cbaef30..daff79d 100644 --- a/lib/pls/include/pls/pls.h +++ b/lib/pls/include/pls/pls.h @@ -1,34 +1,39 @@ #ifndef PLS_LIBRARY_H #define PLS_LIBRARY_H +#include + #include "pls/algorithms/invoke.h" #include "pls/algorithms/for_each.h" -#include "pls/algorithms/scan.h" -#include "pls/internal/scheduling/task.h" +#include "pls/algorithms/reduce.h" + #include "pls/internal/scheduling/scheduler.h" -#include "pls/internal/helpers/unique_id.h" + +#include "pls/internal/helpers/range.h" 
#include "pls/internal/helpers/member_function.h" namespace pls { -using internal::scheduling::static_scheduler_memory; -using internal::scheduling::malloc_scheduler_memory; - +// 'basic' fork-join APIs using internal::scheduling::scheduler; +template +static void spawn(Function &&function) { + scheduler::spawn(std::forward(function)); +} +static void sync() { + scheduler::sync(); +} -using unique_id = internal::helpers::unique_id; +// general helpers that can be handy when using PLS template using member_function = internal::helpers::member_function; +using internal::helpers::range; -using internal::scheduling::task; -using internal::scheduling::lambda_task_by_reference; -using internal::scheduling::lambda_task_by_value; -using internal::scheduling::task; - +// parallel patterns API using algorithm::invoke; using algorithm::for_each; using algorithm::for_each_range; -using algorithm::scan; +using algorithm::reduce; } #endif diff --git a/lib/pls/src/internal/base/alignment.cpp b/lib/pls/src/internal/base/alignment.cpp index 79b7a44..ea8a22d 100644 --- a/lib/pls/src/internal/base/alignment.cpp +++ b/lib/pls/src/internal/base/alignment.cpp @@ -1,35 +1,12 @@ #include "pls/internal/base/alignment.h" -#include "pls/internal/base/system_details.h" namespace pls { namespace internal { namespace base { namespace alignment { -void *allocate_aligned(size_t size) { - return aligned_alloc(system_details::CACHE_LINE_SIZE, size); -} - -system_details::pointer_t next_alignment(system_details::pointer_t size) { - system_details::pointer_t miss_alignment = size % base::system_details::CACHE_LINE_SIZE; - if (miss_alignment == 0) { - return size; - } else { - return size + (base::system_details::CACHE_LINE_SIZE - miss_alignment); - } -} - -system_details::pointer_t previous_alignment(system_details::pointer_t size) { - system_details::pointer_t miss_alignment = size % base::system_details::CACHE_LINE_SIZE; - if (miss_alignment == 0) { - return size; - } else { - return size - 
miss_alignment; - } -} - -char *next_alignment(char *pointer) { - return reinterpret_cast(next_alignment(reinterpret_cast(pointer))); +char *next_alignment(char *pointer, size_t alignment) { + return reinterpret_cast(next_alignment(reinterpret_cast(pointer), alignment)); } } diff --git a/lib/pls/src/internal/base/error_handling.cpp b/lib/pls/src/internal/base/error_handling.cpp new file mode 100644 index 0000000..4243a13 --- /dev/null +++ b/lib/pls/src/internal/base/error_handling.cpp @@ -0,0 +1,5 @@ +#include "pls/internal/base/error_handling.h" + +void pls_error(const char *msg) { + PLS_ERROR(msg); +} diff --git a/lib/pls/src/internal/base/stack_allocator.cpp b/lib/pls/src/internal/base/stack_allocator.cpp new file mode 100644 index 0000000..5356d75 --- /dev/null +++ b/lib/pls/src/internal/base/stack_allocator.cpp @@ -0,0 +1,32 @@ +#include "pls/internal/base/stack_allocator.h" + +#include "pls/internal/base/alignment.h" +#include "pls/internal/base/system_details.h" + +namespace pls::internal::base { +char *mmap_stack_allocator::allocate_stack(size_t size) { + const size_t page_size = system_details::get_page_size(); + + const size_t stack_size = alignment::next_alignment(size, page_size); + const size_t guard_size = page_size; + + const size_t mmap_size = stack_size + guard_size; + char *const memory_range = reinterpret_cast(system_details::memory_map_range(mmap_size)); + + char *const stack_block = memory_range + guard_size; + char *const guard_block = memory_range; + system_details::memory_protect_range(guard_block, guard_size); + + return stack_block; +} + +void mmap_stack_allocator::free_stack(size_t size, char *stack) { + const size_t page_size = system_details::get_page_size(); + const size_t guard_size = page_size; + + const size_t mmap_size = size + guard_size; + char *const memory_range = stack - guard_size; + + system_details::memory_unmap_range(memory_range, mmap_size); +} +} diff --git a/lib/pls/src/internal/base/system_details.cpp 
b/lib/pls/src/internal/base/system_details.cpp new file mode 100644 index 0000000..219a7bd --- /dev/null +++ b/lib/pls/src/internal/base/system_details.cpp @@ -0,0 +1,28 @@ +#include "pls/internal/base/system_details.h" + +#include +#include + +namespace pls::internal::base::system_details { + +size_t get_page_size() { + return sysconf(_SC_PAGESIZE); +} + +void *memory_map_range(size_t size) { + PLS_ASSERT(size % get_page_size() == 0, "Must only map memory regions in page_size chunks."); + return mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); +} + +int memory_unmap_range(void *addr, size_t size) { + PLS_ASSERT((pointer_t) addr % get_page_size() == 0, "Must only unmap memory page_size aligned memory regions."); + PLS_ASSERT(size % get_page_size() == 0, "Must only map memory regions in page_size chunks."); + + return munmap(addr, size); +} + +int memory_protect_range(void *addr, size_t size) { + return mprotect(addr, size, PROT_NONE); +} + +} diff --git a/lib/pls/src/internal/base/thread.cpp b/lib/pls/src/internal/base/thread.cpp deleted file mode 100644 index 4991952..0000000 --- a/lib/pls/src/internal/base/thread.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include "pls/internal/base/thread.h" - -namespace pls { -namespace internal { -namespace base { - -#ifdef PLS_THREAD_SPECIFIC_PTHREAD -pthread_key_t this_thread::local_storage_key_ = false; -bool this_thread::local_storage_key_initialized_; -#endif -#ifdef PLS_THREAD_SPECIFIC_COMPILER -__thread void *this_thread::local_state_; -#endif - -void thread::join() { - pthread_join(pthread_thread_, nullptr); -} - -} -} -} diff --git a/lib/pls/src/internal/data_structures/aligned_stack.cpp b/lib/pls/src/internal/data_structures/aligned_stack.cpp new file mode 100644 index 0000000..991778b --- /dev/null +++ b/lib/pls/src/internal/data_structures/aligned_stack.cpp @@ -0,0 +1,47 @@ +#include "pls/internal/data_structures/aligned_stack.h" +#include "pls/internal/base/system_details.h" + +namespace 
pls { +namespace internal { +namespace data_structures { + +aligned_stack::aligned_stack(char *memory_pointer, size_t size) : + unaligned_memory_pointer_{memory_pointer}, + memory_pointer_{memory_pointer}, // MUST be aligned + max_offset_{size / base::system_details::CACHE_LINE_SIZE}, + current_offset_{0} { + PLS_ASSERT((pointer_t) memory_pointer_ % base::system_details::CACHE_LINE_SIZE == 0, + "Must initialize an aligned_stack with a properly aligned memory region!") +} + +aligned_stack::aligned_stack(char *unaligned_memory_pointer, size_t size, size_t unaligned_size) : + unaligned_memory_pointer_{unaligned_memory_pointer}, + memory_pointer_{base::alignment::next_alignment(unaligned_memory_pointer)}, + max_offset_{unaligned_size / base::system_details::CACHE_LINE_SIZE}, + current_offset_{0} { + PLS_ASSERT(size == base::alignment::previous_alignment(unaligned_size), + "Initialized aligned stack with invalid memory configuration!") +} + +char *aligned_stack::memory_at_offset(stack_offset offset) const { + const auto byte_offset = offset * base::system_details::CACHE_LINE_SIZE; + return reinterpret_cast(memory_pointer_ + byte_offset); +} + +char *aligned_stack::push_bytes(size_t size) { + size_t round_up_size = base::alignment::next_alignment(size); + size_t num_cache_lines = round_up_size / base::system_details::CACHE_LINE_SIZE; + + char *result = memory_at_offset(current_offset_); + + // Move head to next aligned position after new object + current_offset_ += num_cache_lines; + PLS_ASSERT(current_offset_ <= max_offset_, + "Tried to allocate object on alligned_stack without sufficient memory!"); + + return result; +} + +} +} +} diff --git a/lib/pls/src/internal/scheduling/data_structures/aligned_stack.cpp b/lib/pls/src/internal/scheduling/data_structures/aligned_stack.cpp deleted file mode 100644 index 8ad0ead..0000000 --- a/lib/pls/src/internal/scheduling/data_structures/aligned_stack.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include 
"pls/internal/scheduling/data_structures/aligned_stack.h" -#include "pls/internal/base/system_details.h" - -namespace pls { -namespace internal { -namespace scheduling { -namespace data_structures { - -aligned_stack::aligned_stack(pointer_t memory_region, const std::size_t size) : - aligned_memory_start_{base::alignment::next_alignment(memory_region)}, - aligned_memory_end_{base::alignment::previous_alignment(memory_region + size)}, - max_offset_{(aligned_memory_end_ - aligned_memory_start_) / base::system_details::CACHE_LINE_SIZE}, - current_offset_{0} {} - -aligned_stack::aligned_stack(char *memory_region, const std::size_t size) : - aligned_stack((pointer_t) memory_region, size) {} - -void *aligned_stack::memory_at_offset(stack_offset offset) const { - const auto byte_offset = offset * base::system_details::CACHE_LINE_SIZE; - return reinterpret_cast(aligned_memory_start_ + byte_offset); -} - -void *aligned_stack::push_bytes(size_t size) { - size_t round_up_size = base::alignment::next_alignment(size); - size_t num_cache_lines = round_up_size / base::system_details::CACHE_LINE_SIZE; - - void *result = memory_at_offset(current_offset_); - - // Move head to next aligned position after new object - current_offset_ += num_cache_lines; - if (current_offset_ > max_offset_) { - PLS_ERROR("Tried to allocate object on alligned_stack without sufficient memory!"); - } - - return result; -} - -} -} -} -} diff --git a/lib/pls/src/internal/scheduling/external_trading_deque.cpp b/lib/pls/src/internal/scheduling/external_trading_deque.cpp new file mode 100644 index 0000000..927528e --- /dev/null +++ b/lib/pls/src/internal/scheduling/external_trading_deque.cpp @@ -0,0 +1,136 @@ +#include "pls/internal/scheduling/external_trading_deque.h" + +namespace pls::internal::scheduling { + +optional external_trading_deque::peek_traded_object(task *target_task) { + traded_cas_field current_cas = target_task->external_trading_deque_cas_.load(); + if (current_cas.is_filled_with_object()) { + 
return optional{current_cas.get_trade_object()}; + } else { + return optional{}; + } +} + +optional external_trading_deque::get_trade_object(task *target_task) { + traded_cas_field current_cas = target_task->external_trading_deque_cas_.load(); + if (current_cas.is_filled_with_object()) { + task *result = current_cas.get_trade_object(); + traded_cas_field empty_cas; + if (target_task->external_trading_deque_cas_.compare_exchange_strong(current_cas, empty_cas)) { + return optional{result}; + } + } + + return optional{}; +} + +void external_trading_deque::push_bot(task *published_task) { + auto expected_stamp = bot_internal_.stamp; + auto &current_entry = entries_[bot_internal_.value]; + + // Publish the prepared task in the deque. + current_entry.forwarding_stamp_.store(expected_stamp, std::memory_order_relaxed); + current_entry.traded_task_.store(published_task, std::memory_order_relaxed); + + // Field that all threads synchronize on. + // This happens not in the deque itself, but in the published task. + traded_cas_field sync_cas_field; + sync_cas_field.fill_with_stamp(expected_stamp, thread_id_); + published_task->external_trading_deque_cas_.store(sync_cas_field, std::memory_order_release); + + // Advance the bot pointer. Linearization point for making the task public. 
+ bot_internal_.stamp++; + bot_internal_.value++; + bot_.store(bot_internal_.value, std::memory_order_release); +} + +void external_trading_deque::reset_bot_and_top() { + bot_internal_.value = 0; + bot_internal_.stamp++; + + bot_.store(0); + top_.store({bot_internal_.stamp, 0}); +} + +void external_trading_deque::decrease_bot() { + bot_internal_.value--; + bot_.store(bot_internal_.value, std::memory_order_relaxed); +} + +optional external_trading_deque::pop_bot() { + if (bot_internal_.value == 0) { + reset_bot_and_top(); + return optional{}; + } + decrease_bot(); + + auto &current_entry = entries_[bot_internal_.value]; + auto *popped_task = current_entry.traded_task_.load(std::memory_order_relaxed); + auto expected_stamp = current_entry.forwarding_stamp_.load(std::memory_order_relaxed); + + // We know what value must be in the cas field if no other thread stole it. + traded_cas_field expected_sync_cas_field; + expected_sync_cas_field.fill_with_stamp(expected_stamp, thread_id_); + traded_cas_field empty_cas_field; + + if (popped_task->external_trading_deque_cas_.compare_exchange_strong(expected_sync_cas_field, + empty_cas_field, + std::memory_order_acq_rel)) { + return optional{popped_task}; + } else { + reset_bot_and_top(); + return optional{}; + } +} + +external_trading_deque::peek_result external_trading_deque::peek_top() { + auto local_top = top_.load(); + auto local_bot = bot_.load(); + + if (local_top.value < local_bot) { + return peek_result{optional{entries_[local_top.value].traded_task_}, local_top}; + } else { + return peek_result{optional{}, local_top}; + } +} + +optional external_trading_deque::pop_top(task *offered_task, peek_result peek_result) { + stamped_integer expected_top = peek_result.top_pointer_; + auto local_bot = bot_.load(); + if (expected_top.value >= local_bot) { + return data_structures::optional{}; + } + + auto &target_entry = entries_[expected_top.value]; + + // Read our potential result + task *result = target_entry.traded_task_.load(); + 
unsigned long forwarding_stamp = target_entry.forwarding_stamp_.load(); + + // Try to get it by CAS with the expected field entry, giving up our offered_task for it + traded_cas_field expected_sync_cas_field; + expected_sync_cas_field.fill_with_stamp(expected_top.stamp, thread_id_); + + traded_cas_field offered_field; + offered_field.fill_with_trade_object(offered_task); + + if (result->external_trading_deque_cas_.compare_exchange_strong(expected_sync_cas_field, offered_field)) { + // We got it, for sure move the top pointer forward. + top_.compare_exchange_strong(expected_top, {expected_top.stamp + 1, expected_top.value + 1}); + // Return the stolen task + return data_structures::optional{result}; + } else { + // We did not get it...help forwarding the top pointer anyway. + if (expected_top.stamp == forwarding_stamp) { + // ...move the pointer forward if someone else put a valid trade object in there. + top_.compare_exchange_strong(expected_top, {expected_top.stamp + 1, expected_top.value + 1}); + } else { + // ...we failed because the top tag lags behind...try to fix it. + // This means only updating the tag, as this location can still hold data we need. 
+ top_.compare_exchange_strong(expected_top, {forwarding_stamp, expected_top.value}); + } + return data_structures::optional{}; + } +} + +} diff --git a/lib/pls/src/internal/scheduling/scheduler.cpp b/lib/pls/src/internal/scheduling/scheduler.cpp index 3280c39..29a776a 100644 --- a/lib/pls/src/internal/scheduling/scheduler.cpp +++ b/lib/pls/src/internal/scheduling/scheduler.cpp @@ -1,73 +1,104 @@ #include "pls/internal/scheduling/scheduler.h" -#include "pls/internal/scheduling/thread_state.h" -#include "pls/internal/scheduling/task.h" -#include "pls/internal/scheduling/data_structures/deque.h" +#include "context_switcher/context_switcher.h" #include "pls/internal/base/error_handling.h" -namespace pls { -namespace internal { -namespace scheduling { - -scheduler::scheduler(scheduler_memory *memory, const unsigned int num_threads, bool reuse_thread) : - num_threads_{num_threads}, - reuse_thread_{reuse_thread}, - memory_{memory}, - sync_barrier_{num_threads + 1 - reuse_thread}, - terminated_{false} { - if (num_threads_ > memory_->max_threads()) { - PLS_ERROR("Tried to create scheduler with more OS threads than pre-allocated memory."); - } +#include - for (unsigned int i = 0; i < num_threads_; i++) { - // Placement new is required, as the memory of `memory_` is not required to be initialized. - new((void *) memory_->thread_state_for(i)) thread_state{this, memory_->task_stack_for(i), i}; +namespace pls::internal::scheduling { - if (reuse_thread && i == 0) { - continue; // Skip over first/main thread when re-using the users thread, as this one will replace the first one. 
- } - new((void *) memory_->thread_for(i))base::thread(&scheduler::worker_routine, memory_->thread_state_for(i)); - - } -} +scheduler::scheduler(unsigned int num_threads, + size_t computation_depth, + size_t stack_size, + bool reuse_thread) : scheduler(num_threads, + computation_depth, + stack_size, + reuse_thread, + base::mmap_stack_allocator{}) {} scheduler::~scheduler() { terminate(); } - -void scheduler::worker_routine() { - auto my_state = thread_state::get(); - auto scheduler = my_state->scheduler_; - +void scheduler::work_thread_main_loop() { + auto &scheduler = thread_state::get().get_scheduler(); while (true) { // Wait to be triggered - scheduler->sync_barrier_.wait(); + scheduler.sync_barrier_.wait(); // Check for shutdown - if (scheduler->terminated_) { + if (scheduler.terminated_) { return; } - // Execute work - if (my_state->id_ == 0) { - // Main Thread - auto root_task = scheduler->main_thread_root_task_; - root_task->parent_ = nullptr; - root_task->deque_offset_ = my_state->deque_.save_offset(); + scheduler.work_thread_work_section(); + + // Sync back with main thread + scheduler.sync_barrier_.wait(); + } +} + +void scheduler::work_thread_work_section() { + auto &my_state = thread_state::get(); + auto &my_task_manager = my_state.get_task_manager(); - root_task->execute(); - scheduler->work_section_done_ = true; + auto const num_threads = my_state.get_scheduler().num_threads(); + + if (my_state.get_thread_id() == 0) { + // Main Thread, kick off by executing the user's main code block. 
+ main_thread_starter_function_->run(); + } + + unsigned int failed_steals = 0; + while (!work_section_done_) { + PLS_ASSERT(my_task_manager.check_task_chain(), "Must start stealing with a clean task chain."); + + // TODO: move steal routine into separate function + const size_t target = my_state.get_rand() % num_threads; + if (target == my_state.get_thread_id()) { + continue; + } + + auto &target_state = my_state.get_scheduler().thread_state_for(target); + task *traded_task = target_state.get_task_manager().steal_task(my_task_manager); + + if (traded_task != nullptr) { + // The stealing procedure correctly changed our chain and active task. + // Now we need to perform the 'post steal' actions (manage resources and execute the stolen task). + PLS_ASSERT(my_task_manager.check_task_chain_forward(&my_task_manager.get_active_task()), + "We are sole owner of this chain, it has to be valid!"); + + // Move the traded in resource of this active task over to the stack of resources. + auto *stolen_task = &my_task_manager.get_active_task(); + // Push the traded in resource on the resource stack to clear the traded_field for later steals/spawns. + my_task_manager.push_resource_on_task(stolen_task, traded_task); + + auto optional_exchanged_task = external_trading_deque::get_trade_object(stolen_task); + if (optional_exchanged_task) { + // All good, we pushed the task over to the stack, nothing more to do + PLS_ASSERT(*optional_exchanged_task == traded_task, + "We are currently executing this, no one else can put another task in this field!"); + } else { + // The last other active thread took it as its spare resource... + // ...remove our traded object from the stack again (it must be empty now and no one must access it anymore). + auto current_root = stolen_task->resource_stack_root_.load(); + current_root.stamp++; + current_root.value = 0; + stolen_task->resource_stack_root_.store(current_root); + } + + // Execute the stolen task by jumping to it's continuation. 
+ PLS_ASSERT(stolen_task->continuation_.valid(), + "A task that we can steal must have a valid continuation for us to start working."); + stolen_task->is_synchronized_ = false; + context_switcher::switch_context(std::move(stolen_task->continuation_)); + // We will continue execution in this line when we finished the stolen work. + failed_steals = 0; } else { - // Worker Threads - while (!scheduler->work_section_done_) { - if (!scheduler->try_execute_local()) { - scheduler->try_execute_stolen(); - } + failed_steals++; + if (failed_steals >= num_threads) { + std::this_thread::yield(); } } - - // Sync back with main thread - my_state->scheduler_->sync_barrier_.wait(); } } @@ -83,80 +114,12 @@ void scheduler::terminate() { if (reuse_thread_ && i == 0) { continue; } - memory_->thread_for(i)->join(); + worker_threads_[i].join(); } } -task *scheduler::get_local_task() { - PROFILE_STEALING("Get Local Task") - return thread_state::get()->deque_.pop_local_task(); +void scheduler::sync() { + thread_state::get().get_task_manager().sync(); } -task *scheduler::steal_task() { - PROFILE_STEALING("Steal Task") - - // Data for victim selection - const auto my_state = thread_state::get(); - - const auto my_id = my_state->id_; - const size_t offset = my_state->random_() % num_threads(); - const size_t max_tries = num_threads(); // TODO: Tune this value - bool any_cas_fails_occured = false; - - // Current strategy: random start, then round robin from there - for (size_t i = 0; i < max_tries; i++) { - size_t target = (offset + i) % num_threads(); - - // Skip our self for stealing - target = ((target == my_id) + target) % num_threads(); - - auto target_state = thread_state_for(target); - - bool cas_fail; - auto result = target_state->deque_.pop_external_task(cas_fail); - any_cas_fails_occured |= cas_fail; - if (result != nullptr) { - return result; - } - - // TODO: See if we should backoff here (per missed steal) - } - - if (!any_cas_fails_occured) { - // Went through every task and we 
did not find any work. - // Most likely there is non available right now, yield to other threads. - pls::internal::base::this_thread::yield(); - } - return nullptr; -} - -bool scheduler::try_execute_local() { - task *local_task = get_local_task(); - if (local_task != nullptr) { - local_task->execute(); - return true; - } else { - return false; - } -} - -bool scheduler::try_execute_stolen() { - task *stolen_task = steal_task(); - if (stolen_task != nullptr) { - stolen_task->deque_offset_ = thread_state::get()->deque_.save_offset(); - stolen_task->execute(); - return true; - } - - return false; -} - -void scheduler::wait_for_all() { - thread_state::get()->current_task_->wait_for_all(); -} - -thread_state *scheduler::thread_state_for(size_t id) { return memory_->thread_state_for(id); } - -} -} } diff --git a/lib/pls/src/internal/scheduling/scheduler_memory.cpp b/lib/pls/src/internal/scheduling/scheduler_memory.cpp deleted file mode 100644 index d2764d7..0000000 --- a/lib/pls/src/internal/scheduling/scheduler_memory.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "pls/internal/scheduling/scheduler_memory.h" -#include "pls/internal/scheduling/data_structures/aligned_stack.h" - -namespace pls { -namespace internal { -namespace scheduling { - -malloc_scheduler_memory::malloc_scheduler_memory(const size_t num_threads, const size_t memory_per_stack) : - num_threads_{num_threads} { - threads_ = - reinterpret_cast(base::alignment::allocate_aligned(num_threads * sizeof(aligned_thread))); - thread_states_ = reinterpret_cast(base::alignment::allocate_aligned( - num_threads * sizeof(aligned_thread_state))); - task_stacks_ = reinterpret_cast(base::alignment::allocate_aligned( - num_threads * sizeof(aligned_aligned_stack))); - task_stacks_memory_ = reinterpret_cast(base::alignment::allocate_aligned(num_threads * sizeof(char *))); - - thread_refs_ = static_cast(malloc(num_threads * sizeof(base::thread *))); - thread_state_refs_ = static_cast(malloc(num_threads * sizeof(thread_state 
*))); - task_stack_refs_ = - static_cast(malloc(num_threads * sizeof(data_structures::aligned_stack *))); - - for (size_t i = 0; i < num_threads_; i++) { - task_stacks_memory_[i] = reinterpret_cast(base::alignment::allocate_aligned(memory_per_stack)); - new((void *) task_stacks_[i].pointer()) data_structures::aligned_stack(task_stacks_memory_[i], memory_per_stack); - - thread_refs_[i] = threads_[i].pointer(); - thread_state_refs_[i] = thread_states_[i].pointer(); - task_stack_refs_[i] = task_stacks_[i].pointer(); - } - - init(num_threads, thread_state_refs_, thread_refs_, task_stack_refs_); -} - -malloc_scheduler_memory::~malloc_scheduler_memory() { - free(threads_); - free(thread_states_); - - for (size_t i = 0; i < num_threads_; i++) { - free(task_stacks_memory_[i]); - } - free(task_stacks_); - free(task_stacks_memory_); - - free(thread_refs_); - free(thread_state_refs_); - free(task_stack_refs_); -} - -} -} -} diff --git a/lib/pls/src/internal/scheduling/task.cpp b/lib/pls/src/internal/scheduling/task.cpp index 1b25604..4b9b0e3 100644 --- a/lib/pls/src/internal/scheduling/task.cpp +++ b/lib/pls/src/internal/scheduling/task.cpp @@ -1,53 +1,9 @@ -#include "pls/internal/helpers/profiler.h" - -#include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/task.h" -#include "pls/internal/scheduling/thread_state.h" namespace pls { namespace internal { namespace scheduling { -task::task() : - finished_construction_{false}, - ref_count_{0}, - parent_{nullptr}, - deque_offset_{0} {} - -void *task::allocate_memory(long size) { - if (finished_construction_) { - PLS_ERROR("Must not allocate dynamic task memory after it's construction.") - } - return thread_state::get()->deque_.push_bytes(size); -} - -void task::execute() { - PROFILE_WORK_BLOCK("execute task") - auto last_executing = thread_state::get()->current_task_; - thread_state::get()->current_task_ = this; - - execute_internal(); - PROFILE_END_BLOCK - - wait_for_all(); - 
thread_state::get()->current_task_ = last_executing; - - if (parent_ != nullptr) { - parent_->ref_count_--; - } -} - -void task::wait_for_all() { - auto scheduler = thread_state::get()->scheduler_; - - while (ref_count_ > 0) { - if (!scheduler->try_execute_local()) { - scheduler->try_execute_stolen(); - } - } - thread_state::get()->deque_.reset_offset(deque_offset_); -} - } } } diff --git a/lib/pls/src/internal/scheduling/task_manager.cpp b/lib/pls/src/internal/scheduling/task_manager.cpp new file mode 100644 index 0000000..8a2aa7d --- /dev/null +++ b/lib/pls/src/internal/scheduling/task_manager.cpp @@ -0,0 +1,241 @@ +#include "pls/internal/scheduling/task_manager.h" + +#include "pls/internal/scheduling/task.h" +#include "pls/internal/scheduling/thread_state.h" +#include "pls/internal/scheduling/scheduler.h" + +namespace pls::internal::scheduling { + +task_manager::task_manager(unsigned thread_id, + size_t num_tasks, + size_t stack_size, + std::shared_ptr &stack_allocator) : stack_allocator_{stack_allocator}, + tasks_{}, + deque_{thread_id, num_tasks} { + tasks_.reserve(num_tasks); + + for (size_t i = 0; i < num_tasks - 1; i++) { + char *stack_memory = stack_allocator->allocate_stack(stack_size); + tasks_.emplace_back(std::make_unique(stack_memory, stack_size, i, thread_id)); + + if (i > 0) { + tasks_[i - 1]->next_ = tasks_[i].get(); + tasks_[i]->prev_ = tasks_[i - 1].get(); + } + } + active_task_ = tasks_[0].get(); +} + +task_manager::~task_manager() { + for (auto &task : tasks_) { + stack_allocator_->free_stack(task->stack_size_, task->stack_memory_); + } +} + +static task &find_task(unsigned id, unsigned depth) { + return thread_state::get().get_scheduler().thread_state_for(id).get_task_manager().get_this_thread_task(depth); +} + +task *task_manager::steal_task(task_manager &stealing_task_manager) { + PLS_ASSERT(stealing_task_manager.active_task_->depth_ == 0, "Must only steal with clean task chain."); + PLS_ASSERT(stealing_task_manager.check_task_chain(), "Must 
only steal with clean task chain."); + + auto peek = deque_.peek_top(); + if (peek.top_task_) { + // search for the task we want to trade in + task *stolen_task = *peek.top_task_; + task *traded_task = stealing_task_manager.active_task_; + for (unsigned i = 0; i < stolen_task->depth_; i++) { + traded_task = traded_task->next_; + } + + // keep a reference to the rest of the task chain that we keep + task *next_own_task = traded_task->next_; + // 'unchain' the traded tasks (to help us find bugs) + traded_task->next_ = nullptr; + + // perform the actual pop operation + auto pop_result_task = deque_.pop_top(traded_task, peek); + if (pop_result_task) { + PLS_ASSERT(stolen_task->thread_id_ != traded_task->thread_id_, + "It is impossible to steal an task we already own!"); + PLS_ASSERT(*pop_result_task == stolen_task, + "We must only steal the task that we peeked at!"); + + // the steal was a success, link the chain so we own the stolen part + stolen_task->next_ = next_own_task; + next_own_task->prev_ = stolen_task; + stealing_task_manager.active_task_ = stolen_task; + + return traded_task; + } else { + // the steal failed, reset our chain to its old, clean state (re-link what we have broken) + traded_task->next_ = next_own_task; + + return nullptr; + } + } else { + return nullptr; + } +} + +void task_manager::push_resource_on_task(task *target_task, task *spare_task_chain) { + PLS_ASSERT(target_task->thread_id_ != spare_task_chain->thread_id_, + "Makes no sense to push task onto itself, as it is not clean by definition."); + PLS_ASSERT(target_task->depth_ == spare_task_chain->depth_, "Must only push tasks with correct depth."); + + data_structures::stamped_integer current_root; + data_structures::stamped_integer target_root; + do { + current_root = target_task->resource_stack_root_.load(); + target_root.stamp = current_root.stamp + 1; + target_root.value = spare_task_chain->thread_id_ + 1; + + if (current_root.value == 0) { + // Empty, simply push in with no successor + 
spare_task_chain->resource_stack_next_.store(nullptr); + } else { + // Already an entry. Find it's corresponding task and set it as our successor. + auto ¤t_root_task = find_task(current_root.value - 1, target_task->depth_); + spare_task_chain->resource_stack_next_.store(¤t_root_task); + } + + } while (!target_task->resource_stack_root_.compare_exchange_strong(current_root, target_root)); +} + +task *task_manager::pop_resource_from_task(task *target_task) { + data_structures::stamped_integer current_root; + data_structures::stamped_integer target_root; + task *output_task; + do { + current_root = target_task->resource_stack_root_.load(); + if (current_root.value == 0) { + // Empty... + return nullptr; + } else { + // Found something, try to pop it + auto ¤t_root_task = find_task(current_root.value - 1, target_task->depth_); + auto *next_stack_task = current_root_task.resource_stack_next_.load(); + + target_root.stamp = current_root.stamp + 1; + target_root.value = next_stack_task != nullptr ? 
next_stack_task->thread_id_ + 1 : 0; + + output_task = ¤t_root_task; + } + } while (!target_task->resource_stack_root_.compare_exchange_strong(current_root, target_root)); + + PLS_ASSERT(check_task_chain_backward(output_task), "Must only pop proper task chains."); + output_task->resource_stack_next_.store(nullptr); + return output_task; +} + +void task_manager::sync() { + auto *spawning_task_manager = this; + auto *last_task = spawning_task_manager->active_task_; + auto *spawned_task = spawning_task_manager->active_task_->next_; + + if (last_task->is_synchronized_) { + return; // We are already the sole owner of last_task + } else { + auto continuation = spawned_task->run_as_task([=](context_switcher::continuation cont) { + last_task->continuation_ = std::move(cont); + spawning_task_manager->active_task_ = spawned_task; + + context_switcher::continuation result_cont; + if (spawning_task_manager->try_clean_return(result_cont)) { + // We return back to the main scheduling loop + return result_cont; + } else { + // We finish up the last task + return result_cont; + } + }); + + PLS_ASSERT(!continuation.valid(), + "We only return to a sync point, never jump to it directly." 
+ "This must therefore never return an unfinished fiber/continuation."); + + return; // We cleanly synced to the last one finishing work on last_task + } +} + +bool task_manager::try_clean_return(context_switcher::continuation &result_cont) { + task *this_task = active_task_; + task *last_task = active_task_->prev_; + + PLS_ASSERT(last_task != nullptr, + "Must never try to return from a task at level 0 (no last task), as we must have a target to return to."); + + // Try to get a clean resource chain to go back to the main stealing loop + task *clean_chain = pop_resource_from_task(last_task); + if (clean_chain == nullptr) { + // double-check if we are really last one or we only have unlucky timing + auto optional_cas_task = external_trading_deque::get_trade_object(last_task); + if (optional_cas_task) { + clean_chain = *optional_cas_task; + } else { + clean_chain = pop_resource_from_task(last_task); + } + } + + if (clean_chain != nullptr) { + // We got a clean chain to continue working on. + PLS_ASSERT(last_task->depth_ == clean_chain->depth_, + "Resources must only reside in the correct depth!"); + PLS_ASSERT(clean_chain != last_task, + "We want to swap out the last task and its chain to use a clean one, thus they must differ."); + PLS_ASSERT(check_task_chain_backward(clean_chain), + "Can only acquire clean chains for clean returns!"); + this_task->prev_ = clean_chain; + clean_chain->next_ = this_task; + + // Walk back chain to make first task active + active_task_ = clean_chain; + while (active_task_->prev_ != nullptr) { + active_task_ = active_task_->prev_; + } + + // jump back to the continuation in main scheduling loop, time to steal some work + result_cont = std::move(thread_state::get().main_continuation()); + PLS_ASSERT(result_cont.valid(), "Must return a valid continuation."); + return true; + } else { + // Make sure that we are owner fo this full continuation/task chain. 
+ last_task->next_ = this_task; + this_task->prev_ = last_task; + + // We are the last one working on this task. Thus the sync must be finished, continue working. + active_task_ = last_task; + + last_task->is_synchronized_ = true; + result_cont = std::move(last_task->continuation_); + PLS_ASSERT(result_cont.valid(), "Must return a valid continuation."); + return false; + } +} + +bool task_manager::check_task_chain_forward(task *start_task) { + while (start_task->next_ != nullptr) { + if (start_task->next_->prev_ != start_task) { + return false; + } + start_task = start_task->next_; + } + return true; +} + +bool task_manager::check_task_chain_backward(task *start_task) { + while (start_task->prev_ != nullptr) { + if (start_task->prev_->next_ != start_task) { + return false; + } + start_task = start_task->prev_; + } + return true; +} + +bool task_manager::check_task_chain() { + return check_task_chain_backward(active_task_) && check_task_chain_forward(active_task_); +} + +} diff --git a/lib/pls/src/internal/scheduling/thread_state.cpp b/lib/pls/src/internal/scheduling/thread_state.cpp new file mode 100644 index 0000000..85d1c94 --- /dev/null +++ b/lib/pls/src/internal/scheduling/thread_state.cpp @@ -0,0 +1,10 @@ +#include "pls/internal/scheduling/thread_state.h" + +namespace pls::internal::scheduling { + +thread_local thread_state *my_thread_state{nullptr}; + +thread_state &thread_state::get() { return *my_thread_state; } +void thread_state::set(thread_state *new_state) { my_thread_state = new_state; } + +} diff --git a/media/e34ea267_fft_execution_pattern.png b/media/e34ea267_fft_execution_pattern.png new file mode 100644 index 0000000..108ab8d Binary files /dev/null and b/media/e34ea267_fft_execution_pattern.png differ diff --git a/media/e34ea267_thread_state_for.png b/media/e34ea267_thread_state_for.png new file mode 100644 index 0000000..8431bfc Binary files /dev/null and b/media/e34ea267_thread_state_for.png differ diff --git a/test/CMakeLists.txt 
b/test/CMakeLists.txt index 501e622..9f4e844 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,6 +3,6 @@ add_executable(tests data_structures_test.cpp base_tests.cpp scheduling_tests.cpp - algorithm_test.cpp - dataflow_test.cpp) + patterns_test.cpp + test_helpers.h) target_link_libraries(tests catch2 pls) diff --git a/test/algorithm_test.cpp b/test/algorithm_test.cpp deleted file mode 100644 index beb0cfe..0000000 --- a/test/algorithm_test.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include -#include - -#include "pls/pls.h" - -using namespace pls; - -TEST_CASE("for_each functions correctly", "[algorithms/for_each.h]") { - malloc_scheduler_memory my_scheduler_memory{8, 2 << 12}; - scheduler my_scheduler{&my_scheduler_memory, 8}; - my_scheduler.perform_work([]() { - constexpr int SIZE = 1000; - std::array result_array{}; - result_array.fill(0); - - SECTION("integer ranges are processed exactly once") { - pls::for_each_range(0, SIZE, [&result_array](int i) { - result_array[i]++; - }); - - bool all_equal = true; - for (int i = 0; i < SIZE; i++) { - all_equal &= result_array[i] == 1; - } - REQUIRE (all_equal); - } - - SECTION("iterators are processed exactly once") { - std::array iterator_array{}; - for (int i = 0; i < SIZE; i++) { - iterator_array[i] = i; - } - - pls::for_each(iterator_array.begin(), iterator_array.end(), [&result_array](int i) { - result_array[i]++; - }); - - bool all_equal = true; - for (int i = 0; i < SIZE; i++) { - all_equal &= result_array[i] == 1; - } - REQUIRE (all_equal); - } - }); -} - -TEST_CASE("scan functions correctly", "[algorithms/scan.h]") { - malloc_scheduler_memory my_scheduler_memory{8, 2 << 12}; - scheduler my_scheduler{&my_scheduler_memory, 8}; - my_scheduler.perform_work([]() { - constexpr int SIZE = 10000; - std::array input_array{}, result_array{}; - input_array.fill(1); - - pls::scan(input_array.begin(), input_array.end(), result_array.begin(), std::plus(), 0); - - bool all_correct = true; - for (int i = 0; i < SIZE; 
i++) { - all_correct &= result_array[i] == (i + 1); - } - REQUIRE (all_correct); - }); -} - -long fib(long n) { - if (n <= 2) { - return 1; - } - - long a, b; - - pls::invoke( - [&a, n]() { a = fib(n - 1); }, - [&b, n]() { b = fib(n - 2); } - ); - - return a + b; -} - -TEST_CASE("invoke functions correctly", "[algorithms/invoke.h]") { - constexpr long fib_30 = 832040; - - malloc_scheduler_memory my_scheduler_memory{8, 2u << 14}; - scheduler my_scheduler{&my_scheduler_memory, 8}; - my_scheduler.perform_work([=]() { - REQUIRE(fib(30) == fib_30); - }); -} diff --git a/test/base_tests.cpp b/test/base_tests.cpp index dafdfca..e7e3593 100644 --- a/test/base_tests.cpp +++ b/test/base_tests.cpp @@ -1,55 +1,97 @@ #include -#include "pls/internal/base/thread.h" + #include "pls/internal/base/spin_lock.h" #include "pls/internal/base/system_details.h" +#include "pls/internal/base/alignment.h" +#include "pls/internal/base/stack_allocator.h" + +#include "test_helpers.h" -#include #include +#include +#include using namespace pls::internal::base; -using namespace std; -static bool base_tests_visited; -static int base_tests_local_value_one; -static vector base_tests_local_value_two; +int base_tests_shared_counter; -TEST_CASE("thread creation and joining", "[internal/data_structures/thread.h]") { - base_tests_visited = false; - thread t1{[]() { base_tests_visited = true; }}; - t1.join(); +TEST_CASE("align helpers", "[internal/base/alignment.h") { + system_details::pointer_t aligned_64 = 64; + system_details::pointer_t aligned_32 = 32; + system_details::pointer_t not_aligned_64 = 70; + system_details::pointer_t not_aligned_32 = 60; - REQUIRE(base_tests_visited); + REQUIRE(alignment::next_alignment(aligned_64, 64) == 64); + REQUIRE(alignment::next_alignment(aligned_32, 32) == 32); + REQUIRE(alignment::next_alignment(aligned_32, 64) == 64); + + REQUIRE(alignment::previous_alignment(not_aligned_64, 64) == 64); + REQUIRE(alignment::next_alignment(not_aligned_64, 64) == 128); + 
REQUIRE(alignment::previous_alignment(not_aligned_32, 32) == 32); + REQUIRE(alignment::next_alignment(not_aligned_32, 32) == 64); } -TEST_CASE("thread state", "[internal/data_structures/thread.h]") { - int state_one = 1; - vector state_two{1, 2}; +TEST_CASE("alignment wrapper", "[internal/base/alignment.h") { + char filler1 = '\0'; + alignment::alignment_wrapper int_256{256}; + int filler2 = 0; + alignment::alignment_wrapper int_1024{1024}; + + (void) filler1; + (void) filler2; - thread t1{[]() { base_tests_local_value_one = *this_thread::state(); }, &state_one}; - thread t2{[]() { base_tests_local_value_two = *this_thread::state>(); }, &state_two}; - t1.join(); - t2.join(); + REQUIRE(int_256.object() == 256); + REQUIRE((system_details::pointer_t) int_256.pointer() % 256 == 0); - REQUIRE(base_tests_local_value_one == 1); - REQUIRE(base_tests_local_value_two == vector{1, 2}); + REQUIRE(int_1024.object() == 1024); + REQUIRE((system_details::pointer_t) int_1024.pointer() % 1024 == 0); } -int base_tests_shared_counter; +TEST_CASE("mmap stack allocator", "[internal/base/stack_allocator.h") { + mmap_stack_allocator stack_allocator; + + char *stack = stack_allocator.allocate_stack(4096); + + SECTION("stack passes in valid range") { + stack[0] = 'a'; + stack[4096 - 1] = 'a'; + + REQUIRE(true); + } + + SECTION("guard page sigsev on overflow") { + REQUIRE(CHECK_ABORT([&]() { stack[-10] = 'a'; })); + } + + stack_allocator.free_stack(4096, stack); + + SECTION("stack unmaps after free") { + REQUIRE(CHECK_ABORT([&]() { stack[0] = 'a'; })); + } +} TEST_CASE("spinlock protects concurrent counter", "[internal/data_structures/spinlock.h]") { - constexpr int num_iterations = 1000000; + constexpr int num_iterations = 10000; base_tests_shared_counter = 0; + std::atomic barrier{2}; + spin_lock lock{}; SECTION("lock can be used by itself") { - thread t1{[&]() { + std::thread t1{[&]() { + barrier--; + while (barrier != 0); + for (int i = 0; i < num_iterations; i++) { lock.lock(); 
base_tests_shared_counter++; lock.unlock(); } }}; - thread t2{[&]() { + std::thread t2{[&]() { + barrier--; + while (barrier != 0); + for (int i = 0; i < num_iterations; i++) { lock.lock(); base_tests_shared_counter--; @@ -64,13 +106,13 @@ TEST_CASE("spinlock protects concurrent counter", "[internal/data_structures/spi } SECTION("lock can be used with std::lock_guard") { - thread t1{[&]() { + std::thread t1{[&]() { for (int i = 0; i < num_iterations; i++) { std::lock_guard my_lock{lock}; base_tests_shared_counter++; } }}; - thread t2{[&]() { + std::thread t2{[&]() { for (int i = 0; i < num_iterations; i++) { std::lock_guard my_lock{lock}; base_tests_shared_counter--; diff --git a/test/data_structures_test.cpp b/test/data_structures_test.cpp index d515972..ac499eb 100644 --- a/test/data_structures_test.cpp +++ b/test/data_structures_test.cpp @@ -1,22 +1,40 @@ #include +#include #include "pls/internal/base/system_details.h" +#include "pls/internal/data_structures/aligned_stack.h" -#include "pls/internal/scheduling/data_structures/aligned_stack.h" -#include "pls/internal/scheduling/data_structures/locking_deque.h" -#include "pls/internal/scheduling/data_structures/work_stealing_deque.h" - -#include - -using namespace pls::internal::scheduling::data_structures; +using namespace pls::internal::data_structures; using namespace pls::internal::base; using namespace std; +// Forward Declaration +void test_stack(aligned_stack &stack); + TEST_CASE("aligned stack stores objects correctly", "[internal/data_structures/aligned_stack.h]") { constexpr long data_size = 1024; - char data[data_size]; - aligned_stack stack{data, data_size}; + SECTION("plain aligned stack") { + alignas(system_details::CACHE_LINE_SIZE) char data[data_size]; + aligned_stack stack{data, data_size, data_size}; + + test_stack(stack); + } + + SECTION("static aligned stack") { + static_aligned_stack stack; + + test_stack(stack.get_stack()); + } + + SECTION("heap aligned stack") { + heap_aligned_stack 
stack{data_size}; + + test_stack(stack.get_stack()); + } +} + +void test_stack(aligned_stack &stack) { SECTION("stack correctly pushes sub linesize objects") { std::array small_data_one{'a', 'b', 'c', 'd', 'e'}; std::array small_data_two{}; @@ -45,10 +63,11 @@ TEST_CASE("aligned stack stores objects correctly", "[internal/data_structures/a SECTION("stack correctly stores and retrieves objects") { std::array data_one{'a', 'b', 'c', 'd', 'e'}; - stack.push(data_one); - auto retrieved_data = stack.pop>(); + auto *push_one = stack.push(data_one); + stack.pop>(); + auto *push_two = stack.push(data_one); - REQUIRE(retrieved_data == std::array{'a', 'b', 'c', 'd', 'e'}); + REQUIRE(push_one == push_two); } SECTION("stack can push and pop multiple times with correct alignment") { @@ -75,214 +94,4 @@ TEST_CASE("aligned stack stores objects correctly", "[internal/data_structures/a } } -TEST_CASE("work_stealing_deque functions correctly", "[internal/data_structures/work_stealing_deque.h]") { - SECTION("add and remove items form the tail") { - constexpr long data_size = 2 << 14; - char data[data_size]; - aligned_stack stack{data, data_size}; - work_stealing_deque deque{&stack}; - - int one = 1, two = 2, three = 3, four = 4; - - SECTION("add and remove items form the tail") { - deque.push_task(one); - deque.publish_last_task(); - deque.push_task(two); - deque.publish_last_task(); - deque.push_task(three); - deque.publish_last_task(); - - REQUIRE(*deque.pop_local_task() == three); - REQUIRE(*deque.pop_local_task() == two); - REQUIRE(*deque.pop_local_task() == one); - } - - SECTION("handles getting empty by popping the tail correctly") { - deque.push_task(one); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == one); - - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == two); - } - - SECTION("remove items form the head") { - deque.push_task(one); - deque.publish_last_task(); - deque.push_task(two); - deque.publish_last_task(); - 
deque.push_task(three); - deque.publish_last_task(); - - REQUIRE(*deque.pop_external_task() == one); - REQUIRE(*deque.pop_external_task() == two); - REQUIRE(*deque.pop_external_task() == three); - } - - SECTION("handles getting empty by popping the head correctly") { - deque.push_task(one); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == one); - - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == two); - } - - SECTION("handles getting empty by popping the head and tail correctly") { - deque.push_task(one); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == one); - - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == two); - - deque.push_task(three); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == three); - } - SECTION("handles jumps bigger 1 correctly") { - deque.push_task(one); - deque.publish_last_task(); - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == two); - - deque.push_task(three); - deque.publish_last_task(); - deque.push_task(four); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == one); - REQUIRE(*deque.pop_external_task() == three); - REQUIRE(*deque.pop_external_task() == four); - } - - SECTION("handles stack reset 1 correctly when emptied by tail") { - deque.push_task(one); - deque.publish_last_task(); - auto state = deque.save_offset(); - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == two); - - deque.reset_offset(state); - REQUIRE(*deque.pop_local_task() == one); - - deque.push_task(three); - deque.publish_last_task(); - deque.push_task(four); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == three); - REQUIRE(*deque.pop_local_task() == four); - } - } -} - -TEST_CASE("locking_deque functions correctly", "[internal/data_structures/locking_deque.h]") { - SECTION("add and remove items form the tail") { - 
constexpr long data_size = 2 << 14; - char data[data_size]; - aligned_stack stack{data, data_size}; - locking_deque deque{&stack}; - - int one = 1, two = 2, three = 3, four = 4; - - SECTION("add and remove items form the tail") { - deque.push_task(one); - deque.publish_last_task(); - deque.push_task(two); - deque.publish_last_task(); - deque.push_task(three); - deque.publish_last_task(); - - REQUIRE(*deque.pop_local_task() == three); - REQUIRE(*deque.pop_local_task() == two); - REQUIRE(*deque.pop_local_task() == one); - } - - SECTION("handles getting empty by popping the tail correctly") { - deque.push_task(one); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == one); - - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == two); - } - - SECTION("remove items form the head") { - deque.push_task(one); - deque.publish_last_task(); - deque.push_task(two); - deque.publish_last_task(); - deque.push_task(three); - deque.publish_last_task(); - - REQUIRE(*deque.pop_external_task() == one); - REQUIRE(*deque.pop_external_task() == two); - REQUIRE(*deque.pop_external_task() == three); - } - - SECTION("handles getting empty by popping the head correctly") { - deque.push_task(one); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == one); - - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == two); - } - - SECTION("handles getting empty by popping the head and tail correctly") { - deque.push_task(one); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == one); - - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == two); - - deque.push_task(three); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == three); - } - - SECTION("handles jumps bigger 1 correctly") { - deque.push_task(one); - deque.publish_last_task(); - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == two); 
- - deque.push_task(three); - deque.publish_last_task(); - deque.push_task(four); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == one); - REQUIRE(*deque.pop_external_task() == three); - REQUIRE(*deque.pop_external_task() == four); - } - - SECTION("handles stack reset 1 correctly when emptied by tail") { - deque.push_task(one); - deque.publish_last_task(); - auto state = deque.save_offset(); - deque.push_task(two); - deque.publish_last_task(); - REQUIRE(*deque.pop_local_task() == two); - - deque.reset_offset(state); - REQUIRE(*deque.pop_local_task() == one); - - deque.push_task(three); - deque.publish_last_task(); - deque.push_task(four); - deque.publish_last_task(); - REQUIRE(*deque.pop_external_task() == three); - REQUIRE(*deque.pop_local_task() == four); - } - } -} diff --git a/test/dataflow_test.cpp b/test/dataflow_test.cpp deleted file mode 100644 index 58e26e9..0000000 --- a/test/dataflow_test.cpp +++ /dev/null @@ -1,114 +0,0 @@ -#include -#include -#include - -#include "pls/pls.h" -#include "pls/dataflow/dataflow.h" - -using namespace pls; -using namespace pls::dataflow; - -void step_1(const int &in, int &out) { - out = in * 2; -} - -class member_call_test { - public: - void step_2(const int &in, int &out) { - out = in * 2; - } -}; - -TEST_CASE("dataflow functions correctly", "[dataflow/dataflow.h]") { - malloc_scheduler_memory my_scheduler_memory{8, 2u << 12u}; - scheduler my_scheduler{&my_scheduler_memory, 8}; - my_scheduler.perform_work([]() { - SECTION("linear pipelines") { - auto step_1 = [](const int &in, double &out1, double &out2) { - out1 = (double) in / 2.0; - out2 = (double) in / 3.0; - }; - auto step_2 = [](const double &in1, const double &in2, double &out) { - out = in1 * in2; - }; - - graph, outputs> linear_graph; - function_node, outputs, decltype(step_1)> node_1{step_1}; - function_node, outputs, decltype(step_2)> node_2{step_2}; - - linear_graph >> node_1 >> node_2 >> linear_graph; - linear_graph.build(); - - std::tuple 
out{}; - linear_graph.run(5, &out); - linear_graph.wait_for_all(); - - REQUIRE(std::get<0>(out) == (5 / 2.0) * (5 / 3.0)); - } - - SECTION("member and function steps") { - member_call_test instance; - using member_func_type = member_function; - member_func_type func_1{&instance, &member_call_test::step_2}; - - graph, outputs> graph; - function_node, outputs, void (*)(const int &, int &)> node_1{&step_1}; - function_node, outputs, member_func_type> node_2{func_1}; - - graph >> node_1 >> node_2 >> graph; - graph.build(); - - std::tuple out{}; - graph.run(1, &out); - graph.wait_for_all(); - - REQUIRE(std::get<0>(out) == 4); - } - - SECTION("non linear pipeline") { - auto path_one = [](const int &in, int &out) { - out = in + 1; - }; - auto path_two = [](const int &in, int &out) { - out = in - 1; - }; - - graph, outputs> graph; - function_node, outputs, decltype(path_one)> node_1{path_one}; - function_node, outputs, decltype(path_two)> node_2{path_two}; - switch_node switch_node; - merge_node merge_node; - split_node split; - - // Split up boolean signal - graph.input<1>() >> split.value_in_port(); - - // Feed switch - graph.input<0>() >> switch_node.value_in_port(); - split.out_port_1() >> switch_node.condition_in_port(); - - // True path - switch_node.true_out_port() >> node_1.in_port<0>(); - node_1.out_port<0>() >> merge_node.true_in_port(); - // False path - switch_node.false_out_port() >> node_2.in_port<0>(); - node_2.out_port<0>() >> merge_node.false_in_port(); - - // Read Merge - split.out_port_2() >> merge_node.condition_in_port(); - merge_node.value_out_port() >> graph.output<0>(); - - - // Build and run - graph.build(); - std::tuple out1{}, out2{}; - graph.run({0, true}, &out1); - graph.run({0, false}, &out2); - graph.wait_for_all(); - - REQUIRE(std::get<0>(out1) == 1); - REQUIRE(std::get<0>(out2) == -1); - } - - }); -} diff --git a/test/patterns_test.cpp b/test/patterns_test.cpp new file mode 100644 index 0000000..bc92ffd --- /dev/null +++ 
b/test/patterns_test.cpp @@ -0,0 +1,96 @@ +#include + +#include +#include + +#include "pls/pls.h" + +constexpr int MAX_NUM_TASKS = 32; +constexpr int MAX_STACK_SIZE = 1024 * 8; + +TEST_CASE("spawn/sync invoke calls correctly", "[algorithms/invoke.h]") { + pls::scheduler scheduler{3, MAX_NUM_TASKS, MAX_STACK_SIZE}; + + std::atomic num_run{0}; + scheduler.perform_work([&] { + pls::spawn([&] { + num_run++; + while (num_run < 3); + }); + pls::spawn([&] { + while (num_run < 1); + num_run++; + while (num_run < 3); + }); + pls::spawn([&] { + while (num_run < 2); + num_run++; + }); + pls::sync(); + REQUIRE(num_run == 3); + }); +} + +TEST_CASE("parallel invoke calls correctly", "[algorithms/invoke.h]") { + pls::scheduler scheduler{3, MAX_NUM_TASKS, MAX_STACK_SIZE}; + + std::atomic num_run{0}; + scheduler.perform_work([&] { + pls::invoke([&] { + num_run++; + while (num_run < 3); + }, [&] { + while (num_run < 1); + num_run++; + while (num_run < 3); + }, [&] { + while (num_run < 2); + num_run++; + }); + REQUIRE(num_run == 3); + }); +} + +TEST_CASE("parallel for calls correctly (might fail, timing based)", "[algorithms/for_each.h]") { + pls::scheduler scheduler{8, MAX_NUM_TASKS, MAX_STACK_SIZE}; + + auto start = std::chrono::steady_clock::now(); + std::atomic work_done{0}; + scheduler.perform_work([&] { + pls::for_each_range(0, 100, [&](const int) { + work_done++; + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + }); + }); + auto end = std::chrono::steady_clock::now(); + auto elapsed = + std::chrono::duration_cast(end - start).count(); + + REQUIRE(work_done == 100); + // It makes sense that 100 iterations on at least 4 threads take less than half the serial time. + // We want to make sure that at least some work is distributed on multiple cores. 
+ REQUIRE(elapsed <= 50); +} + +TEST_CASE("reduce calls correctly (might fail, timing based)", "[algorithms/for_each.h]") { + pls::scheduler scheduler{8, MAX_NUM_TASKS, MAX_STACK_SIZE}; + + auto start = std::chrono::steady_clock::now(); + int num_elements = 100; + pls::range range{1, num_elements + 1}; + int result; + scheduler.perform_work([&] { + result = pls::reduce(range.begin(), range.end(), 0, [&](const int a, const int b) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + return a + b; + }); + }); + auto end = std::chrono::steady_clock::now(); + auto elapsed = + std::chrono::duration_cast(end - start).count(); + + REQUIRE(result == (num_elements * (num_elements + 1)) / 2); + // It makes sense that 100 iterations on at least 4 threads take less than half the serial time. + // We want to make sure that at least some work is distributed on multiple cores. + REQUIRE(elapsed <= 50); +} diff --git a/test/scheduling_tests.cpp b/test/scheduling_tests.cpp index 2cce39a..826bdf5 100644 --- a/test/scheduling_tests.cpp +++ b/test/scheduling_tests.cpp @@ -1,75 +1,119 @@ #include +#include + +#include "pls/internal/scheduling/traded_cas_field.h" +#include "pls/internal/scheduling/external_trading_deque.h" #include "pls/pls.h" -using namespace pls; +using namespace pls::internal::scheduling; -class once_sub_task : public task { - std::atomic *counter_; - int children_; +constexpr int MAX_NUM_TASKS = 32; +constexpr int MAX_STACK_SIZE = 1024 * 8; - protected: - void execute_internal() override { - (*counter_)++; - for (int i = 0; i < children_; i++) { - spawn_child(counter_, children_ - 1); - } - } +TEST_CASE("tasks distributed over workers (do not block)", "[internal/scheduling/scheduler.h]") { + scheduler scheduler{3, MAX_NUM_TASKS, MAX_STACK_SIZE}; - public: - explicit once_sub_task(std::atomic *counter, int children) : - task{}, - counter_{counter}, - children_{children} {} -}; - -class force_steal_sub_task : public task { - std::atomic *parent_counter_; - 
std::atomic *overall_counter_; - - protected: - void execute_internal() override { - (*overall_counter_)--; - if (overall_counter_->load() > 0) { - std::atomic counter{1}; - spawn_child(&counter, overall_counter_); - while (counter.load() > 0); // Spin... - } - - (*parent_counter_)--; - } + std::atomic num_run{0}; + scheduler.perform_work([&] { + scheduler::spawn([&] { + num_run++; + while (num_run < 3); + }); + scheduler::spawn([&] { + while (num_run < 1); + num_run++; + while (num_run < 3); + }); + scheduler::spawn([&] { + while (num_run < 2); + num_run++; + }); + scheduler::sync(); + }); + REQUIRE(num_run == 3); +} + +TEST_CASE("traded cas field bitmaps correctly", "[internal/scheduling/traded_cas_field.h]") { + traded_cas_field empty_field; + REQUIRE(empty_field.is_empty()); + REQUIRE(!empty_field.is_filled_with_stamp()); + REQUIRE(!empty_field.is_filled_with_object()); - public: - explicit force_steal_sub_task(std::atomic *parent_counter, std::atomic *overall_counter) : - task{}, - parent_counter_{parent_counter}, - overall_counter_{overall_counter} {} -}; + const int stamp = 42; + const int ID = 10; + traded_cas_field tag_field; + tag_field.fill_with_stamp(stamp, ID); + REQUIRE(tag_field.is_filled_with_stamp()); + REQUIRE(!tag_field.is_empty()); + REQUIRE(!tag_field.is_filled_with_object()); + REQUIRE(tag_field.get_stamp() == stamp); + REQUIRE(tag_field.get_deque_id() == ID); -TEST_CASE("tbb task are scheduled correctly", "[internal/scheduling/fork_join_task.h]") { - malloc_scheduler_memory my_scheduler_memory{8, 2 << 12}; + alignas(64) task obj{nullptr, 0, 0, 0}; + traded_cas_field obj_field; + obj_field.fill_with_trade_object(&obj); + REQUIRE(obj_field.is_filled_with_object()); + REQUIRE(!obj_field.is_empty()); + REQUIRE(!obj_field.is_filled_with_stamp()); +} - SECTION("tasks are executed exactly once") { - scheduler my_scheduler{&my_scheduler_memory, 2}; - int start_counter = 4; - int total_tasks = 1 + 4 + 4 * 3 + 4 * 3 * 2 + 4 * 3 * 2 * 1; - std::atomic 
counter{0}; +TEST_CASE("external trading deque", "[internal/scheduling/external_trading_deque]") { + external_trading_deque deque_1{1, 16}; + external_trading_deque deque_2{2, 16}; - my_scheduler.perform_work([&]() { - scheduler::spawn_child(&counter, start_counter); - }); + task tasks[4] = {{nullptr, 0, 0, 0}, + {nullptr, 0, 1, 0}, + {nullptr, 0, 2, 0}, + {nullptr, 0, 3, 0}}; + + SECTION("basic operations") { + // Must start empty + REQUIRE(!deque_1.pop_bot()); + REQUIRE(!deque_2.pop_bot()); + + // Local push/pop + deque_1.push_bot(&tasks[0]); + REQUIRE(*deque_1.pop_bot() == &tasks[0]); + REQUIRE(!deque_1.pop_bot()); - REQUIRE(counter.load() == total_tasks); + // Local push, external pop + deque_1.push_bot(&tasks[0]); + auto peek = deque_1.peek_top(); + REQUIRE(*deque_1.pop_top(&tasks[1], peek) == &tasks[0]); + REQUIRE(*external_trading_deque::get_trade_object(&tasks[0]) == &tasks[1]); + REQUIRE(!deque_1.pop_top(&tasks[1], peek)); + REQUIRE(!deque_1.pop_bot()); + + // Keeps push/pop order + deque_1.push_bot(&tasks[0]); + deque_1.push_bot(&tasks[1]); + REQUIRE(*deque_1.pop_bot() == &tasks[1]); + REQUIRE(*deque_1.pop_bot() == &tasks[0]); + REQUIRE(!deque_1.pop_bot()); + + deque_1.push_bot(&tasks[0]); + deque_1.push_bot(&tasks[1]); + auto peek1 = deque_1.peek_top(); + REQUIRE(*deque_1.pop_top(&tasks[2], peek1) == &tasks[0]); + auto peek2 = deque_1.peek_top(); + REQUIRE(*deque_1.pop_top(&tasks[3], peek2) == &tasks[1]); } - SECTION("tasks can be stolen") { - scheduler my_scheduler{&my_scheduler_memory, 8}; - my_scheduler.perform_work([&]() { - std::atomic dummy_parent{1}, overall_counter{8}; - scheduler::spawn_child(&dummy_parent, &overall_counter); + SECTION("Interwined execution #1") { + // Two top poppers + deque_1.push_bot(&tasks[0]); + auto peek1 = deque_1.peek_top(); + auto peek2 = deque_1.peek_top(); + REQUIRE(*deque_1.pop_top(&tasks[1], peek1) == &tasks[0]); + REQUIRE(!deque_1.pop_top(&tasks[2], peek2)); + } - // Required, as child operates on our stack's 
memory!!! - scheduler::wait_for_all(); - }); + SECTION("Interwined execution #2") { + // Top and bottom access + deque_1.push_bot(&tasks[0]); + auto peek1 = deque_1.peek_top(); + REQUIRE(*deque_1.pop_bot() == &tasks[0]); + REQUIRE(!deque_1.pop_top(&tasks[2], peek1)); } } diff --git a/test/test_helpers.h b/test/test_helpers.h new file mode 100644 index 0000000..95d3f36 --- /dev/null +++ b/test/test_helpers.h @@ -0,0 +1,52 @@ +#ifndef PLS_TEST_TEST_HELPERS_H_ +#define PLS_TEST_TEST_HELPERS_H_ + +#include +#include +#include + +#include +#include + +// Source: https://stackoverflow.com/questions/15749071/fork-and-wait-in-c +// Works well enough for our purpose. +void abort_handler(int) { + std::exit(1); +} + +template +bool CHECK_ABORT(F &&f) { + //spawn a new process + auto child_pid = fork(); + + //if the fork succeed + if (child_pid >= 0) { + + //if we are in the child process + if (child_pid == 0) { + // Re-direct the signal handlers added by catch to simply exit with an error code + std::signal(SIGABRT, abort_handler); + std::signal(SIGFPE, abort_handler); + std::signal(SIGSEGV, abort_handler); + std::signal(SIGILL, abort_handler); + + //call the lambda that we expect to abort + f(); + + //if the function didn't abort, we'll exit cleanly + std::exit(EXIT_SUCCESS); + } + } + + //determine if the child process aborted + int exit_status; + wait(&exit_status); + + //we check the exit status instead of a signal interrupt, because + //Catch is going to catch the signal and exit with an error + bool aborted = exit_status != 0; + + return aborted; +} + +#endif //PLS_TEST_TEST_HELPERS_H_