Commit ec10685a by Florian Fritz

Merge branch 'coninuation' into 'master'

Merge continuation-style into master branch

See merge request !13
parents 5bc35f9e f28cb00c
Pipeline #1438 passed with stages
in 3 minutes 18 seconds
......@@ -18,10 +18,11 @@ run_tests:
script:
./ci_scripts/run_tests.sh
run_thread_sanitizer:
stage: sanitizer
script:
./ci_scripts/run_thread_sanitizer.sh
# Disable until we can get a clang with the tsan patch for fibers on the CI server
#run_thread_sanitizer:
# stage: sanitizer
# script:
# ./ci_scripts/run_thread_sanitizer.sh
run_address_sanitizer:
stage: sanitizer
......
This diff is collapsed. Click to expand it.
cmake_minimum_required(VERSION 3.10)
project(predictable_parallel_patterns
VERSION 0.0.1
DESCRIPTION "predictable parallel patterns for scalable smart systems using work stealing")
DESCRIPTION "predictable parallel patterns for scalable smart systems using work stealing"
LANGUAGES CXX ASM)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
# seperate library and test/example executable output paths.
set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin)
......@@ -12,6 +13,7 @@ set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib)
# specific setup code is located in individual files.
include(cmake/DisabelInSource.cmake)
include(cmake/SetupOptimizationLevel.cmake)
include(cmake/SetupAssemblyOutput.cmake)
include(cmake/SetupThreadingSupport.cmake)
include(cmake/SetupThreadSanitizer.cmake)
include(cmake/SetupAddressSanitizer.cmake)
......@@ -25,19 +27,21 @@ list(APPEND CMAKE_PREFIX_PATH "${PROJECT_SOURCE_DIR}/cmake")
# Each library has an own CMakeLists.txt that should make it avaliabale as a library target,
# thus allowing one to include it as any cmake dependency later on.
add_subdirectory(extern/catch2)
add_subdirectory(extern/picosha2)
add_subdirectory(extern/benchmark_base)
add_subdirectory(extern/benchmark_runner)
# Include all internal subprojects (library, examples, testing).
add_subdirectory(lib/context_switcher)
add_subdirectory(lib/pls)
# Include examples
add_subdirectory(app/playground)
add_subdirectory(app/test_for_new)
add_subdirectory(app/invoke_parallel)
add_subdirectory(app/benchmark_fft)
add_subdirectory(app/benchmark_unbalanced)
add_subdirectory(app/benchmark_matrix)
add_subdirectory(app/benchmark_prefix)
add_subdirectory(app/benchmark_pipeline)
add_subdirectory(app/benchmark_fib)
add_subdirectory(app/context_switch)
# Add optional tests
option(PACKAGE_TESTS "Build the tests" ON)
......
# Notes on performance measures during development
#### Commit e34ea267 - 05.12.2019 - First Version of new Algorithm - Scaling Problems
The first version of our memory trading work stealing algorithm works. It still shows scaling issues over
the hyperthreading mark, very similar to what we have seen in version 1. This indicates some sort of
contention between the threads when running the FFT algorithm.
Analyzing the current version we find issue with the frequent call to `thread_state_for(id)` in
the stealing loop.
![](./media/e34ea267_thread_state_for.png)
It is obvious that the method takes some amount of runtime, as FFT has a structure that tends to only
work on the continuations in the end of the computation (the critical path of FFT can only be executed
after most parallel tasks are done).
![](./media/e34ea267_fft_execution_pattern.png)
What we can see here is the long tail of continuations running at the end of the computation. During
this time the non working threads constantly steal, thus requiring the `thread_state_for(id)`
virtual method, potentially hindering other threads from doing their work properly.
......@@ -76,12 +76,11 @@ long fib(long n) {
```
## Project Structure
The project uses [CMAKE](https://cmake.org/) as it's build system,
the recommended IDE is either a simple text editor or [CLion](https://www.jetbrains.com/clion/).
We divide the project into subtargets to separate for the library
We divide the project into sub-targets to separate for the library
itself, testing and example code. The library itself can be found in
`lib/pls`, testing related code is in `test`, example and playground
apps are in `app`.
......@@ -114,11 +113,16 @@ Available Settings:
- Enables thread/datarace sanitizer to be linked to the executable
- Only one sanitizer can be active at once
- Enabling has a performance hit (do not use in releases)
- `-DDEBUG_SYMBOLS=ON`
- `-DDEBUG_SYMBOLS=ON/OFF`
- default OFF
- Enables the build with debug symbols
- Use for e.g. profiling the release build
Note that these settings are persistent for one CMake build folder.
If you e.g. set a flag in the debug build it will not influence
the release build, but it will persist in the debug build folder
until you explicitly change it back.
### Testing
Testing is done using [Catch2](https://github.com/catchorg/Catch2/)
......@@ -167,4 +171,8 @@ For detailed profiling of small performance hotspots we prefer
to use [Intel's VTune Amplifier](https://software.intel.com/en-us/vtune).
It gives insights in detailed microachitecture usage and performance
hotspots. Follow the instructions by Intel for using it.
Make sure to enable debug symbols (`-DDEBUG_SYMBOLS=ON`) in the
analyzed build and that all optimizations are turned on
(by choosing the release build).
add_executable(benchmark_fft main.cpp)
target_link_libraries(benchmark_fft pls)
if(EASY_PROFILER)
target_link_libraries(benchmark_fft easy_profiler)
endif()
add_executable(benchmark_fft_pls_v3 main.cpp)
target_link_libraries(benchmark_fft_pls_v3 pls benchmark_runner benchmark_base)
if (EASY_PROFILER)
target_link_libraries(benchmark_fft_pls_v3 easy_profiler)
endif ()
#include <pls/pls.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include "pls/internal/scheduling/scheduler.h"
#include <iostream>
#include <complex>
#include <vector>
using namespace pls::internal::scheduling;
static constexpr int CUTOFF = 16;
static constexpr int NUM_ITERATIONS = 1000;
static constexpr int INPUT_SIZE = 8192;
typedef std::vector<std::complex<double>> complex_vector;
#include "benchmark_runner.h"
#include "benchmark_base/fft.h"
void divide(complex_vector::iterator data, int n) {
complex_vector tmp_odd_elements(n / 2);
for (int i = 0; i < n / 2; i++) {
tmp_odd_elements[i] = data[i * 2 + 1];
}
for (int i = 0; i < n / 2; i++) {
data[i] = data[i * 2];
}
for (int i = 0; i < n / 2; i++) {
data[i + n / 2] = tmp_odd_elements[i];
}
}
void combine(complex_vector::iterator data, int n) {
for (int i = 0; i < n / 2; i++) {
std::complex<double> even = data[i];
std::complex<double> odd = data[i + n / 2];
// w is the "twiddle-factor".
// this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
// so it won't impact the performance comparison.
std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
data[i] = even + w * odd;
data[i + n / 2] = even - w * odd;
}
}
using namespace comparison_benchmarks::base;
void fft(complex_vector::iterator data, int n) {
void pls_conquer(fft::complex_vector::iterator data, fft::complex_vector::iterator swap_array, int n) {
if (n < 2) {
return;
}
divide(data, n);
if (n <= CUTOFF) {
fft(data, n / 2);
fft(data + n / 2, n / 2);
fft::divide(data, swap_array, n);
if (n <= fft::RECURSIVE_CUTOFF) {
FILE* file = fopen("test.text", "w");
fprintf(file, "test %d", n);
fclose(file);
fft::conquer(data, swap_array, n / 2);
fft::conquer(data + n / 2, swap_array + n / 2, n / 2);
} else {
pls::invoke(
[&] { fft(data, n / 2); },
[&] { fft(data + n / 2, n / 2); }
);
scheduler::spawn([data, n, swap_array]() {
pls_conquer(data, swap_array, n / 2);
});
scheduler::spawn([data, n, swap_array]() {
pls_conquer(data + n / 2, swap_array + n / 2, n / 2);
});
scheduler::sync();
}
combine(data, n);
fft::combine(data, n);
}
complex_vector prepare_input(int input_size) {
std::vector<double> known_frequencies{2, 11, 52, 88, 256};
complex_vector data(input_size);
constexpr int MAX_NUM_TASKS = 32;
constexpr int MAX_STACK_SIZE = 1024 * 4;
// Set our input data to match a time series of the known_frequencies.
// When applying fft to this time-series we should find these frequencies.
for (int i = 0; i < input_size; i++) {
data[i] = std::complex<double>(0.0, 0.0);
for (auto frequencie : known_frequencies) {
data[i] += sin(2 * M_PI * frequencie * i / input_size);
}
}
int main(int argc, char **argv) {
int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
return data;
}
string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
fft::complex_vector data(fft::SIZE);
fft::complex_vector swap_array(fft::SIZE);
fft::fill_input(data);
int main() {
PROFILE_ENABLE
complex_vector initial_input = prepare_input(INPUT_SIZE);
scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
pls::internal::helpers::run_mini_benchmark([&] {
complex_vector input = initial_input;
fft(input.begin(), input.size());
}, 7, 1000);
runner.run_iterations(fft::NUM_ITERATIONS, [&]() {
scheduler.perform_work([&]() {
pls_conquer(data.begin(), swap_array.begin(), fft::SIZE);;
});
}, fft::NUM_WARMUP_ITERATIONS, [&]() {
fft::fill_input(data); // Reset data before each run
});
runner.commit_results(true);
PROFILE_SAVE("test_profile.prof")
return 0;
}
add_executable(benchmark_fib_pls_v3 main.cpp)
target_link_libraries(benchmark_fib_pls_v3 pls benchmark_runner benchmark_base)
if (EASY_PROFILER)
target_link_libraries(benchmark_fib_pls_v3 easy_profiler)
endif ()
#include "pls/internal/scheduling/scheduler.h"
using namespace pls::internal::scheduling;
#include <iostream>
#include "benchmark_runner.h"
#include "benchmark_base/fib.h"
using namespace comparison_benchmarks::base;
int pls_fib(int n) {
if (n == 0) {
return 0;
}
if (n == 1) {
return 1;
}
int a, b;
scheduler::spawn([n, &a]() {
a = pls_fib(n - 1);
});
scheduler::spawn([n, &b]() {
b = pls_fib(n - 2);
});
scheduler::sync();
return a + b;
}
constexpr int MAX_NUM_TASKS = 32;
constexpr int MAX_STACK_SIZE = 1024 * 32;
int main(int argc, char **argv) {
int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
volatile int res;
runner.run_iterations(fib::NUM_ITERATIONS, [&]() {
scheduler.perform_work([&]() {
res = pls_fib(fib::INPUT_N);
});
}, fib::NUM_WARMUP_ITERATIONS);
runner.commit_results(true);
return 0;
}
add_executable(benchmark_matrix main.cpp)
target_link_libraries(benchmark_matrix pls)
add_executable(benchmark_matrix_pls_v3 main.cpp)
target_link_libraries(benchmark_matrix_pls_v3 pls benchmark_runner benchmark_base)
if (EASY_PROFILER)
target_link_libraries(benchmark_matrix easy_profiler)
target_link_libraries(benchmark_matrix_pls_v3 easy_profiler)
endif ()
#include <pls/pls.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include "pls/internal/scheduling/scheduler.h"
#include "pls/algorithms/for_each.h"
#include <chrono>
using namespace pls::internal::scheduling;
const int MATRIX_SIZE = 128;
#include "benchmark_runner.h"
#include "benchmark_base/matrix.h"
using namespace comparison_benchmarks::base;
template<typename T, int SIZE>
class matrix {
class pls_matrix : public matrix::matrix<T, SIZE> {
public:
T data[SIZE][SIZE];
explicit matrix(T i = 1) {
std::fill(&data[0][0], &data[0][0] + SIZE * SIZE, i);
}
pls_matrix() : matrix::matrix<T, SIZE>() {}
void multiply(const matrix<T, SIZE> &a, const matrix<T, SIZE> &b) {
pls::for_each_range(0, SIZE, [&](int i) {
void multiply(const matrix::matrix<T, SIZE> &a, const matrix::matrix<T, SIZE> &b) override {
pls::algorithm::for_each_range(0, SIZE, [&](int i) {
this->multiply_column(i, a, b);
});
}
private:
void multiply_column(int i, const matrix<T, SIZE> &a, const matrix<T, SIZE> &b) {
for (int j = 0; j < SIZE; ++j) {
data[i][j] = 0;
}
for (int k = 0; k < SIZE; ++k) {
for (int j = 0; j < SIZE; ++j) {
data[i][j] += a.data[i][k] * b.data[k][j];
}
}
}
};
void fill_with_data(matrix<double, MATRIX_SIZE> &a, matrix<double, MATRIX_SIZE> &b) {
// Fill in some data...
for (int i = 0; i < MATRIX_SIZE; i++) {
for (int j = 0; j < MATRIX_SIZE; j++) {
a.data[i][j] = i;
b.data[i][j] = j;
}
}
}
constexpr int MAX_NUM_TASKS = 32;
constexpr int MAX_STACK_SIZE = 1024 * 1;
int main() {
PROFILE_ENABLE
matrix<double, MATRIX_SIZE> a;
matrix<double, MATRIX_SIZE> b;
matrix<double, MATRIX_SIZE> result;
fill_with_data(a, b);
int main(int argc, char **argv) {
int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
pls::internal::helpers::run_mini_benchmark([&] {
result.multiply(a, b);
}, 8, 1000);
string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
PROFILE_SAVE("test_profile.prof")
}
pls_matrix<double, matrix::MATRIX_SIZE> a;
pls_matrix<double, matrix::MATRIX_SIZE> b;
pls_matrix<double, matrix::MATRIX_SIZE> result;
//int main() {
// PROFILE_ENABLE
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u};
// pls::scheduler scheduler{&my_scheduler_memory, 4};
//
// matrix<double, MATRIX_SIZE> a;
// matrix<double, MATRIX_SIZE> b;
// matrix<double, MATRIX_SIZE> result;
// fill_with_data(a, b);
//
// scheduler.perform_work([&] {
// auto start_time = std::chrono::high_resolution_clock::now();
// PROFILE_MAIN_THREAD
// for (int i = 0; i < 10000; i++) {
// PROFILE_WORK_BLOCK("Top Level")
// result.multiply(a, b);
// }
// auto end_time = std::chrono::high_resolution_clock::now();
// long time = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count();
// std::cout << "Runtime: " << time << "us" << std::endl;
// });
//
// PROFILE_SAVE("test_profile.prof")
//}
scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
runner.run_iterations(matrix::NUM_ITERATIONS, [&]() {
scheduler.perform_work([&]() {
result.multiply(a, b);
});
}, matrix::WARMUP_ITERATIONS);
runner.commit_results(true);
}
add_executable(benchmark_pipeline main.cpp)
target_link_libraries(benchmark_pipeline pls)
if (EASY_PROFILER)
target_link_libraries(benchmark_pipeline easy_profiler)
endif ()
#include <pls/pls.h>
#include <pls/dataflow/dataflow.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include <iostream>
#include <complex>
#include <vector>
#include <tuple>
#include <atomic>
static constexpr int INPUT_SIZE = 8192;
typedef std::vector<std::complex<double>> complex_vector;
using namespace pls::dataflow;
void divide(complex_vector::iterator data, int n) {
complex_vector tmp_odd_elements(n / 2);
for (int i = 0; i < n / 2; i++) {
tmp_odd_elements[i] = data[i * 2 + 1];
}
for (int i = 0; i < n / 2; i++) {
data[i] = data[i * 2];
}
for (int i = 0; i < n / 2; i++) {
data[i + n / 2] = tmp_odd_elements[i];
}
}
void combine(complex_vector::iterator data, int n) {
for (int i = 0; i < n / 2; i++) {
std::complex<double> even = data[i];
std::complex<double> odd = data[i + n / 2];
// w is the "twiddle-factor".
// this could be cached, but we run the same 'data_structures' algorithm parallel/serial,
// so it won't impact the performance comparison.
std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
data[i] = even + w * odd;
data[i + n / 2] = even - w * odd;
}
}
void fft(complex_vector::iterator data, int n) {
if (n < 2) {
return;
}
divide(data, n);
fft(data, n / 2);
fft(data + n / 2, n / 2);
combine(data, n);
}
complex_vector prepare_input(int input_size) {
std::vector<double> known_frequencies{2, 11, 52, 88, 256};
complex_vector data(input_size);
// Set our input data to match a time series of the known_frequencies.
// When applying fft to this time-series we should find these frequencies.
for (int i = 0; i < input_size; i++) {
data[i] = std::complex<double>(0.0, 0.0);
for (auto frequencie : known_frequencies) {
data[i] += sin(2 * M_PI * frequencie * i / input_size);
}
}
return data;
}
int main() {
PROFILE_ENABLE
pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u};
pls::scheduler scheduler{&my_scheduler_memory, 4};
graph<inputs<int>, outputs<int>> graph;
std::atomic<int> count{0};
auto lambda = [&](const int &in, int &out) {
PROFILE_WORK_BLOCK("Work Lambda")
auto tmp = in;
out = tmp;
complex_vector input = prepare_input(INPUT_SIZE);
fft(input.begin(), input.size());
count++;
};
function_node<inputs<int>, outputs<int>, decltype(lambda)> step_1{lambda};
function_node<inputs<int>, outputs<int>, decltype(lambda)> step_2{lambda};
function_node<inputs<int>, outputs<int>, decltype(lambda)> step_3{lambda};
function_node<inputs<int>, outputs<int>, decltype(lambda)> step_4{lambda};
graph >> step_1 >> step_2 >> step_3 >> step_4 >> graph;
graph.build();
const int num_elements = 10;
std::vector<std::tuple<int>> results(num_elements);
pls::internal::helpers::run_mini_benchmark([&] {
PROFILE_WORK_BLOCK("Top Level")
for (int j = 0; j < num_elements; j++) {
graph.run(std::tuple<int>{j}, &results[j]);
}
pls::scheduler::wait_for_all();
}, 8, 1000);
PROFILE_SAVE("test_profile.prof")
}
//int main() {
// PROFILE_ENABLE
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u};
// pls::scheduler scheduler{&my_scheduler_memory, 4};
//
// graph<inputs<int>, outputs<int>> graph;
// std::atomic<int> count{0};
// auto lambda = [&](const int &in, int &out) {
// PROFILE_WORK_BLOCK("Work Lambda")
// out = in;
// complex_vector input = prepare_input(INPUT_SIZE);
// fft(input.begin(), input.size());
// count++;
// };
// function_node<inputs<int>, outputs<int>, decltype(lambda)> step_1{lambda};
// function_node<inputs<int>, outputs<int>, decltype(lambda)> step_2{lambda};
// function_node<inputs<int>, outputs<int>, decltype(lambda)> step_3{lambda};
// function_node<inputs<int>, outputs<int>, decltype(lambda)> step_4{lambda};
//
// graph >> step_1 >> step_2 >> step_3 >> step_4 >> graph;
// graph.build();
//
// const int num_elements = 10;
// std::vector<std::tuple<int>> results(num_elements);
//
// scheduler.perform_work([&] {
// PROFILE_MAIN_THREAD
// for (int i = 0; i < 10; i++) {
// PROFILE_WORK_BLOCK("Top Level")
// for (int j = 0; j < num_elements; j++) {
// graph.run(std::tuple<int>{j}, &results[j]);
// }
// pls::scheduler::wait_for_all();
// }
// });
//
// std::cout << count << std::endl;
//
// PROFILE_SAVE("test_profile.prof")
//}
add_executable(benchmark_prefix main.cpp)
target_link_libraries(benchmark_prefix pls)
if (EASY_PROFILER)
target_link_libraries(benchmark_prefix easy_profiler)
endif ()
#include <pls/pls.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include <iostream>
#include <vector>
#include <functional>
static constexpr int INPUT_SIZE = 10e7;
int main() {
PROFILE_ENABLE
std::vector<double> vec(INPUT_SIZE, 1);
std::vector<double> out(INPUT_SIZE);
for (int i = 0; i < INPUT_SIZE; i++) {
vec[i] = i;
}
pls::internal::helpers::run_mini_benchmark([&] {
pls::scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0);
}, 8, 1000);
PROFILE_SAVE("test_profile.prof")
}
//int main() {
// PROFILE_ENABLE
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
// pls::scheduler scheduler{&my_scheduler_memory, 8};
//
// std::vector<double> vec(INPUT_SIZE, 1);
// std::vector<double> out(INPUT_SIZE);
//
// for (int i = 0; i < INPUT_SIZE; i++) {
// vec[i] = 1;
// }
//
// scheduler.perform_work([&] {
// PROFILE_MAIN_THREAD
// for (int i = 0; i < 100; i++) {
// pls::scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0);
// }
// });
//
// PROFILE_SAVE("test_profile.prof")
//}
add_executable(benchmark_unbalanced main.cpp node.h function_node.cpp picosha2.h)
target_link_libraries(benchmark_unbalanced pls)
add_executable(benchmark_unbalanced_pls_v3 main.cpp)
target_link_libraries(benchmark_unbalanced_pls_v3 benchmark_runner benchmark_base pls)
if (EASY_PROFILER)
target_link_libraries(benchmark_unbalanced easy_profiler)
target_link_libraries(benchmark_unbalanced_pls_v3 easy_profiler)
endif ()
#include <pls/pls.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include "pls/internal/scheduling/scheduler.h"
#include "node.h"
using namespace pls::internal::scheduling;
const int SEED = 42;
const int ROOT_CHILDREN = 140;
const double Q = 0.124875;
const int NORMAL_CHILDREN = 8;
#include "benchmark_runner.h"
#include "benchmark_base/unbalanced.h"
const int NUM_NODES = 71069;
using namespace comparison_benchmarks::base;
int count_child_nodes(uts::node &node) {
int child_count = 1;
std::vector<uts::node> children = node.spawn_child_nodes();
#include <atomic>
if (children.empty()) {
return child_count;
int count_child_nodes(unbalanced::node &node) {
if (node.get_num_children() < 1) {
return 1;
}
std::vector<int> results(children.size());
for (size_t i = 0; i < children.size(); i++) {
size_t index = i;
auto lambda = [&, index] {
results[index] = count_child_nodes(children[index]);
};
using child_type = pls::lambda_task_by_value<typeof(lambda)>;
pls::scheduler::spawn_child<child_type>(lambda);
}
pls::scheduler::wait_for_all();
for (auto result : results) {
child_count += result;
std::atomic<int> count{1};
for (int i = 0; i < node.get_num_children(); i++) {
scheduler::spawn([i, &count, &node] {
unbalanced::node child_node = node.spawn_child_node(i);
count.fetch_add(count_child_nodes(child_node));
});
}
scheduler::sync();
return child_count;
return count;
}
int unbalanced_tree_search(int seed, int root_children, double q, int normal_children) {
int result;
unbalanced::node root(seed, root_children, q, normal_children);
return count_child_nodes(root);
}
auto lambda = [&] {
uts::node root(seed, root_children, q, normal_children);
result = count_child_nodes(root);
};
using child_type = pls::lambda_task_by_reference<typeof(lambda)>;
pls::scheduler::spawn_child<child_type>(lambda);
pls::scheduler::wait_for_all();
constexpr int MAX_NUM_TASKS = 256;
constexpr int MAX_STACK_SIZE = 1024 * 2;
return result;
}
int main(int argc, char **argv) {
int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
int main() {
PROFILE_ENABLE
pls::internal::helpers::run_mini_benchmark([&] {
unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
}, 8, 2000);
runner.run_iterations(unbalanced::NUM_ITERATIONS, [&]() {
scheduler.perform_work([&]() {
unbalanced_tree_search(unbalanced::SEED,
unbalanced::ROOT_CHILDREN,
unbalanced::Q,
unbalanced::NORMAL_CHILDREN);
});
}, unbalanced::WARMUP_ITERATIONS);
runner.commit_results(true);
PROFILE_SAVE("test_profile.prof")
}
//int main() {
......
add_subdirectory(deboost.context)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
SET(SWITCH_ASSEMBLY "custom_stack_callback_x86_64.s")
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
SET(SWITCH_ASSEMBLY "custom_stack_callback_arm32.s")
else ()
MESSAGE(FATAL_ERROR "Platform (${CMAKE_SYSTEM_PROCESSOR} on ${CMAKE_SYSTEM_NAME}) not supported! Please see Readme for instructions to port.")
endif ()
add_executable(context_switch
main.cpp
${SWITCH_ASSEMBLY})
# Example for adding the library to your app (as a cmake project dependency)
target_link_libraries(context_switch fcontext context_switcher)
.arm
.text
.global custom_stack_callback
.type custom_stack_callback, %function
.align 4
custom_stack_callback:
/* r0 new stack adress (passed as parameter) */
/* r4 temporary for restoring old stack (callee saved, so we get the correct value in case of a return) */
push {r4, lr} /* store the callee saved register as required and the return address */
mov r4, sp /* store current stack pointer */
mov sp, r0 /* update stack pointer to new user level stack */
bl callback /* enter next tasks (will not return if continuation is stolen) */
mov sp, r4 /* restore to the old stack pointer */
pop {r4, pc} /* restore the callee saved register as required and returns */
.file "custom_stack_callback_x86_64.s"
.text
.global custom_stack_callback
.type custom_stack_callback, @function
.align 16
custom_stack_callback:
# rdi = new stack adress (passed as parameter)
# r12 temporary for restoring old stack (callee saved, so we get the correct value in case of a return)
push %r12 # store the callee saved register as required
movq %rsp, %r12 # store current stack pointer
movq %rdi, %rsp # update stack pointer to new user level stack
call callback # enter next tasks (will not return if continuation is stolen)
movq %r12, %rsp # restore to the old stack pointer
pop %r12 # restore the callee saved register as required
ret
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
bld/
[Bb]in/
[Oo]bj/
.build
# Visual Studio 2015 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUNIT
*.VisualState.xml
TestResult.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# DNX
project.lock.json
artifacts/
*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# JustCode is a .NET coding add-in
.JustCode
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# TODO: Comment the next line if you want to checkin your web deploy settings
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/packages/*
# except build/, which is used as an MSBuild target.
!**/packages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/packages/repositories.config
# NuGet v3's project.json files produces more ignoreable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Microsoft Azure ApplicationInsights config file
ApplicationInsights.config
# Windows Store app package directory
AppPackages/
BundleArtifacts/
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.pfx
*.publishsettings
node_modules/
orleans.codegen.cs
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
# SQL Server files
*.mdf
*.ldf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
# FAKE - F# Make
.fake/
# PROJECT: fcontext
cmake_minimum_required(VERSION 3.0)
project(fcontext C)
if (NOT CMAKE_MODULE_PATH)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
endif ()
if (MSVC)
enable_language(CXX ASM_MASM)
else ()
enable_language(CXX ASM)
endif ()
if (MSVC)
add_definitions(-D_ITERATOR_DEBUG_LEVEL=0)
add_definitions(-D_HAS_EXCEPTIONS=0)
endif ()
add_definitions(-DBOOST_CONTEXT_EXPORT=)
set(HEADER "include/fcontext/fcontext.h")
set(SOURCES "source/stack.c")
# OS
if (APPLE)
set(CPU_ARCH "combined")
set(ASM_EXT "all_macho_gas.S")
elseif (ANDROID)
# Android
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
set(CPU_ARCH "arm")
set(ASM_EXT "aapcs_elf_gas.S")
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
set(CPU_ARCH "arm64")
set(ASM_EXT "aapcs_elf_gas.S")
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "i686")
set(CPU_ARCH "i386")
set(ASM_EXT "sysv_elf_gas.S")
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
set(CPU_ARCH "x86_64")
set(ASM_EXT "sysv_elf_gas.S")
endif ()
elseif (UNIX)
# PC (x86/x64)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(CPU_ARCH "arm")
set(ASM_EXT "aapcs_elf_gas.S") # Untested, but should work for linux/unix
elseif (CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CPU_ARCH "x86_64")
set(ASM_EXT "sysv_elf_gas.S") # Linux/Unix
else ()
set(CPU_ARCH "i386")
set(ASM_EXT "sysv_elf_gas.S") # Linux/Unix
endif ()
elseif (WIN32)
# Windows PC
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CPU_ARCH "x86_64")
else ()
set(CPU_ARCH "i386")
endif ()
set(ASM_EXT "ms_pe_masm.asm")
endif ()
set(ASM_SOURCES "asm/make_${CPU_ARCH}_${ASM_EXT}"
"asm/jump_${CPU_ARCH}_${ASM_EXT}"
"asm/ontop_${CPU_ARCH}_${ASM_EXT}")
add_library(fcontext STATIC ${SOURCES} ${ASM_SOURCES})
target_include_directories(fcontext
PRIVATE include/fcontext
INTERFACE include)
set_property(TARGET fcontext PROPERTY INTERPROCEDURAL_OPTIMIZATION FALSE)
set_target_properties(fcontext PROPERTIES FOLDER Deps ${IOS_GENERAL_PROPERTIES})
install(TARGETS fcontext DESTINATION lib)
install(FILES ${HEADER} DESTINATION include/fcontext)
The MIT License (MIT)
Copyright (c) 2016 Sepehr Taghdisian
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
## deboost.context
"Deboostified" version of boost.context (coroutines), Plain and simple C API for context switching. Easy build on multiple platforms.
### Build
#### Currently supported platforms
- Windows (x86_64, Win32)
- Linux (x86_64/x86)
- OSX (x86_64/x86)
- Android (ARM/x86/ARM64/x86_64)
- iOS (Arm64, Arm7, x86_64, i386)
#### iOS
I've made an extra xcode project files for iOS ```projects/xcode/fcontext``` because I didn't know how to set different ASM files for each ARM architecture in cmake. So If you know how to do it, I'd be happy if you tell me.
So, you can use the included toolchain file or use your own, just define _IOS_ to include the xcode project instead of generating it with cmake.
```
cd deboost.context
mkdir .build
cd .build
cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/ios.toolchain.cmake -G Xcode
```
### Usage
Link your program with fcontext.lib/libfcontext.a and include the file _fcontext.h_.
See _include/fcontext/fcontext.h_ for API usage.
More info is available at: [boost.context](http://www.boost.org/doc/libs/1_60_0/libs/context/doc/html/index.html)
### Credits
- Boost.context: This library uses the code from boost.context [github](https://github.com/boostorg/context)
### Thanks
- Ali Salehi [github](https://github.com/lordhippo)
/*
Copyright Edward Nevill + Oliver Kowalke 2015
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
* ------------------------------------------------- *
* | x19 | x20 | x21 | x22 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
* ------------------------------------------------- *
* | x23 | x24 | x25 | x26 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| *
* ------------------------------------------------- *
* | x27 | x28 | FP | LR | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| *
* ------------------------------------------------- *
* | PC | align | | | *
* ------------------------------------------------- *
* *
*******************************************************/
.cpu generic+fp+simd
.text
.align 2
.global jump_fcontext
.type jump_fcontext, %function
jump_fcontext:
# prepare stack for GP + FPU
sub sp, sp, #0x70
# save x19-x30
stp x19, x20, [sp, #0x00]
stp x21, x22, [sp, #0x10]
stp x23, x24, [sp, #0x20]
stp x25, x26, [sp, #0x30]
stp x27, x28, [sp, #0x40]
stp x29, x30, [sp, #0x50]
# save LR as PC
str x30, [sp, #0x60]
# store RSP (pointing to context-data) in X0
mov x4, sp
# restore RSP (pointing to context-data) from X1
mov sp, x0
# load x19-x30
ldp x19, x20, [sp, #0x00]
ldp x21, x22, [sp, #0x10]
ldp x23, x24, [sp, #0x20]
ldp x25, x26, [sp, #0x30]
ldp x27, x28, [sp, #0x40]
ldp x29, x30, [sp, #0x50]
# return transfer_t from jump
# pass transfer_t as first arg in context function
# X0 == FCTX, X1 == DATA
mov x0, x4
# load pc
ldr x4, [sp, #0x60]
# restore stack from GP + FPU
add sp, sp, #0x70
ret x4
.size jump_fcontext,.-jump_fcontext
# Mark that we don't need executable stack.
.section .note.GNU-stack,"",%progbits
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
* ------------------------------------------------- *
* | x19 | x20 | x21 | x22 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
* ------------------------------------------------- *
* | x23 | x24 | x25 | x26 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| *
* ------------------------------------------------- *
* | x27 | x28 | FP | LR | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| *
* ------------------------------------------------- *
* | PC | align | | | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl _jump_fcontext
.balign 16
_jump_fcontext:
; prepare stack for GP + FPU
sub sp, sp, #0x70
; save x19-x30
stp x19, x20, [sp, #0x00]
stp x21, x22, [sp, #0x10]
stp x23, x24, [sp, #0x20]
stp x25, x26, [sp, #0x30]
stp x27, x28, [sp, #0x40]
stp fp, lr, [sp, #0x50]
; save LR as PC
str lr, [sp, #0x60]
; store RSP (pointing to context-data) in X0
mov x4, sp
; restore RSP (pointing to context-data) from X1
mov sp, x0
; load x19-x30
ldp x19, x20, [sp, #0x00]
ldp x21, x22, [sp, #0x10]
ldp x23, x24, [sp, #0x20]
ldp x25, x26, [sp, #0x30]
ldp x27, x28, [sp, #0x40]
ldp fp, lr, [sp, #0x50]
; return transfer_t from jump
; pass transfer_t as first arg in context function
; X0 == FCTX, X1 == DATA
mov x0, x4
; load pc
ldr x4, [sp, #0x60]
; restore stack from GP + FPU
add sp, sp, #0x70
ret x4
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
* ------------------------------------------------- *
* |hiddn| v1 | v2 | v3 | v4 | v5 | v6 | v7 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
* ------------------------------------------------- *
* | v8 | lr | pc | FCTX| DATA| | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl jump_fcontext
.align 2
.type jump_fcontext,%function
jump_fcontext:
@ save LR as PC
push {lr}
@ save hidden,V1-V8,LR
push {a1,v1-v8,lr}
@ store RSP (pointing to context-data) in A1
mov a1, sp
@ restore RSP (pointing to context-data) from A2
mov sp, a2
@ restore hidden,V1-V8,LR
pop {a4,v1-v8,lr}
@ return transfer_t from jump
str a1, [a4, #0]
str a3, [a4, #4]
@ pass transfer_t as first arg in context function
@ A1 == FCTX, A2 == DATA
mov a2, a3
@ restore PC
pop {pc}
.size jump_fcontext,.-jump_fcontext
@ Mark that we don't need executable stack.
.section .note.GNU-stack,"",%progbits
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
* ------------------------------------------------- *
* | sjlj|hiddn| v1 | v2 | v3 | v4 | v5 | v6 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
* ------------------------------------------------- *
* | v7 | v8 | lr | pc | FCTX| DATA| | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl _jump_fcontext
.align 2
_jump_fcontext:
@ save LR as PC
push {lr}
@ save hidden,V1-V8,LR
push {a1,v1-v8,lr}
@ locate TLS to save/restore SjLj handler
mrc p15, 0, v2, c13, c0, #3
bic v2, v2, #3
@ load TLS[__PTK_LIBC_DYLD_Unwind_SjLj_Key]
ldr v1, [v2, #8]
@ save SjLj handler
push {v1}
@ store RSP (pointing to context-data) in A1
mov a1, sp
@ restore RSP (pointing to context-data) from A2
mov sp, a2
@ r#estore SjLj handler
pop {v1}
@ store SjLj handler in TLS
str v1, [v2, #8]
@ restore hidden,V1-V8,LR
pop {a4,v1-v8,lr}
@ return transfer_t from jump
str a1, [a4, #0]
str a3, [a4, #4]
@ pass transfer_t as first arg in context function
@ A1 == FCTX, A2 == DATA
mov a2, a3
@ restore PC
pop {pc}
;/*
; Copyright Oliver Kowalke 2009.
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
; http://www.boost.org/LICENSE_1_0.txt)
;*/
; *******************************************************
; * *
; * ------------------------------------------------- *
; * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
; * ------------------------------------------------- *
; * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
; * ------------------------------------------------- *
; * |deall|limit| base|hiddn| v1 | v2 | v3 | v4 | *
; * ------------------------------------------------- *
; * ------------------------------------------------- *
; * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
; * ------------------------------------------------- *
; * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
; * ------------------------------------------------- *
; * | v5 | v6 | v7 | v8 | lr | pc | FCTX| DATA| *
; * ------------------------------------------------- *
; * *
; *******************************************************
AREA |.text|, CODE
ALIGN 4
EXPORT jump_fcontext
jump_fcontext PROC
; save LR as PC
push {lr}
; save hidden,V1-V8,LR
push {a1,v1-v8,lr}
; load TIB to save/restore thread size and limit.
; we do not need preserve CPU flag and can use it's arg register
mrc p15, #0, v1, c13, c0, #2
; save current stack base
ldr a5, [v1, #0x04]
push {a5}
; save current stack limit
ldr a5, [v1, #0x08]
push {a5}
; save current deallocation stack
ldr a5, [v1, #0xe0c]
push {a5}
; store RSP (pointing to context-data) in A1
mov a1, sp
; restore RSP (pointing to context-data) from A2
mov sp, a2
; restore deallocation stack
pop {a5}
str a5, [v1, #0xe0c]
; restore stack limit
pop {a5}
str a5, [v1, #0x08]
; restore stack base
pop {a5}
str a5, [v1, #0x04]
; restore hidden,V1-V8,LR
pop {a4,v1-v8,lr}
; return transfer_t from jump
str a1, [a4, #0]
str a3, [a4, #4]
; pass transfer_t as first arg in context function
; A1 == FCTX, A2 == DATA
mov a2, a3
; restore PC
pop {pc}
ENDP
END
/*
Copyright Sergue E. Leontiev 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
// Stub file for universal binary
#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
#if defined(__aarch64__)
#include "jump_arm64_aapcs_macho_gas.S"
#else
#include "jump_arm_aapcs_macho_gas.S"
#endif
#else
#if defined(__i386__)
#include "jump_i386_sysv_macho_gas.S"
#elif defined(__x86_64__)
#include "jump_x86_64_sysv_macho_gas.S"
#elif defined(__ppc__)
#include "jump_ppc32_sysv_macho_gas.S"
#elif defined(__ppc64__)
#include "jump_ppc64_sysv_macho_gas.S"
#else
#error "No arch's"
#endif
#endif
\ No newline at end of file
/*
Copyright Sergue E. Leontiev 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
// Stub file for universal binary
#if defined(__i386__)
#include "jump_i386_sysv_macho_gas.S"
#elif defined(__x86_64__)
#include "jump_x86_64_sysv_macho_gas.S"
#elif defined(__ppc__)
#include "jump_ppc32_sysv_macho_gas.S"
#elif defined(__ppc64__)
#include "jump_ppc64_sysv_macho_gas.S"
#else
#error "No arch's"
#endif
/*
Copyright Oliver Kowalke 2009.
Copyright Thomas Sailer 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*************************************************************************************
* --------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* --------------------------------------------------------------------------------- *
* | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | *
* --------------------------------------------------------------------------------- *
* | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX | *
* --------------------------------------------------------------------------------- *
* --------------------------------------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* --------------------------------------------------------------------------------- *
* | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | *
* --------------------------------------------------------------------------------- *
* | EBP | EIP | to | data | | EH NXT |SEH HNDLR| | *
* --------------------------------------------------------------------------------- *
*************************************************************************************/
.file "jump_i386_ms_pe_gas.asm"
.text
.p2align 4,,15
.globl _jump_fcontext
.def _jump_fcontext; .scl 2; .type 32; .endef
_jump_fcontext:
pushl %ebp /* save EBP */
pushl %ebx /* save EBX */
pushl %esi /* save ESI */
pushl %edi /* save EDI */
/* load NT_TIB */
movl %fs:(0x18), %edx
/* load current SEH exception list */
movl (%edx), %eax
push %eax
/* load current stack base */
movl 0x04(%edx), %eax
push %eax
/* load current stack limit */
movl 0x08(%edx), %eax
push %eax
/* load current dealloction stack */
movl 0xe0c(%edx), %eax
push %eax
/* load fiber local storage */
movl 0x10(%edx), %eax
push %eax
/* store ESP (pointing to context-data) in EAX */
movl %esp, %eax
/* first arg of jump_fcontext() == fcontext to jump to */
movl 0x28(%esp), %ecx
/* restore ESP (pointing to context-data) from EDX */
movl %ecx, %esp
/* load NT_TIB into ECX */
movl %fs:(0x18), %edx
/* restore fiber local storage */
popl %ecx
movl %ecx, 0x10(%edx)
/* restore current deallocation stack */
popl %ecx
movl %ecx, 0xe0c(%edx)
/* restore current stack limit */
popl %ecx
movl %ecx, 0x08(%edx)
/* restore current stack base */
popl %ecx
movl %ecx, 0x04(%edx)
/* restore current SEH exception list */
popl %ecx
movl %ecx, (%edx)
popl %edi /* save EDI */
popl %esi /* save ESI */
popl %ebx /* save EBX */
popl %ebp /* save EBP */
/* return transfer_t */
/* FCTX == EAX, DATA == EDX */
movl 0x2c(%eax), %edx
/* jump to context */
ret
.section .drectve
.ascii " -export:\"jump_fcontext\""
; Copyright Oliver Kowalke 2009.
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
; http://www.boost.org/LICENSE_1_0.txt)
; ---------------------------------------------------------------------------------
; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
; ---------------------------------------------------------------------------------
; | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch |
; ---------------------------------------------------------------------------------
; | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX |
; ---------------------------------------------------------------------------------
; ---------------------------------------------------------------------------------
; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
; ---------------------------------------------------------------------------------
; | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch |
; ---------------------------------------------------------------------------------
; | EBP | EIP | to | data | | EH NXT |SEH HNDLR| |
; ---------------------------------------------------------------------------------
.386
.XMM
.model flat, c
.code
jump_fcontext PROC BOOST_CONTEXT_EXPORT
push ebp ; save EBP
push ebx ; save EBX
push esi ; save ESI
push edi ; save EDI
assume fs:nothing
; load NT_TIB into ECX
mov edx, fs:[018h]
assume fs:error
; load current SEH exception list
mov eax, [edx]
push eax
; load current stack base
mov eax, [edx+04h]
push eax
; load current stack limit
mov eax, [edx+08h]
push eax
; load current deallocation stack
mov eax, [edx+0e0ch]
push eax
; load fiber local storage
mov eax, [edx+010h]
push eax
; store ESP (pointing to context-data) in EAX
mov eax, esp
; firstarg of jump_fcontext() == fcontext to jump to
mov ecx, [esp+028h]
; restore ESP (pointing to context-data) from EAX
mov esp, ecx
assume fs:nothing
; load NT_TIB into EDX
mov edx, fs:[018h]
assume fs:error
; restore fiber local storage
pop ecx
mov [edx+010h], ecx
; restore current deallocation stack
pop ecx
mov [edx+0e0ch], ecx
; restore current stack limit
pop ecx
mov [edx+08h], ecx
; restore current stack base
pop ecx
mov [edx+04h], ecx
; restore current SEH exception list
pop ecx
mov [edx], ecx
pop edi ; save EDI
pop esi ; save ESI
pop ebx ; save EBX
pop ebp ; save EBP
; return transfer_t
; FCTX == EAX, DATA == EDX
mov edx, [eax+02ch]
; jump to context
ret
jump_fcontext ENDP
END
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*****************************************************************************************
* *
* ----------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ----------------------------------------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
* ----------------------------------------------------------------------------------- *
* | EDI | ESI | EBX | EBP | EIP | hidden | to | data | *
* ----------------------------------------------------------------------------------- *
* *
*****************************************************************************************/
.text
.globl jump_fcontext
.align 2
.type jump_fcontext,@function
jump_fcontext:
pushl %ebp /* save EBP */
pushl %ebx /* save EBX */
pushl %esi /* save ESI */
pushl %edi /* save EDI */
/* store fcontext_t in ECX */
movl %esp, %ecx
/* first arg of jump_fcontext() == fcontext to jump to */
movl 0x18(%esp), %eax
/* second arg of jump_fcontext() == data to be transferred */
movl 0x1c(%esp), %edx
/* restore ESP (pointing to context-data) from EAX */
movl %eax, %esp
/* address of returned transport_t */
movl 0x14(%esp), %eax
/* return parent fcontext_t */
movl %ecx, (%eax)
/* return data */
movl %edx, 0x4(%eax)
popl %edi /* restore EDI */
popl %esi /* restore ESI */
popl %ebx /* restore EBX */
popl %ebp /* restore EBP */
/* jump to context */
ret $4
.size jump_fcontext,.-jump_fcontext
/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*****************************************************************************************
* *
* ----------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ----------------------------------------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
* ----------------------------------------------------------------------------------- *
* | EDI | ESI | EBX | EBP | EIP | hidden | to | data | *
* ----------------------------------------------------------------------------------- *
* *
*****************************************************************************************/
.text
.globl _jump_fcontext
.align 2
_jump_fcontext:
pushl %ebp /* save EBP */
pushl %ebx /* save EBX */
pushl %esi /* save ESI */
pushl %edi /* save EDI */
/* store fcontext_t in ECX */
movl %esp, %ecx
/* first arg of jump_fcontext() == context jumping to */
movl 0x18(%esp), %eax
/* second arg of jump_fcontext() == data to be transferred */
movl 0x1c(%esp), %edx
/* restore ESP (pointing to context-data) from EAX */
movl %eax, %esp
/* address of returned transport_t */
movl 0x14(%esp), %eax
/* return parent fcontext_t */
movl %ecx, (%eax)
/* return data */
movl %edx, 0x4(%eax)
popl %edi /* restore EDI */
popl %esi /* restore ESI */
popl %ebx /* restore EBX */
popl %ebp /* restore EBP */
/* jump to context */
ret $4
/*
Copyright Sergue E. Leontiev 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
// Stub file for universal binary
#if defined(__i386__)
#include "jump_i386_sysv_macho_gas.S"
#elif defined(__x86_64__)
#include "jump_x86_64_sysv_macho_gas.S"
#else
#error "No arch's"
#endif
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | FP |hiddn| RA | PC | GP | FCTX| DATA| | *
* ------------------------------------------------- *
* *
* *****************************************************/
.text
.globl jump_fcontext
.align 2
.type jump_fcontext,@function
.ent jump_fcontext
jump_fcontext:
# reserve space on stack
addiu $sp, $sp, -112
sw $s0, ($sp) # save S0
sw $s1, 4($sp) # save S1
sw $s2, 8($sp) # save S2
sw $s3, 12($sp) # save S3
sw $s4, 16($sp) # save S4
sw $s5, 20($sp) # save S5
sw $s6, 24($sp) # save S6
sw $s7, 28($sp) # save S7
sw $fp, 32($sp) # save FP
sw $a0, 36($sp) # save hidden, address of returned transfer_t
sw $ra, 40($sp) # save RA
sw $ra, 44($sp) # save RA as PC
# store SP (pointing to context-data) in A0
move $a0, $sp
# restore SP (pointing to context-data) from A1
move $sp, $a1
lw $s0, ($sp) # restore S0
lw $s1, 4($sp) # restore S1
lw $s2, 8($sp) # restore S2
lw $s3, 12($sp) # restore S3
lw $s4, 16($sp) # restore S4
lw $s5, 20($sp) # restore S5
lw $s6, 24($sp) # restore S6
lw $s7, 28($sp) # restore S7
lw $fp, 32($sp) # restore FP
lw $t0, 36($sp) # restore hidden, address of returned transfer_t
lw $ra, 40($sp) # restore RA
# load PC
lw $t9, 44($sp)
# adjust stack
addiu $sp, $sp, 112
# return transfer_t from jump
sw $a0, ($t0) # fctx of transfer_t
sw $a1, 4($t0) # data of transfer_t
# pass transfer_t as first arg in context function
# A0 == fctx, A1 == data
move $a1, $a2
# jump to context
jr $t9
.end jump_fcontext
.size jump_fcontext, .-jump_fcontext
/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits
/*
Copyright Sergue E. Leontiev 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
// Stub file for universal binary
#if defined(__ppc__)
#include "jump_ppc32_sysv_macho_gas.S"
#elif defined(__ppc64__)
#include "jump_ppc64_sysv_macho_gas.S"
#else
#error "No arch's"
#endif
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
* ------------------------------------------------- *
* | R29 | R30 | R31 |hiddn| CR | LR | PC | FCTX| *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | *
* ------------------------------------------------- *
* | DATA| | | | | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl jump_fcontext
.align 2
.type jump_fcontext,@function
jump_fcontext:
# reserve space on stack
subi %r1, %r1, 92
stw %r13, 0(%r1) # save R13
stw %r14, 4(%r1) # save R14
stw %r15, 8(%r1) # save R15
stw %r16, 12(%r1) # save R16
stw %r17, 16(%r1) # save R17
stw %r18, 20(%r1) # save R18
stw %r19, 24(%r1) # save R19
stw %r20, 28(%r1) # save R20
stw %r21, 32(%r1) # save R21
stw %r22, 36(%r1) # save R22
stw %r23, 40(%r1) # save R23
stw %r24, 44(%r1) # save R24
stw %r25, 48(%r1) # save R25
stw %r26, 52(%r1) # save R26
stw %r27, 56(%r1) # save R27
stw %r28, 60(%r1) # save R28
stw %r29, 64(%r1) # save R29
stw %r30, 68(%r1) # save R30
stw %r31, 72(%r1) # save R31
stw %r3, 76(%r1) # save hidden
# save CR
mfcr %r0
stw %r0, 80(%r1)
# save LR
mflr %r0
stw %r0, 84(%r1)
# save LR as PC
stw %r0, 88(%r1)
# store RSP (pointing to context-data) in R6
mr %r6, %r1
# restore RSP (pointing to context-data) from R4
mr %r1, %r4
lwz %r13, 0(%r1) # restore R13
lwz %r14, 4(%r1) # restore R14
lwz %r15, 8(%r1) # restore R15
lwz %r16, 12(%r1) # restore R16
lwz %r17, 16(%r1) # restore R17
lwz %r18, 20(%r1) # restore R18
lwz %r19, 24(%r1) # restore R19
lwz %r20, 28(%r1) # restore R20
lwz %r21, 32(%r1) # restore R21
lwz %r22, 36(%r1) # restore R22
lwz %r23, 40(%r1) # restore R23
lwz %r24, 44(%r1) # restore R24
lwz %r25, 48(%r1) # restore R25
lwz %r26, 52(%r1) # restore R26
lwz %r27, 56(%r1) # restore R27
lwz %r28, 60(%r1) # restore R28
lwz %r29, 64(%r1) # restore R29
lwz %r30, 68(%r1) # restore R30
lwz %r31, 72(%r1) # restore R31
lwz %r3, 76(%r1) # restore hidden
# restore CR
lwz %r0, 80(%r1)
mtcr %r0
# restore LR
lwz %r0, 84(%r1)
mtlr %r0
# load PC
lwz %r0, 88(%r1)
# restore CTR
mtctr %r0
# adjust stack
addi %r1, %r1, 92
# return transfer_t
stw %r6, 0(%r3)
stw %r5, 4(%r3)
# jump to context
bctr
.size jump_fcontext, .-jump_fcontext
/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
* ------------------------------------------------- *
* | R29 | R30 | R31 |hiddn| CR | LR | PC | FCTX| *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | *
* ------------------------------------------------- *
* | DATA| | | | | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl _jump_fcontext
.align 2
_jump_fcontext:
; reserve space on stack
subi r1, r1, 92
stw r13, 0(r1) # save R13
stw r14, 4(r1) # save R14
stw r15, 8(r1) # save R15
stw r16, 12(r1) # save R16
stw r17, 16(r1) # save R17
stw r18, 20(r1) # save R18
stw r19, 24(r1) # save R19
stw r20, 28(r1) # save R20
stw r21, 32(r1) # save R21
stw r22, 36(r1) # save R22
stw r23, 40(r1) # save R23
stw r24, 44(r1) # save R24
stw r25, 48(r1) # save R25
stw r26, 52(r1) # save R26
stw r27, 56(r1) # save R27
stw r28, 60(r1) # save R28
stw r29, 64(r1) # save R29
stw r30, 68(r1) # save R30
stw r31, 72(r1) # save R31
stw r3, 76(r1) # save hidden
# save CR
mfcr r0
stw r0, 80(r1)
# save LR
mflr r0
stw r0, 84(r1)
# save LR as PC
stw r0, 88(r1)
# store RSP (pointing to context-data) in R6
mr r6, r1
# restore RSP (pointing to context-data) from R4
mr r1, r4
lwz r13, 0(r1) # restore R13
lwz r14, 4(r1) # restore R14
lwz r15, 8(r1) # restore R15
lwz r16, 12(r1) # restore R16
lwz r17, 16(r1) # restore R17
lwz r18, 20(r1) # restore R18
lwz r19, 24(r1) # restore R19
lwz r20, 28(r1) # restore R20
lwz r21, 32(r1) # restore R21
lwz r22, 36(r1) # restore R22
lwz r23, 40(r1) # restore R23
lwz r24, 44(r1) # restore R24
lwz r25, 48(r1) # restore R25
lwz r26, 52(r1) # restore R26
lwz r27, 56(r1) # restore R27
lwz r28, 60(r1) # restore R28
lwz r29, 64(r1) # restore R29
lwz r30, 68(r1) # restore R30
lwz r31, 72(r1) # restore R31
lwz r3, 76(r1) # restore hidden
# restore CR
lwz r0, 80(r1)
mtcr r0
# restore LR
lwz r0, 84(r1)
mtlr r0
# load PC
lwz r0, 88(r1)
# restore CTR
mtctr r0
# adjust stack
addi r1, r1, 92
# return transfer_t
stw r6, 0(r3)
stw r5, 4(r3)
# jump to context
bctr
.globl .jump_fcontext
.globl jump_fcontext[DS]
.align 2
.csect jump_fcontext[DS]
jump_fcontext:
.long .jump_fcontext
.jump_fcontext:
# reserve space on stack
subi 1, 1, 92
stw 13, 0(1) # save R13
stw 14, 4(1) # save R14
stw 15, 8(1) # save R15
stw 16, 12(1) # save R16
stw 17, 16(1) # save R17
stw 18, 20(1) # save R18
stw 19, 24(1) # save R19
stw 20, 28(1) # save R20
stw 21, 32(1) # save R21
stw 22, 36(1) # save R22
stw 23, 40(1) # save R23
stw 24, 44(1) # save R24
stw 25, 48(1) # save R25
stw 26, 52(1) # save R26
stw 27, 56(1) # save R27
stw 28, 60(1) # save R28
stw 29, 64(1) # save R29
stw 30, 68(1) # save R30
stw 31, 72(1) # save R31
stw 3, 76(1) # save hidden
# save CR
mfcr 0
stw 0, 80(1)
# save LR
mflr 0
stw 0, 84(1)
# save LR as PC
stw 0, 88(1)
# store RSP (pointing to context-data) in R6
mr 6, 1
# restore RSP (pointing to context-data) from R4
mr 1, 4
lwz 13, 0(1) # restore R13
lwz 14, 4(1) # restore R14
lwz 15, 8(1) # restore R15
lwz 16, 12(1) # restore R16
lwz 17, 16(1) # restore R17
lwz 18, 20(1) # restore R18
lwz 19, 24(1) # restore R19
lwz 20, 28(1) # restore R20
lwz 21, 32(1) # restore R21
lwz 22, 36(1) # restore R22
lwz 23, 40(1) # restore R23
lwz 24, 44(1) # restore R24
lwz 25, 48(1) # restore R25
lwz 26, 52(1) # restore R26
lwz 27, 56(1) # restore R27
lwz 28, 60(1) # restore R28
lwz 29, 64(1) # restore R29
lwz 30, 68(1) # restore R30
lwz 31, 72(1) # restore R31
lwz 3, 76(1) # restore hidden
# restore CR
lwz 0, 80(1)
mtcr 0
# restore LR
lwz 0, 84(1)
mtlr 0
# load PC
lwz 0, 88(1)
# restore CTR
mtctr 0
# adjust stack
addi 1, 1, 92
# return transfer_t
stw 6, 0(3)
stw 5, 4(3)
# jump to context
bctr
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | TOC | R14 | R15 | R16 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | R17 | R18 | R19 | R20 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
* ------------------------------------------------- *
* | R21 | R22 | R23 | R24 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | *
* ------------------------------------------------- *
* | R25 | R26 | R27 | R28 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | *
* ------------------------------------------------- *
* | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | *
* ------------------------------------------------- *
* | R29 | R30 | R31 | hidden | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | *
* ------------------------------------------------- *
* | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
* ------------------------------------------------- *
* | CR | LR | PC | back-chain| *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
* ------------------------------------------------- *
* | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
* ------------------------------------------------- *
* | cr saved | lr saved | compiler | linker | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
* ------------------------------------------------- *
* | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
* ------------------------------------------------- *
* | TOC saved | FCTX | DATA | | *
* ------------------------------------------------- *
* *
*******************************************************/
.globl jump_fcontext
#if _CALL_ELF == 2
.text
.align 2
jump_fcontext:
addis %r2, %r12, .TOC.-jump_fcontext@ha
addi %r2, %r2, .TOC.-jump_fcontext@l
.localentry jump_fcontext, . - jump_fcontext
#else
.section ".opd","aw"
.align 3
jump_fcontext:
# ifdef _CALL_LINUX
.quad .L.jump_fcontext,.TOC.@tocbase,0
.type jump_fcontext,@function
.text
.align 2
.L.jump_fcontext:
# else
.hidden .jump_fcontext
.globl .jump_fcontext
.quad .jump_fcontext,.TOC.@tocbase,0
.size jump_fcontext,24
.type .jump_fcontext,@function
.text
.align 2
.jump_fcontext:
# endif
#endif
# reserve space on stack
subi %r1, %r1, 184
#if _CALL_ELF != 2
std %r2, 0(%r1) # save TOC
#endif
std %r14, 8(%r1) # save R14
std %r15, 16(%r1) # save R15
std %r16, 24(%r1) # save R16
std %r17, 32(%r1) # save R17
std %r18, 40(%r1) # save R18
std %r19, 48(%r1) # save R19
std %r20, 56(%r1) # save R20
std %r21, 64(%r1) # save R21
std %r22, 72(%r1) # save R22
std %r23, 80(%r1) # save R23
std %r24, 88(%r1) # save R24
std %r25, 96(%r1) # save R25
std %r26, 104(%r1) # save R26
std %r27, 112(%r1) # save R27
std %r29, 120(%r1) # save R28
std %r29, 128(%r1) # save R29
std %r30, 136(%r1) # save R30
std %r31, 144(%r1) # save R31
std %r3, 152(%r1) # save hidden
# save CR
mfcr %r0
std %r0, 160(%r1)
# save LR
mflr %r0
std %r0, 168(%r1)
# save LR as PC
std %r0, 176(%r1)
# store RSP (pointing to context-data) in R6
mr %r6, %r1
# restore RSP (pointing to context-data) from R4
mr %r1, %r4
#if _CALL_ELF != 2
ld %r2, 0(%r1) # restore TOC
#endif
ld %r14, 8(%r1) # restore R14
ld %r15, 16(%r1) # restore R15
ld %r16, 24(%r1) # restore R16
ld %r17, 32(%r1) # restore R17
ld %r18, 40(%r1) # restore R18
ld %r19, 48(%r1) # restore R19
ld %r20, 56(%r1) # restore R20
ld %r21, 64(%r1) # restore R21
ld %r22, 72(%r1) # restore R22
ld %r23, 80(%r1) # restore R23
ld %r24, 88(%r1) # restore R24
ld %r25, 96(%r1) # restore R25
ld %r26, 104(%r1) # restore R26
ld %r27, 112(%r1) # restore R27
ld %r28, 120(%r1) # restore R28
ld %r29, 128(%r1) # restore R29
ld %r30, 136(%r1) # restore R30
ld %r31, 144(%r1) # restore R31
ld %r3, 152(%r1) # restore hidden
# restore CR
ld %r0, 160(%r1)
mtcr %r0
# restore LR
ld %r0, 168(%r1)
mtlr %r0
# load PC
ld %r12, 176(%r1)
# restore CTR
mtctr %r12
# adjust stack
addi %r1, %r1, 184
# return transfer_t
std %r6, 0(%r3)
std %r5, 8(%r3)
# jump to context
bctr
#if _CALL_ELF == 2
.size jump_fcontext, .-jump_fcontext
#else
# ifdef _CALL_LINUX
.size .jump_fcontext, .-.L.jump_fcontext
# else
.size .jump_fcontext, .-.jump_fcontext
# endif
#endif
/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | TOC | R14 | R15 | R16 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | R17 | R18 | R19 | R20 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
* ------------------------------------------------- *
* | R21 | R22 | R23 | R24 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | *
* ------------------------------------------------- *
* | R25 | R26 | R27 | R28 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | *
* ------------------------------------------------- *
* | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | *
* ------------------------------------------------- *
* | R29 | R30 | R31 | hidden | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | *
* ------------------------------------------------- *
* | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
* ------------------------------------------------- *
* | CR | LR | PC | back-chain| *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
* ------------------------------------------------- *
* | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
* ------------------------------------------------- *
* | cr saved | lr saved | compiler | linker | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
* ------------------------------------------------- *
* | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
* ------------------------------------------------- *
* | TOC saved | FCTX | DATA | | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.align 2
.globl jump_fcontext
_jump_fcontext:
; reserve space on stack
subi r1, r1, 184
std %r14, 8(%r1) ; save R14
std %r15, 16(%r1) ; save R15
std %r16, 24(%r1) ; save R16
std %r17, 32(%r1) ; save R17
std %r18, 40(%r1) ; save R18
std %r19, 48(%r1) ; save R19
std %r20, 56(%r1) ; save R20
std %r21, 64(%r1) ; save R21
std %r22, 72(%r1) ; save R22
std %r23, 80(%r1) ; save R23
std %r24, 88(%r1) ; save R24
std %r25, 96(%r1) ; save R25
std %r26, 104(%r1) ; save R26
std %r27, 112(%r1) ; save R27
std %r29, 120(%r1) ; save R28
std %r29, 128(%r1) ; save R29
std %r30, 136(%r1) ; save R30
std %r31, 144(%r1) ; save R31
std %r3, 152(%r1) ; save hidden
; save CR
mfcr r0
std r0, 160(r1)
; save LR
mflr r0
std r0, 168(r1)
; save LR as PC
std r0, 176(r1)
; store RSP (pointing to context-data) in R6
mr %r6, %r1
; restore RSP (pointing to context-data) from R4
mr r1, r4
ld %r14, 8(%r1) ; restore R14
ld %r15, 16(%r1) ; restore R15
ld %r16, 24(%r1) ; restore R16
ld %r17, 32(%r1) ; restore R17
ld %r18, 40(%r1) ; restore R18
ld %r19, 48(%r1) ; restore R19
ld %r20, 56(%r1) ; restore R20
ld %r21, 64(%r1) ; restore R21
ld %r22, 72(%r1) ; restore R22
ld %r23, 80(%r1) ; restore R23
ld %r24, 88(%r1) ; restore R24
ld %r25, 96(%r1) ; restore R25
ld %r26, 104(%r1) ; restore R26
ld %r27, 112(%r1) ; restore R27
ld %r28, 120(%r1) ; restore R28
ld %r29, 128(%r1) ; restore R29
ld %r30, 136(%r1) ; restore R30
ld %r31, 144(%r1) ; restore R31
ld %r3, 152(%r1) ; restore hidden
; restore CR
ld r0, 160(r1)
mtcr r0
; restore LR
ld r0, 168(r1)
mtlr r0
; load PC
ld r0, 176(r1)
; restore CTR
mtctr r0
; adjust stack
addi r1, r1, 184
; return transfer_t
std %r6, 0(%r3)
std %r5, 8(%r3)
; jump to context
bctr
.align 2
.globl .jump_fcontext
.jump_fcontext:
# reserve space on stack
subi 1, 1, 184
std 13, 0(1) # save R13
std 14, 8(1) # save R14
std 15, 16(1) # save R15
std 16, 24(1) # save R16
std 17, 32(1) # save R17
std 18, 40(1) # save R18
std 19, 48(1) # save R19
std 20, 56(1) # save R20
std 21, 64(1) # save R21
std 22, 72(1) # save R22
std 23, 80(1) # save R23
std 24, 88(1) # save R24
std 25, 96(1) # save R25
std 26, 104(1) # save R26
std 27, 112(1) # save R27
std 29, 120(1) # save R28
std 29, 128(1) # save R29
std 30, 136(1) # save R30
std 31, 144(1) # save R31
std 3, 152(1) # save hidden
# save CR
mfcr 0
std 0, 160(1)
# save LR
mflr 0
std 0, 168(1)
# save LR as PC
std 0, 176(1)
# store RSP (pointing to context-data) in R6
mr 6, 1
# restore RSP (pointing to context-data) from R4
mr 1, 4
ld 13, 0(1) # restore R13
ld 14, 8(1) # restore R14
ld 15, 16(1) # restore R15
ld 16, 24(1) # restore R16
ld 17, 32(1) # restore R17
ld 18, 40(1) # restore R18
ld 19, 48(1) # restore R19
ld 20, 56(1) # restore R20
ld 21, 64(1) # restore R21
ld 22, 72(1) # restore R22
ld 23, 80(1) # restore R23
ld 24, 88(1) # restore R24
ld 25, 96(1) # restore R25
ld 26, 104(1) # restore R26
ld 27, 112(1) # restore R27
ld 28, 120(1) # restore R28
ld 29, 128(1) # restore R29
ld 30, 136(1) # restore R30
ld 31, 144(1) # restore R31
ld 3, 152(1) # restore hidden
# restore CR
ld 0, 160(1)
mtcr 0
# restore LR
ld 0, 168(1)
mtlr 0
# load PC
ld 0, 176(1)
# restore CTR
mtctr 0
# adjust stack
addi 1, 1, 184
# return transfer_t
std 6, 0(3)
std 5, 8(3)
# jump to context
bctr
/*
Copyright Oliver Kowalke 2009.
Copyright Thomas Sailer 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/**************************************************************************************
* *
* ---------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ---------------------------------------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
* ---------------------------------------------------------------------------------- *
* | fbr_strg | fc_dealloc | limit | base | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ---------------------------------------------------------------------------------- *
* | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | *
* ---------------------------------------------------------------------------------- *
* | R12 | R13 | R14 | R15 | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ---------------------------------------------------------------------------------- *
* | 0xe40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | *
* ---------------------------------------------------------------------------------- *
* | RDI | RSI | RBX | RBP | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ---------------------------------------------------------------------------------- *
* | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | *
* ---------------------------------------------------------------------------------- *
* | hidden | RIP | EXIT | parameter area | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 32 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | *
* ---------------------------------------------------------------------------------- *
* | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | *
* ---------------------------------------------------------------------------------- *
* | parameter area | FCTX | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | *
* ---------------------------------------------------------------------------------- *
* | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | *
* ---------------------------------------------------------------------------------- *
* | DATA | | | | *
* ---------------------------------------------------------------------------------- *
* *
* ***********************************************************************************/
.file "jump_x86_64_ms_pe_gas.asm"
.text
.p2align 4,,15
.globl jump_fcontext
.def jump_fcontext; .scl 2; .type 32; .endef
.seh_proc jump_fcontext
jump_fcontext:
.seh_endprologue
pushq %rcx /* save hidden address of transport_t */
pushq %rbp /* save RBP */
pushq %rbx /* save RBX */
pushq %rsi /* save RSI */
pushq %rdi /* save RDI */
pushq %r15 /* save R15 */
pushq %r14 /* save R14 */
pushq %r13 /* save R13 */
pushq %r12 /* save R12 */
/* load NT_TIB */
movq %gs:(0x30), %r10
/* save current stack base */
movq 0x08(%r10), %rax
pushq %rax
/* save current stack limit */
movq 0x10(%r10), %rax
pushq %rax
/* save current deallocation stack */
movq 0x1478(%r10), %rax
pushq %rax
/* save fiber local storage */
movq 0x18(%r10), %rax
pushq %rax
/* preserve RSP (pointing to context-data) in R9 */
movq %rsp, %r9
/* restore RSP (pointing to context-data) from RDX */
movq %rdx, %rsp
/* load NT_TIB */
movq %gs:(0x30), %r10
/* restore fiber local storage */
popq %rax
movq %rax, 0x18(%r10)
/* restore deallocation stack */
popq %rax
movq %rax, 0x1478(%r10)
/* restore stack limit */
popq %rax
movq %rax, 0x10(%r10)
/* restore stack base */
popq %rax
movq %rax, 0x8(%r10)
popq %r12 /* restore R12 */
popq %r13 /* restore R13 */
popq %r14 /* restore R14 */
popq %r15 /* restore R15 */
popq %rdi /* restore RDI */
popq %rsi /* restore RSI */
popq %rbx /* restore RBX */
popq %rbp /* restore RBP */
popq %rax /* restore hidden address of transport_t */
/* restore return-address */
popq %r10
/* transport_t returned in RAX */
/* return parent fcontext_t */
movq %r9, (%rax)
/* return data */
movq %r8, 0x8(%rax)
/* transport_t as 1.arg of context-function */
movq %rax, %rcx
/* indirect jump to context */
jmp *%r10
.seh_endproc
.section .drectve
.ascii " -export:\"jump_fcontext\""
; Copyright Oliver Kowalke 2009.
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
; http://www.boost.org/LICENSE_1_0.txt)
; ----------------------------------------------------------------------------------
; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
; ----------------------------------------------------------------------------------
; | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c |
; ----------------------------------------------------------------------------------
; | fbr_strg | fc_dealloc | limit | base |
; ----------------------------------------------------------------------------------
; ----------------------------------------------------------------------------------
; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
; ----------------------------------------------------------------------------------
; | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c |
; ----------------------------------------------------------------------------------
; | R12 | R13 | R14 | R15 |
; ----------------------------------------------------------------------------------
; ----------------------------------------------------------------------------------
; | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
; ----------------------------------------------------------------------------------
; | 0xe40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c |
; ----------------------------------------------------------------------------------
; | RDI | RSI | RBX | RBP |
; ----------------------------------------------------------------------------------
; ----------------------------------------------------------------------------------
; | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
; ----------------------------------------------------------------------------------
; | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c |
; ----------------------------------------------------------------------------------
; | hidden | RIP | EXIT | parameter area |
; ----------------------------------------------------------------------------------
; ----------------------------------------------------------------------------------
; | 32 | 32 | 33 | 34 | 35 | 36 | 37 | 38 |
; ----------------------------------------------------------------------------------
; | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c |
; ----------------------------------------------------------------------------------
; | parameter area | FCTX |
; ----------------------------------------------------------------------------------
; ----------------------------------------------------------------------------------
; | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
; ----------------------------------------------------------------------------------
; | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc |
; ----------------------------------------------------------------------------------
; | DATA | | | |
; ----------------------------------------------------------------------------------
.code
jump_fcontext PROC BOOST_CONTEXT_EXPORT FRAME
.endprolog
push rcx ; save hidden address of transport_t
push rbp ; save RBP
push rbx ; save RBX
push rsi ; save RSI
push rdi ; save RDI
push r15 ; save R15
push r14 ; save R14
push r13 ; save R13
push r12 ; save R12
; load NT_TIB
mov r10, gs:[030h]
; save current stack base
mov rax, [r10+08h]
push rax
; save current stack limit
mov rax, [r10+010h]
push rax
; save current deallocation stack
mov rax, [r10+01478h]
push rax
; save fiber local storage
mov rax, [r10+018h]
push rax
; preserve RSP (pointing to context-data) in R9
mov r9, rsp
; restore RSP (pointing to context-data) from RDX
mov rsp, rdx
; load NT_TIB
mov r10, gs:[030h]
; restore fiber local storage
pop rax
mov [r10+018h], rax
; restore deallocation stack
pop rax
mov [r10+01478h], rax
; restore stack limit
pop rax
mov [r10+010h], rax
; restore stack base
pop rax
mov [r10+08h], rax
pop r12 ; restore R12
pop r13 ; restore R13
pop r14 ; restore R14
pop r15 ; restore R15
pop rdi ; restore RDI
pop rsi ; restore RSI
pop rbx ; restore RBX
pop rbp ; restore RBP
pop rax ; restore hidden address of transport_t
; restore return-address
pop r10
; transport_t returned in RAX
; return parent fcontext_t
mov [rax], r9
; return data
mov [rax+08h], r8
; transport_t as 1.arg of context-function
mov rcx, rax
; indirect jump to context
jmp r10
jump_fcontext ENDP
END
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/****************************************************************************************
* *
* ---------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ---------------------------------------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
* ---------------------------------------------------------------------------------- *
* | R12 | R13 | R14 | R15 | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ---------------------------------------------------------------------------------- *
* | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | *
* ---------------------------------------------------------------------------------- *
* | RBX | RBP | RIP | EXIT | *
* ---------------------------------------------------------------------------------- *
* *
****************************************************************************************/
.text
.globl jump_fcontext
.type jump_fcontext,@function
.align 16
jump_fcontext:
pushq %rbp /* save RBP */
pushq %rbx /* save RBX */
pushq %r15 /* save R15 */
pushq %r14 /* save R14 */
pushq %r13 /* save R13 */
pushq %r12 /* save R12 */
/* store RSP (pointing to context-data) in RAX */
movq %rsp, %rax
/* restore RSP (pointing to context-data) from RDI */
movq %rdi, %rsp
popq %r12 /* restrore R12 */
popq %r13 /* restrore R13 */
popq %r14 /* restrore R14 */
popq %r15 /* restrore R15 */
popq %rbx /* restrore RBX */
popq %rbp /* restrore RBP */
/* restore return-address */
popq %r8
/* return transfer_t from jump */
/* RAX == fctx, RDX == data */
movq %rsi, %rdx
/* pass transfer_t as first arg in context function */
/* RDI == fctx, RSI == data */
movq %rax, %rdi
/* indirect jump to context */
jmp *%r8
.size jump_fcontext,.-jump_fcontext
/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/****************************************************************************************
* *
* ---------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ---------------------------------------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
* ---------------------------------------------------------------------------------- *
* | R12 | R13 | R14 | R15 | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ---------------------------------------------------------------------------------- *
* | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | *
* ---------------------------------------------------------------------------------- *
* | RBX | RBP | RIP | EXIT | *
* ---------------------------------------------------------------------------------- *
* *
****************************************************************************************/
.text
.globl _jump_fcontext
.align 8
_jump_fcontext:
pushq %rbp /* save RBP */
pushq %rbx /* save RBX */
pushq %r15 /* save R15 */
pushq %r14 /* save R14 */
pushq %r13 /* save R13 */
pushq %r12 /* save R12 */
/* store RSP (pointing to context-data) in RAX */
movq %rsp, %rax
/* restore RSP (pointing to context-data) from RDI */
movq %rdi, %rsp
popq %r12 /* restrore R12 */
popq %r13 /* restrore R13 */
popq %r14 /* restrore R14 */
popq %r15 /* restrore R15 */
popq %rbx /* restrore RBX */
popq %rbp /* restrore RBP */
/* restore return-address */
popq %r8
/* return transfer_t from jump */
/* RAX == fctx, RDX == data */
movq %rsi, %rdx
/* pass transfer_t as first arg in context function */
/* RDI == fctx, RSI == data */
movq %rax, %rdi
/* indirect jump to context */
jmp *%r8
/*
Copyright Edward Nevill + Oliver Kowalke 2015
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
* ------------------------------------------------- *
* | x19 | x20 | x21 | x22 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
* ------------------------------------------------- *
* | x23 | x24 | x25 | x26 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| *
* ------------------------------------------------- *
* | x27 | x28 | FP | LR | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| *
* ------------------------------------------------- *
* | PC | align | | | *
* ------------------------------------------------- *
* *
*******************************************************/
.cpu generic+fp+simd
.text
.align 2
.global make_fcontext
.type make_fcontext, %function
make_fcontext:
# shift address in x0 (allocated stack) to lower 16 byte boundary
and x0, x0, ~0xF
# reserve space for context-data on context-stack
sub x0, x0, #0x70
# third arg of make_fcontext() == address of context-function
# store address as a PC to jump in
str x2, [x0, #0x60]
# save address of finish as return-address for context-function
# will be entered after context-function returns (LR register)
adr x1, finish
str x1, [x0, #0x58]
ret x30 // return pointer to context-data (x0)
finish:
# exit code is zero
mov x0, #0
# exit application
bl _exit
.size make_fcontext,.-make_fcontext
# Mark that we don't need executable stack.
.section .note.GNU-stack,"",%progbits
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
* ------------------------------------------------- *
* | x19 | x20 | x21 | x22 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
* ------------------------------------------------- *
* | x23 | x24 | x25 | x26 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| *
* ------------------------------------------------- *
* | x27 | x28 | FP | LR | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| *
* ------------------------------------------------- *
* | PC | align | | | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl _make_fcontext
.balign 16
_make_fcontext:
; shift address in x0 (allocated stack) to lower 16 byte boundary
and x0, x0, ~0xF
; reserve space for context-data on context-stack
sub x0, x0, #0x70
; third arg of make_fcontext() == address of context-function
; store address as a PC to jump in
str x2, [x0, #0x60]
; compute abs address of label finish
; 0x0c = 3 instructions * size (4) before label 'finish'
; TODO: Numeric offset since llvm still does not support labels in ADR. Fix:
; http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20140407/212336.html
adr x1, 0x0c
; save address of finish as return-address for context-function
; will be entered after context-function returns (LR register)
str x1, [x0, #0x58]
ret lr ; return pointer to context-data (x0)
finish:
; exit code is zero
mov x0, #0
; exit application
bl __exit
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
* ------------------------------------------------- *
* |hiddn| v1 | v2 | v3 | v4 | v5 | v6 | v7 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
* ------------------------------------------------- *
* | v8 | lr | pc | FCTX| DATA| | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl make_fcontext
.align 2
.type make_fcontext,%function
make_fcontext:
@ shift address in A1 to lower 16 byte boundary
bic a1, a1, #15
@ reserve space for context-data on context-stack
sub a1, a1, #60
@ third arg of make_fcontext() == address of context-function
str a3, [a1, #40]
@ compute address of returned transfer_t
add a2, a1, #44
mov a3, a2
str a3, [a1, #0]
@ compute abs address of label finish
adr a2, finish
@ save address of finish as return-address for context-function
@ will be entered after context-function returns
str a2, [a1, #36]
bx lr @ return pointer to context-data
finish:
@ exit code is zero
mov a1, #0
@ exit application
bl _exit@PLT
.size make_fcontext,.-make_fcontext
@ Mark that we don't need executable stack.
.section .note.GNU-stack,"",%progbits
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
* ------------------------------------------------- *
* | sjlj|hiddn| v1 | v2 | v3 | v4 | v5 | v6 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
* ------------------------------------------------- *
* | v7 | v8 | lr | pc | FCTX| DATA| | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl _make_fcontext
.align 2
_make_fcontext:
@ shift address in A1 to lower 16 byte boundary
bic a1, a1, #15
@ reserve space for context-data on context-stack
sub a1, a1, #64
@ third arg of make_fcontext() == address of context-function
str a3, [a1, #44]
@ compute address of returned transfer_t
add a2, a1, #48
mov a3, a2
str a3, [a1, #4]
@ compute abs address of label finish
adr a2, finish
@ save address of finish as return-address for context-function
@ will be entered after context-function returns
str a2, [a1, #40]
bx lr @ return pointer to context-data
finish:
@ exit code is zero
mov a1, #0
@ exit application
bl __exit
;/*
; Copyright Oliver Kowalke 2009.
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
; http://www.boost.org/LICENSE_1_0.txt)
;*/
; *******************************************************
; * *
; * ------------------------------------------------- *
; * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
; * ------------------------------------------------- *
; * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| *
; * ------------------------------------------------- *
; * |deall|limit| base|hiddn| v1 | v2 | v3 | v4 | *
; * ------------------------------------------------- *
; * ------------------------------------------------- *
; * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
; * ------------------------------------------------- *
; * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| *
; * ------------------------------------------------- *
; * | v5 | v6 | v7 | v8 | lr | pc | FCTX| DATA| *
; * ------------------------------------------------- *
; * *
; *******************************************************
AREA |.text|, CODE
ALIGN 4
EXPORT make_fcontext
IMPORT _exit
make_fcontext PROC
; first arg of make_fcontext() == top of context-stack
; save top of context-stack (base) A4
mov a4, a1
; shift address in A1 to lower 16 byte boundary
bic a1, a1, #0x0f
; reserve space for context-data on context-stack
sub a1, a1, #0x48
; save top address of context_stack as 'base'
str a4, [a1, #0x8]
; second arg of make_fcontext() == size of context-stack
; compute bottom address of context-stack (limit)
sub a4, a4, a2
; save bottom address of context-stack as 'limit'
str a4, [a1, #0x4]
; save bottom address of context-stack as 'dealloction stack'
str a4, [a1, #0x0]
; third arg of make_fcontext() == address of context-function
str a3, [a1, #0x34]
; compute address of returned transfer_t
add a2, a1, #0x38
mov a3, a2
str a3, [a1, #0xc]
; compute abs address of label finish
adr a2, finish
; save address of finish as return-address for context-function
; will be entered after context-function returns
str a2, [a1, #0x30]
bx lr ; return pointer to context-data
finish
; exit code is zero
mov a1, #0
; exit application
bl _exit
ENDP
END
/*
Copyright Sergue E. Leontiev 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
// Stub file for universal binary
#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
#if defined(__aarch64__)
#include "make_arm64_aapcs_macho_gas.S"
#else
#include "make_arm_aapcs_macho_gas.S"
#endif
#else
#if defined(__i386__)
#include "make_i386_sysv_macho_gas.S"
#elif defined(__x86_64__)
#include "make_x86_64_sysv_macho_gas.S"
#elif defined(__ppc__)
#include "make_ppc32_sysv_macho_gas.S"
#elif defined(__ppc64__)
#include "make_ppc64_sysv_macho_gas.S"
#else
#error "No arch's"
#endif
#endif
/*
Copyright Sergue E. Leontiev 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
// Stub file for universal binary
#if defined(__i386__)
#include "make_i386_sysv_macho_gas.S"
#elif defined(__x86_64__)
#include "make_x86_64_sysv_macho_gas.S"
#elif defined(__ppc__)
#include "make_ppc32_sysv_macho_gas.S"
#elif defined(__ppc64__)
#include "make_ppc64_sysv_macho_gas.S"
#else
#error "No arch's"
#endif
/*
Copyright Oliver Kowalke 2009.
Copyright Thomas Sailer 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*************************************************************************************
* --------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* --------------------------------------------------------------------------------- *
* | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch | *
* --------------------------------------------------------------------------------- *
* | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX | *
* --------------------------------------------------------------------------------- *
* --------------------------------------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* --------------------------------------------------------------------------------- *
* | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch | *
* --------------------------------------------------------------------------------- *
* | EBP | EIP | to | data | | EH NXT |SEH HNDLR| | *
* --------------------------------------------------------------------------------- *
*************************************************************************************/
.file "make_i386_ms_pe_gas.asm"
.text
.p2align 4,,15
.globl _make_fcontext
.def _make_fcontext; .scl 2; .type 32; .endef
_make_fcontext:
/* first arg of make_fcontext() == top of context-stack */
movl 0x04(%esp), %eax
/* reserve space for first argument of context-function */
/* EAX might already point to a 16byte border */
leal -0x08(%eax), %eax
/* shift address in EAX to lower 16 byte boundary */
andl $-16, %eax
/* reserve space for context-data on context-stack */
/* size for fc_mxcsr .. EIP + return-address for context-function */
/* on context-function entry: (ESP -0x4) % 8 == 0 */
/* additional space is required for SEH */
leal -0x48(%eax), %eax
/* first arg of make_fcontext() == top of context-stack */
movl 0x04(%esp), %ecx
/* save top address of context stack as 'base' */
movl %ecx, 0xc(%eax)
/* second arg of make_fcontext() == size of context-stack */
movl 0x08(%esp), %edx
/* negate stack size for LEA instruction (== substraction) */
negl %edx
/* compute bottom address of context stack (limit) */
leal (%ecx,%edx), %ecx
/* save bottom address of context-stack as 'limit' */
movl %ecx, 0x8(%eax)
/* save bottom address of context-stack as 'dealloction stack' */
movl %ecx, 0x4(%eax)
/* third arg of make_fcontext() == address of context-function */
/* stored in EBX */
movl 0xc(%esp), %ecx
movl %ecx, 0x1c(%eax)
/* compute abs address of label trampoline */
movl $trampoline, %ecx
/* save address of trampoline as return-address for context-function */
/* will be entered after calling jump_fcontext() first time */
movl %ecx, 0x24(%eax)
/* compute abs address of label finish */
movl $finish, %ecx
/* save address of finish as return-address for context-function */
/* will be entered after context-function returns */
movl %ecx, 0x20(%eax)
/* traverse current seh chain to get the last exception handler installed by Windows */
/* note that on Windows Server 2008 and 2008 R2, SEHOP is activated by default */
/* the exception handler chain is tested for the presence of ntdll.dll!FinalExceptionHandler */
/* at its end by RaiseException all seh andlers are disregarded if not present and the */
/* program is aborted */
/* load NT_TIB into ECX */
movl %fs:(0x0), %ecx
walk:
/* load 'next' member of current SEH into EDX */
movl (%ecx), %edx
/* test if 'next' of current SEH is last (== 0xffffffff) */
incl %edx
jz found
decl %edx
/* exchange content; ECX contains address of next SEH */
xchgl %ecx, %edx
/* inspect next SEH */
jmp walk
found:
/* load 'handler' member of SEH == address of last SEH handler installed by Windows */
movl 0x04(%ecx), %ecx
/* save address in ECX as SEH handler for context */
movl %ecx, 0x38(%eax)
/* set ECX to -1 */
movl $0xffffffff, %ecx
/* save ECX as next SEH item */
movl %ecx, 0x34(%eax)
/* load address of next SEH item */
leal 0x34(%eax), %ecx
/* save next SEH */
movl %ecx, 0x10(%eax)
/* return pointer to context-data */
ret
trampoline:
/* move transport_t for entering context-function */
/* FCTX == EAX, DATA == EDX */
movl %eax, (%esp)
movl %edx, 0x4(%esp)
/* label finish as return-address */
pushl %ebp
/* jump to context-function */
jmp *%ebx
finish:
/* ESP points to same address as ESP on entry of context function + 0x4 */
xorl %eax, %eax
/* exit code is zero */
movl %eax, (%esp)
/* exit application */
call __exit
hlt
.def __exit; .scl 2; .type 32; .endef /* standard C library function */
.section .drectve
.ascii " -export:\"make_fcontext\""
; Copyright Oliver Kowalke 2009.
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
; http://www.boost.org/LICENSE_1_0.txt)
; ---------------------------------------------------------------------------------
; | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
; ---------------------------------------------------------------------------------
; | 0h | 04h | 08h | 0ch | 010h | 014h | 018h | 01ch |
; ---------------------------------------------------------------------------------
; | fc_strg |fc_deallo| limit | base | fc_seh | EDI | ESI | EBX |
; ---------------------------------------------------------------------------------
; ---------------------------------------------------------------------------------
; | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
; ---------------------------------------------------------------------------------
; | 020h | 024h | 028h | 02ch | 030h | 034h | 038h | 03ch |
; ---------------------------------------------------------------------------------
; | EBP | EIP | to | data | | EH NXT |SEH HNDLR| |
; ---------------------------------------------------------------------------------
.386
.XMM
.model flat, c
; standard C library function
_exit PROTO, value:SDWORD
.code
make_fcontext PROC BOOST_CONTEXT_EXPORT
; first arg of make_fcontext() == top of context-stack
mov eax, [esp+04h]
; reserve space for first argument of context-function
; EAX might already point to a 16byte border
lea eax, [eax-08h]
; shift address in EAX to lower 16 byte boundary
and eax, -16
; reserve space for context-data on context-stack
; on context-function entry: (ESP -0x4) % 8 == 0
; additional space is required for SEH
lea eax, [eax-048h]
; first arg of make_fcontext() == top of context-stack
mov ecx, [esp+04h]
; save top address of context stack as 'base'
mov [eax+0ch], ecx
; second arg of make_fcontext() == size of context-stack
mov edx, [esp+08h]
; negate stack size for LEA instruction (== substraction)
neg edx
; compute bottom address of context stack (limit)
lea ecx, [ecx+edx]
; save bottom address of context-stack as 'limit'
mov [eax+08h], ecx
; save bottom address of context-stack as 'dealloction stack'
mov [eax+04h], ecx
; third arg of make_fcontext() == address of context-function
; stored in EBX
mov ecx, [esp+0ch]
mov [eax+01ch], ecx
; compute abs address of label trampoline
mov ecx, trampoline
; save address of trampoline as return-address for context-function
; will be entered after calling jump_fcontext() first time
mov [eax+024h], ecx
; compute abs address of label finish
mov ecx, finish
; save address of finish as return-address for context-function
; will be entered after context-function returns
mov [eax+020h], ecx
; traverse current seh chain to get the last exception handler installed by Windows
; note that on Windows Server 2008 and 2008 R2, SEHOP is activated by default
; the exception handler chain is tested for the presence of ntdll.dll!FinalExceptionHandler
; at its end by RaiseException all seh-handlers are disregarded if not present and the
; program is aborted
assume fs:nothing
; load NT_TIB into ECX
mov ecx, fs:[0h]
assume fs:error
walk:
; load 'next' member of current SEH into EDX
mov edx, [ecx]
; test if 'next' of current SEH is last (== 0xffffffff)
inc edx
jz found
dec edx
; exchange content; ECX contains address of next SEH
xchg edx, ecx
; inspect next SEH
jmp walk
found:
; load 'handler' member of SEH == address of last SEH handler installed by Windows
mov ecx, [ecx+04h]
; save address in ECX as SEH handler for context
mov [eax+038h], ecx
; set ECX to -1
mov ecx, 0ffffffffh
; save ECX as next SEH item
mov [eax+034h], ecx
; load address of next SEH item
lea ecx, [eax+034h]
; save next SEH
mov [eax+010h], ecx
ret ; return pointer to context-data
trampoline:
; move transport_t for entering context-function
; FCTX == EAX, DATA == EDX
mov [esp], eax
mov [esp+04h], edx
push ebp
; jump to context-function
jmp ebx
finish:
; exit code is zero
xor eax, eax
mov [esp], eax
; exit application
call _exit
hlt
make_fcontext ENDP
END
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*****************************************************************************************
* *
* ----------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ----------------------------------------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
* ----------------------------------------------------------------------------------- *
* | EDI | ESI | EBX | EBP | EIP | hidden | to | data | *
* ----------------------------------------------------------------------------------- *
* *
*****************************************************************************************/
.text
.globl make_fcontext
.align 2
.type make_fcontext,@function
make_fcontext:
/* first arg of make_fcontext() == top of context-stack */
movl 0x4(%esp), %eax
/* reserve space for first argument of context-function
rax might already point to a 16byte border */
leal -0x8(%eax), %eax
/* shift address in EAX to lower 16 byte boundary */
andl $-16, %eax
/* reserve space for context-data on context-stack */
leal -0x28(%eax), %eax
/* third arg of make_fcontext() == address of context-function */
/* stored in EBX */
movl 0xc(%esp), %ecx
movl %ecx, 0x8(%eax)
/* return transport_t */
/* FCTX == EDI, DATA == ESI */
leal (%eax), %ecx
movl %ecx, 0x14(%eax)
/* compute abs address of label trampoline */
call 1f
/* address of trampoline 1 */
1: popl %ecx
/* compute abs address of label trampoline */
addl $trampoline-1b, %ecx
/* save address of trampoline as return address */
/* will be entered after calling jump_fcontext() first time */
movl %ecx, 0x10(%eax)
/* compute abs address of label finish */
call 2f
/* address of label 2 */
2: popl %ecx
/* compute abs address of label finish */
addl $finish-2b, %ecx
/* save address of finish as return-address for context-function */
/* will be entered after context-function returns */
movl %ecx, 0xc(%eax)
ret /* return pointer to context-data */
trampoline:
/* move transport_t for entering context-function */
movl %edi, (%esp)
movl %esi, 0x4(%esp)
pushl %ebp
/* jump to context-function */
jmp *%ebx
finish:
call 3f
/* address of label 3 */
3: popl %ebx
/* compute address of GOT and store it in EBX */
addl $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx
/* exit code is zero */
xorl %eax, %eax
movl %eax, (%esp)
/* exit application */
call _exit@PLT
hlt
.size make_fcontext,.-make_fcontext
/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*****************************************************************************************
* *
* ----------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ----------------------------------------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
* ----------------------------------------------------------------------------------- *
* | EDI | ESI | EBX | EBP | EIP | hidden | to | data | *
* ----------------------------------------------------------------------------------- *
* *
*****************************************************************************************/
.text
.globl _make_fcontext
.align 2
_make_fcontext:
/* first arg of make_fcontext() == top of context-stack */
movl 0x4(%esp), %eax
/* reserve space for first argument of context-function
rax might already point to a 16byte border */
leal -0x8(%eax), %eax
/* shift address in EAX to lower 16 byte boundary */
andl $-16, %eax
/* reserve space for context-data on context-stack */
leal -0x28(%eax), %eax
/* thrid arg of make_fcontext() == address of context-function */
/* stored in EBX */
movl 0xc(%esp), %edx
movl %edx, 0x8(%eax)
/* return transport_t */
/* FCTX == EDI, DATA == ESI */
leal (%eax), %ecx
movl %ecx, 0x14(%eax)
/* compute abs address of label trampoline */
call 1f
/* address of trampoline 1 */
1: popl %ecx
/* compute abs address of label trampoline */
addl $trampoline-1b, %ecx
/* save address of trampoline as return address */
/* will be entered after calling jump_fcontext() first time */
movl %ecx, 0x10(%eax)
/* compute abs address of label finish */
call 2f
/* address of label 2 */
2: popl %ecx
/* compute abs address of label finish */
addl $finish-2b, %ecx
/* save address of finish as return-address for context-function */
/* will be entered after context-function returns */
movl %ecx, 0xc(%eax)
ret /* return pointer to context-data */
trampoline:
/* move transport_t for entering context-function */
movl %edi, (%esp)
movl %esi, 0x4(%esp)
pushl %ebp
/* jump to context-function */
jmp *%ebx
finish:
/* exit code is zero */
xorl %eax, %eax
movl %eax, (%esp)
/* exit application */
call __exit
hlt
/*
Copyright Sergue E. Leontiev 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
// Stub file for universal binary
#if defined(__i386__)
#include "make_i386_sysv_macho_gas.S"
#elif defined(__x86_64__)
#include "make_x86_64_sysv_macho_gas.S"
#else
#error "No arch's"
#endif
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | FP |hiddn| RA | PC | GP | FCTX| DATA| | *
* ------------------------------------------------- *
* *
* *****************************************************/
.text
.globl make_fcontext
.align 2
.type make_fcontext,@function
.ent make_fcontext
make_fcontext:
#ifdef __PIC__
.set noreorder
.cpload $t9
.set reorder
#endif
# first arg of make_fcontext() == top address of context-stack
move $v0, $a0
# shift address in A0 to lower 16 byte boundary
move $v1, $v0
li $v0, -16 # 0xfffffffffffffff0
and $v0, $v1, $v0
# reserve space for context-data on context-stack
# including 48 byte of shadow space (sp % 16 == 0)
addiu $v0, $v0, -112
# third arg of make_fcontext() == address of context-function
sw $a2, 44($v0)
# save global pointer in context-data
sw $gp, 48($v0)
# compute address of returned transfer_t
addiu $t0, $v0, 52
sw $t0, 36($v0)
# compute abs address of label finish
la $t9, finish
# save address of finish as return-address for context-function
# will be entered after context-function returns
sw $t9, 40($v0)
jr $ra # return pointer to context-data
finish:
lw $gp, 0($sp)
# allocate stack space (contains shadow space for subroutines)
addiu $sp, $sp, -32
# save return address
sw $ra, 28($sp)
# restore GP (global pointer)
# move $gp, $s1
# exit code is zero
move $a0, $zero
# address of exit
lw $t9, %call16(_exit)($gp)
# exit application
jalr $t9
.end make_fcontext
.size make_fcontext, .-make_fcontext
/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits
/*
Copyright Sergue E. Leontiev 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
// Stub file for universal binary
#if defined(__ppc__)
#include "make_ppc32_sysv_macho_gas.S"
#elif defined(__ppc64__)
#include "make_ppc64_sysv_macho_gas.S"
#else
#error "No arch's"
#endif
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
* ------------------------------------------------- *
* | R29 | R30 | R31 |hiddn| CR | LR | PC |bchai| *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | *
* ------------------------------------------------- *
* |linkr| FCTX| DATA| | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl make_fcontext
.align 2
.type make_fcontext,@function
make_fcontext:
# save return address into R6
mflr %r6
# first arg of make_fcontext() == top address of context-function
# shift address in R3 to lower 16 byte boundary
clrrwi %r3, %r3, 4
# reserve space for context-data on context-stack
# including 64 byte of linkage + parameter area (R1 % 16 == 0)
subi %r3, %r3, 172
# third arg of make_fcontext() == address of context-function
stw %r5, 88(%r3)
# set back-chain to zero
li %r0, 0
stw %r0, 92(%r3)
# compute address of returned transfer_t
addi %r0, %r3, 100
mr %r4, %r0
stw %r4, 76(%r3)
# load LR
mflr %r0
# jump to label 1
bl 1f
1:
# load LR into R4
mflr %r4
# compute abs address of label finish
addi %r4, %r4, finish - 1b
# restore LR
mtlr %r0
# save address of finish as return-address for context-function
# will be entered after context-function returns
stw %r4, 84(%r3)
# restore return address from R6
mtlr %r6
blr # return pointer to context-data
finish:
# save return address into R0
mflr %r0
# save return address on stack, set up stack frame
stw %r0, 4(%r1)
# allocate stack space, R1 % 16 == 0
stwu %r1, -16(%r1)
# exit code is zero
li %r3, 0
# exit application
bl _exit@plt
.size make_fcontext, .-make_fcontext
/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | R13 | R14 | R15 | R16 | R17 | R18 | R19 | R20 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
* ------------------------------------------------- *
* | R29 | R30 | R31 |hiddn| CR | LR | PC |bchai| *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | *
* ------------------------------------------------- *
* |linkr| FCTX| DATA| | *
* ------------------------------------------------- *
* *
*******************************************************/
.text
.globl _make_fcontext
.align 2
_make_fcontext:
; save return address into R6
mflr r6
; first arg of make_fcontext() == top address of context-function
; shift address in R3 to lower 16 byte boundary
clrrwi r3, r3, 4
; reserve space for context-data on context-stack
; including 64 byte of linkage + parameter area (R1 % 16 == 0)
subi r3, r3, 172
; third arg of make_fcontext() == address of context-function
stw r5, 88(r3)
; set back-chain to zero
li r0, 0
stw r0, 92(r3)
; compute address of returned transfer_t
addi r0, r3, 100
mr r4, r0
stw r4, 76(r3)
; load LR
mflr r0
; jump to label 1
bl l1
l1:
; load LR into R4
mflr r4
; compute abs address of label finish
addi r4, r4, lo16((finish - .)+4)
# restore LR
mtlr r0
; save address of finish as return-address for context-function
; will be entered after context-function returns
stw r4, 84(r3)
; restore return address from R6
mtlr r6
blr ; return pointer to context-data
finish:
; save return address into R0
mflr r0
; save return address on stack, set up stack frame
stw r0, 4(r1)
; allocate stack space, R1 % 16 == 0
stwu r1, -16(r1)
; exit code is zero
li r3, 0
; exit application
bl __exit
.globl make_fcontext[DS]
.globl .make_fcontext[PR]
.align 2
.csect make_fcontext[DS]
make_fcontext:
.long .make_fcontext[PR]
.csect .make_fcontext[PR], 3
#.make_fcontext:
# save return address into R6
mflr 6
# first arg of make_fcontext() == top address of context-function
# shift address in R3 to lower 16 byte boundary
clrrwi 3, 3, 4
# reserve space for context-data on context-stack
# including 64 byte of linkage + parameter area (R1 % 16 == 0)
subi 3, 3, 172
# third arg of make_fcontext() == address of context-function
stw 5, 88(3)
# set back-chain to zero
li 0, 0
stw 0, 92(3)
# compute address of returned transfer_t
addi 0, 3, 100
mr 4, 0
stw 4, 76(3)
# load LR
mflr 0
# jump to label 1
bl .Label
.Label:
# load LR into R4
mflr 4
# compute abs address of label .L_finish
addi 4, 4, .L_finish - .Label
# restore LR
mtlr 0
# save address of finish as return-address for context-function
# will be entered after context-function returns
stw 4, 84(3)
# restore return address from R6
mtlr 6
blr # return pointer to context-data
.L_finish:
# save return address into R0
mflr 0
# save return address on stack, set up stack frame
stw 0, 4(1)
# allocate stack space, R1 % 16 == 0
stwu 1, -16(1)
# exit code is zero
li 3, 0
# exit application
bl ._exit
nop
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | TOC | R14 | R15 | R16 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | R17 | R18 | R19 | R20 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
* ------------------------------------------------- *
* | R21 | R22 | R23 | R24 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | *
* ------------------------------------------------- *
* | R25 | R26 | R27 | R28 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | *
* ------------------------------------------------- *
* | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | *
* ------------------------------------------------- *
* | R29 | R30 | R31 | hidden | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | *
* ------------------------------------------------- *
* | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
* ------------------------------------------------- *
* | CR | LR | PC | back-chain| *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
* ------------------------------------------------- *
* | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
* ------------------------------------------------- *
* | cr saved | lr saved | compiler | linker | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
* ------------------------------------------------- *
* | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
* ------------------------------------------------- *
* | TOC saved | FCTX | DATA | | *
* ------------------------------------------------- *
* *
*******************************************************/
.globl make_fcontext
#if _CALL_ELF == 2
.text
.align 2
make_fcontext:
addis %r2, %r12, .TOC.-make_fcontext@ha
addi %r2, %r2, .TOC.-make_fcontext@l
.localentry make_fcontext, . - make_fcontext
#else
.section ".opd","aw"
.align 3
make_fcontext:
# ifdef _CALL_LINUX
.quad .L.make_fcontext,.TOC.@tocbase,0
.type make_fcontext,@function
.text
.align 2
.L.make_fcontext:
# else
.hidden .make_fcontext
.globl .make_fcontext
.quad .make_fcontext,.TOC.@tocbase,0
.size make_fcontext,24
.type .make_fcontext,@function
.text
.align 2
.make_fcontext:
# endif
#endif
# save return address into R6
mflr %r6
# first arg of make_fcontext() == top address of context-stack
# shift address in R3 to lower 16 byte boundary
clrrdi %r3, %r3, 4
# reserve space for context-data on context-stack
# including 64 byte of linkage + parameter area (R1 % 16 == 0)
subi %r3, %r3, 248
# third arg of make_fcontext() == address of context-function
# entry point (ELFv2) or descriptor (ELFv1)
#if _CALL_ELF == 2
# save address of context-function entry point
std %r5, 176(%r3)
#else
# save address of context-function entry point
ld %r4, 0(%r5)
std %r4, 176(%r3)
# save TOC of context-function
ld %r4, 8(%r5)
std %r4, 0(%r3)
#endif
# set back-chain to zero
li %r0, 0
std %r0, 184(%r3)
# compute address of returned transfer_t
addi %r0, %r3, 232
mr %r4, %r0
std %r4, 152(%r3)
# load LR
mflr %r0
# jump to label 1
bl 1f
1:
# load LR into R4
mflr %r4
# compute abs address of label finish
addi %r4, %r4, finish - 1b
# restore LR
mtlr %r0
# save address of finish as return-address for context-function
# will be entered after context-function returns
std %r4, 168(%r3)
# restore return address from R6
mtlr %r6
blr # return pointer to context-data
finish:
# save return address into R0
mflr %r0
# save return address on stack, set up stack frame
std %r0, 8(%r1)
# allocate stack space, R1 % 16 == 0
stdu %r1, -32(%r1)
# exit code is zero
li %r3, 0
# exit application
bl _exit
nop
#if _CALL_ELF == 2
.size make_fcontext, .-make_fcontext
#else
# ifdef _CALL_LINUX
.size .make_fcontext, .-.L.make_fcontext
# else
.size .make_fcontext, .-.make_fcontext
# endif
#endif
/* Mark that we don't need executable stack. */
.section .note.GNU-stack,"",%progbits
/*
Copyright Oliver Kowalke 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | TOC | R14 | R15 | R16 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | R17 | R18 | R19 | R20 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
* ------------------------------------------------- *
* | R21 | R22 | R23 | R24 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | *
* ------------------------------------------------- *
* | R25 | R26 | R27 | R28 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | *
* ------------------------------------------------- *
* | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | *
* ------------------------------------------------- *
* | R29 | R30 | R31 | hidden | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | *
* ------------------------------------------------- *
* | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
* ------------------------------------------------- *
* | CR | LR | PC | back-chain| *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
* ------------------------------------------------- *
* | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
* ------------------------------------------------- *
* | cr saved | lr saved | compiler | linker | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
* ------------------------------------------------- *
* | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
* ------------------------------------------------- *
* | TOC saved | FCTX | DATA | | *
* ------------------------------------------------- *
* *
.text
.globl _make_fcontext
_make_fcontext:
; save return address into R6
mflr r6
; first arg of make_fcontext() == top address of context-function
; shift address in R3 to lower 16 byte boundary
clrrwi r3, r3, 4
; reserve space for context-data on context-stack
; including 64 byte of linkage + parameter area (R1 16 == 0)
subi r3, r3, 248
; third arg of make_fcontext() == address of context-function
stw r5, 176(r3)
; set back-chain to zero
li %r0, 0
std %r0, 184(%r3)
; compute address of returned transfer_t
addi %r0, %r3, 232
mr %r4, %r0
std %r4, 152(%r3)
; load LR
mflr r0
; jump to label 1
bl l1
l1:
; load LR into R4
mflr r4
; compute abs address of label finish
addi r4, r4, lo16((finish - .) + 4)
; restore LR
mtlr r0
; save address of finish as return-address for context-function
; will be entered after context-function returns
std r4, 168(r3)
; restore return address from R6
mtlr r6
blr ; return pointer to context-data
finish:
; save return address into R0
mflr r0
; save return address on stack, set up stack frame
stw r0, 8(r1)
; allocate stack space, R1 16 == 0
stwu r1, -32(r1)
; set return value to zero
li r3, 0
; exit application
bl __exit
nop
.globl make_fcontext[DS]
.globl .make_fcontext[PR]
.align 2
.csect .make_fcontext[PR], 3
.globl _make_fcontext
#._make_fcontext:
# save return address into R6
mflr 6
# first arg of make_fcontext() == top address of context-function
# shift address in R3 to lower 16 byte boundary
clrrwi 3, 3, 4
# reserve space for context-data on context-stack
# including 64 byte of linkage + parameter area (R1 % 16 == 0)
subi 3, 3, 248
# third arg of make_fcontext() == address of context-function
stw 5, 176(3)
# set back-chain to zero
li 0, 0
std 0, 184(3)
# compute address of returned transfer_t
addi 0, 3, 232
mr 4, 0
std 4, 152(3)
# load LR
mflr 0
# jump to label 1
bl .Label
.Label:
# load LR into R4
mflr 4
# compute abs address of label .L_finish
addi 4, 4, .L_finish - .Label
# restore LR
mtlr 0
# save address of finish as return-address for context-function
# will be entered after context-function returns
stw 4, 168(3)
# restore return address from R6
mtlr 6
blr # return pointer to context-data
.L_finish:
# save return address into R0
mflr 0
# save return address on stack, set up stack frame
stw 0, 8(1)
# allocate stack space, R1 % 16 == 0
stwu 1, -32(1)
# exit code is zero
li 3, 0
# exit application
bl ._exit
nop
/*
Copyright Oliver Kowalke 2009.
Copyright Thomas Sailer 2013.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
/**************************************************************************************
* *
* ---------------------------------------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ---------------------------------------------------------------------------------- *
* | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
* ---------------------------------------------------------------------------------- *
* | fbr_strg | fc_dealloc | limit | base | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ---------------------------------------------------------------------------------- *
* | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | *
* ---------------------------------------------------------------------------------- *
* | R12 | R13 | R14 | R15 | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ---------------------------------------------------------------------------------- *
* | 0xe40 | 0x44 | 0x48 | 0x4c | 0x50 | 0x54 | 0x58 | 0x5c | *
* ---------------------------------------------------------------------------------- *
* | RDI | RSI | RBX | RBP | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ---------------------------------------------------------------------------------- *
* | 0x60 | 0x64 | 0x68 | 0x6c | 0x70 | 0x74 | 0x78 | 0x7c | *
* ---------------------------------------------------------------------------------- *
* | hidden | RIP | EXIT | parameter area | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 32 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | *
* ---------------------------------------------------------------------------------- *
* | 0x80 | 0x84 | 0x88 | 0x8c | 0x90 | 0x94 | 0x98 | 0x9c | *
* ---------------------------------------------------------------------------------- *
* | parameter area | FCTX | *
* ---------------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------------- *
* | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | *
* ---------------------------------------------------------------------------------- *
* | 0xa0 | 0xa4 | 0xa8 | 0xac | 0xb0 | 0xb4 | 0xb8 | 0xbc | *
* ---------------------------------------------------------------------------------- *
* | DATA | | | | *
* ---------------------------------------------------------------------------------- *
* *
* ***********************************************************************************/
.file "make_x86_64_ms_pe_gas.asm"
.text
.p2align 4,,15
.globl make_fcontext
.def make_fcontext; .scl 2; .type 32; .endef
.seh_proc make_fcontext
make_fcontext:
.seh_endprologue
/* first arg of make_fcontext() == top of context-stack */
movq %rcx, %rax
/* shift address in RAX to lower 16 byte boundary */
/* == pointer to fcontext_t and address of context stack */
andq $-16, %rax
/* reserve space for context-data on context-stack */
/* on context-function entry: (RSP -0x8) % 16 == 0 */
leaq -0xb8(%rax), %rax
/* third arg of make_fcontext() == address of context-function */
movq %r8, 0x68(%rax)
/* first arg of make_fcontext() == top of context-stack */
/* save top address of context stack as 'base' */
movq %rcx, 0x18(%rax)
/* second arg of make_fcontext() == size of context-stack */
/* negate stack size for LEA instruction (== substraction) */
negq %rdx
/* compute bottom address of context stack (limit) */
leaq (%rcx,%rdx), %rcx
/* save bottom address of context stack as 'limit' */
movq %rcx, 0x10(%rax)
/* save address of context stack limit as 'dealloction stack' */
movq %rcx, 0x8(%rax)
/* compute address of transport_t */
leaq 0x98(%rax), %rcx
/* store address of transport_t in hidden field */
movq %rcx, 0x60(%rax)
/* compute abs address of label finish */
leaq finish(%rip), %rcx
/* save address of finish as return-address for context-function */
/* will be entered after context-function returns */
movq %rcx, 0x70(%rax)
ret /* return pointer to context-data */
finish:
/* 32byte shadow-space for _exit() */
andq $-32, %rsp
/* 32byte shadow-space for _exit() are */
/* already reserved by make_fcontext() */
/* exit code is zero */
xorq %rcx, %rcx
/* exit application */
call _exit
hlt
.seh_endproc
.def _exit; .scl 2; .type 32; .endef /* standard C library function */
.section .drectve
.ascii " -export:\"make_fcontext\""
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment