Commit 3ae41f42 by FritzFlorian

Fix legacy components breaking CI.

parent dfe88369
Pipeline #1431 failed with stages in 41 seconds
CMakeLists.txt
@@ -37,12 +37,9 @@ add_subdirectory(lib/pls)

 # Include examples
 add_subdirectory(app/playground)
-add_subdirectory(app/test_for_new)
 add_subdirectory(app/benchmark_fft)
 add_subdirectory(app/benchmark_unbalanced)
 add_subdirectory(app/benchmark_matrix)
-add_subdirectory(app/benchmark_prefix)
-add_subdirectory(app/benchmark_pipeline)
 add_subdirectory(app/benchmark_fib)
 add_subdirectory(app/context_switch)
app/benchmark_pipeline/CMakeLists.txt deleted
add_executable(benchmark_pipeline main.cpp)
target_link_libraries(benchmark_pipeline pls)
if (EASY_PROFILER)
    target_link_libraries(benchmark_pipeline easy_profiler)
endif ()
app/benchmark_pipeline/main.cpp deleted
#include <pls/pls.h>
#include <pls/dataflow/dataflow.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>

#include <iostream>
#include <complex>
#include <vector>
#include <tuple>
#include <atomic>

static constexpr int INPUT_SIZE = 8192;
typedef std::vector<std::complex<double>> complex_vector;

using namespace pls::dataflow;

void divide(complex_vector::iterator data, int n) {
  complex_vector tmp_odd_elements(n / 2);
  for (int i = 0; i < n / 2; i++) {
    tmp_odd_elements[i] = data[i * 2 + 1];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i] = data[i * 2];
  }
  for (int i = 0; i < n / 2; i++) {
    data[i + n / 2] = tmp_odd_elements[i];
  }
}

void combine(complex_vector::iterator data, int n) {
  for (int i = 0; i < n / 2; i++) {
    std::complex<double> even = data[i];
    std::complex<double> odd = data[i + n / 2];

    // w is the "twiddle factor".
    // It could be cached, but we run the same algorithm parallel/serial,
    // so it won't impact the performance comparison.
    std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
    data[i] = even + w * odd;
    data[i + n / 2] = even - w * odd;
  }
}

void fft(complex_vector::iterator data, int n) {
  if (n < 2) {
    return;
  }
  divide(data, n);
  fft(data, n / 2);
  fft(data + n / 2, n / 2);
  combine(data, n);
}
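For reference, combine implements the standard radix-2 Cooley-Tukey butterfly: with E_k and O_k the half-size transforms of the even- and odd-indexed samples placed in the two halves by divide, the loop above computes

X_k = E_k + e^{-2\pi i k / n} O_k, \qquad X_{k + n/2} = E_k - e^{-2\pi i k / n} O_k, \qquad k = 0, \dots, n/2 - 1,

which is exactly the even + w * odd and even - w * odd pair in the code.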
complex_vector prepare_input(int input_size) {
  std::vector<double> known_frequencies{2, 11, 52, 88, 256};
  complex_vector data(input_size);

  // Set our input data to match a time series of the known_frequencies.
  // When applying the fft to this time series we should find these frequencies.
  for (int i = 0; i < input_size; i++) {
    data[i] = std::complex<double>(0.0, 0.0);
    for (auto frequency : known_frequencies) {
      data[i] += sin(2 * M_PI * frequency * i / input_size);
    }
  }

  return data;
}
int main() {
  PROFILE_ENABLE
  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u};
  pls::scheduler scheduler{&my_scheduler_memory, 4};

  graph<inputs<int>, outputs<int>> graph;
  std::atomic<int> count{0};
  auto lambda = [&](const int &in, int &out) {
    PROFILE_WORK_BLOCK("Work Lambda")
    out = in;

    complex_vector input = prepare_input(INPUT_SIZE);
    fft(input.begin(), input.size());
    count++;
  };
  function_node<inputs<int>, outputs<int>, decltype(lambda)> step_1{lambda};
  function_node<inputs<int>, outputs<int>, decltype(lambda)> step_2{lambda};
  function_node<inputs<int>, outputs<int>, decltype(lambda)> step_3{lambda};
  function_node<inputs<int>, outputs<int>, decltype(lambda)> step_4{lambda};

  graph >> step_1 >> step_2 >> step_3 >> step_4 >> graph;
  graph.build();

  const int num_elements = 10;
  std::vector<std::tuple<int>> results(num_elements);

  pls::internal::helpers::run_mini_benchmark([&] {
    PROFILE_WORK_BLOCK("Top Level")
    for (int j = 0; j < num_elements; j++) {
      graph.run(std::tuple<int>{j}, &results[j]);
    }
    pls::scheduler::wait_for_all();
  }, 8, 1000);

  PROFILE_SAVE("test_profile.prof")
}
//int main() {
//  PROFILE_ENABLE
//  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18u};
//  pls::scheduler scheduler{&my_scheduler_memory, 4};
//
//  graph<inputs<int>, outputs<int>> graph;
//  std::atomic<int> count{0};
//  auto lambda = [&](const int &in, int &out) {
//    PROFILE_WORK_BLOCK("Work Lambda")
//    out = in;
//    complex_vector input = prepare_input(INPUT_SIZE);
//    fft(input.begin(), input.size());
//    count++;
//  };
//  function_node<inputs<int>, outputs<int>, decltype(lambda)> step_1{lambda};
//  function_node<inputs<int>, outputs<int>, decltype(lambda)> step_2{lambda};
//  function_node<inputs<int>, outputs<int>, decltype(lambda)> step_3{lambda};
//  function_node<inputs<int>, outputs<int>, decltype(lambda)> step_4{lambda};
//
//  graph >> step_1 >> step_2 >> step_3 >> step_4 >> graph;
//  graph.build();
//
//  const int num_elements = 10;
//  std::vector<std::tuple<int>> results(num_elements);
//
//  scheduler.perform_work([&] {
//    PROFILE_MAIN_THREAD
//    for (int i = 0; i < 10; i++) {
//      PROFILE_WORK_BLOCK("Top Level")
//      for (int j = 0; j < num_elements; j++) {
//        graph.run(std::tuple<int>{j}, &results[j]);
//      }
//      pls::scheduler::wait_for_all();
//    }
//  });
//
//  std::cout << count << std::endl;
//
//  PROFILE_SAVE("test_profile.prof")
//}
app/benchmark_prefix/CMakeLists.txt deleted
add_executable(benchmark_prefix main.cpp)
target_link_libraries(benchmark_prefix pls)
if (EASY_PROFILER)
    target_link_libraries(benchmark_prefix easy_profiler)
endif ()
app/benchmark_prefix/main.cpp deleted
#include <pls/pls.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>

#include <iostream>
#include <vector>
#include <functional>

static constexpr int INPUT_SIZE = 10e7;  // Note: 10e7 is 1e8, i.e. 100 000 000 elements.

int main() {
  PROFILE_ENABLE
  std::vector<double> vec(INPUT_SIZE, 1);
  std::vector<double> out(INPUT_SIZE);

  for (int i = 0; i < INPUT_SIZE; i++) {
    vec[i] = i;
  }

  pls::internal::helpers::run_mini_benchmark([&] {
    pls::scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0);
  }, 8, 1000);

  PROFILE_SAVE("test_profile.prof")
}
//int main() {
//  PROFILE_ENABLE
//  pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
//  pls::scheduler scheduler{&my_scheduler_memory, 8};
//
//  std::vector<double> vec(INPUT_SIZE, 1);
//  std::vector<double> out(INPUT_SIZE);
//
//  for (int i = 0; i < INPUT_SIZE; i++) {
//    vec[i] = 1;
//  }
//
//  scheduler.perform_work([&] {
//    PROFILE_MAIN_THREAD
//    for (int i = 0; i < 100; i++) {
//      pls::scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0);
//    }
//  });
//
//  PROFILE_SAVE("test_profile.prof")
//}
app/playground/CMakeLists.txt
 add_executable(playground
+        barrier.h barrier.cpp
         main.cpp)

 # Example for adding the library to your app (as a cmake project dependency)
-target_link_libraries(playground pls context_switcher Threads::Threads)
+target_link_libraries(playground pls Threads::Threads)
#include "barrier.h"
barrier::barrier(const unsigned int count) : barrier_{} {
pthread_barrier_init(&barrier_, nullptr, count);
}
barrier::~barrier() {
pthread_barrier_destroy(&barrier_);
}
void barrier::wait() {
pthread_barrier_wait(&barrier_);
}
app/playground/barrier.h
#ifndef PLS_BARRIER_H
#define PLS_BARRIER_H

#include <pthread.h>

/**
 * Provides standard barrier behaviour.
 * `count` threads have to call `wait()` before any of the `wait()` calls returns,
 * thus blocking all threads until every one of them has reached the barrier.
 *
 * PORTABILITY:
 * The current implementation is based on pthreads.
 */
class barrier {
  pthread_barrier_t barrier_;

 public:
  explicit barrier(unsigned int count);
  ~barrier();

  void wait();
};

#endif //PLS_BARRIER_H
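A minimal usage sketch of this barrier (illustrative only, not part of the commit): two threads rendezvous at the barrier, so neither "after" line can print before both "before" lines.

#include <cstdio>
#include <thread>

#include "barrier.h"

int main() {
  barrier sync_point{2};  // released once two threads have called wait()

  std::thread worker{[&] {
    std::printf("worker: before barrier\n");
    sync_point.wait();
    std::printf("worker: after barrier\n");
  }};

  std::printf("main: before barrier\n");
  sync_point.wait();
  std::printf("main: after barrier\n");

  worker.join();
}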
app/playground/main.cpp
#include <sys/types.h>
#include <unistd.h>

#include <cstdio>
#include <fstream>
#include <sstream>
#include <string>
#include <mutex>

#include "tsan_support.h"

using namespace std;

// Counts the memory mappings of the current process by counting the
// lines in /proc/<pid>/maps (Linux only).
long count_memory_mappings() {
  pid_t my_pid = getpid();

  ifstream proc_file{"/proc/" + to_string(my_pid) + "/maps"};
  string line;
  long line_count{0};
  while (getline(proc_file, line)) {
    line_count++;
  }

  return line_count;
}

int main() {
  mutex mut;

  int count = 0;
  // Repeatedly create, switch to and destroy a TSAN fiber while taking a
  // lock on it. If the number of mappings grows without bound, the TSAN
  // fiber API is leaking memory.
  while (true) {
    printf("iteration: %d, mappings: %ld\n", count++, count_memory_mappings());

    void *main_fiber = __tsan_get_current_fiber();
    void *other_fiber = __tsan_create_fiber(0);

    __tsan_switch_to_fiber(other_fiber, 0);
    mut.lock();
    mut.unlock();
    __tsan_switch_to_fiber(main_fiber, 0);
    __tsan_destroy_fiber(other_fiber);
  }

  return 0;
}
app/playground/tsan_support.h
#ifndef CONTEXT_SWITCHER_TSAN_SUPPORT
#define CONTEXT_SWITCHER_TSAN_SUPPORT

extern "C" {
// Fiber switching API.
// - A TSAN context for a fiber can be created by __tsan_create_fiber
//   and freed by __tsan_destroy_fiber.
// - The TSAN context of the current fiber or thread can be obtained
//   by calling __tsan_get_current_fiber.
// - __tsan_switch_to_fiber should be called immediately before switching
//   to a fiber, such as a call to swapcontext.
// - The fiber name can be set by __tsan_set_fiber_name.
void *__tsan_get_current_fiber(void);
void *__tsan_create_fiber(unsigned flags);
void __tsan_destroy_fiber(void *fiber);
void __tsan_switch_to_fiber(void *fiber, unsigned flags);
void __tsan_set_fiber_name(void *fiber, const char *name);
}

#endif //CONTEXT_SWITCHER_TSAN_SUPPORT
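The comments above mention swapcontext; the following is a minimal sketch (assumed, not taken from this commit) of how these annotations bracket an actual ucontext switch. It only links when built with -fsanitize=thread, which provides the __tsan_* symbols.

#include <ucontext.h>
#include <cstdio>

#include "tsan_support.h"

static ucontext_t main_context, fiber_context;
static void *main_tsan, *fiber_tsan;
static char fiber_stack[64 * 1024];

void fiber_body() {
  std::printf("running on the fiber\n");
  // Annotate immediately before switching back to the main context.
  __tsan_switch_to_fiber(main_tsan, 0);
  swapcontext(&fiber_context, &main_context);
}

int main() {
  getcontext(&fiber_context);
  fiber_context.uc_stack.ss_sp = fiber_stack;
  fiber_context.uc_stack.ss_size = sizeof(fiber_stack);
  fiber_context.uc_link = nullptr;
  makecontext(&fiber_context, fiber_body, 0);

  main_tsan = __tsan_get_current_fiber();
  fiber_tsan = __tsan_create_fiber(0);
  __tsan_set_fiber_name(fiber_tsan, "example-fiber");

  // Annotate immediately before the actual switch, as required above.
  __tsan_switch_to_fiber(fiber_tsan, 0);
  swapcontext(&main_context, &fiber_context);

  __tsan_destroy_fiber(fiber_tsan);
  return 0;
}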
app/test_for_new/CMakeLists.txt deleted
add_executable(test_for_new main.cpp)
# Example for adding the library to your app (as a cmake project dependency)
target_link_libraries(test_for_new pls)
app/test_for_new/main.cpp deleted
#include <pls/internal/base/thread.h>
#include <pls/internal/helpers/prohibit_new.h>

using namespace pls::internal::base;

int global = 0;

int main() {
  // Try to use every feature, to trigger the prohibited use of new if it hides somewhere.
  thread t1{[]() {}};
  t1.join();
}
lib/pls/CMakeLists.txt
@@ -25,7 +25,6 @@ add_library(pls STATIC
         include/pls/internal/helpers/prohibit_new.h
         include/pls/internal/helpers/profiler.h
-        include/pls/internal/helpers/mini_benchmark.h
         include/pls/internal/helpers/unique_id.h
         include/pls/internal/helpers/range.h
         include/pls/internal/helpers/seqence.h
lib/pls/include/pls/internal/base/backoff.h
@@ -4,10 +4,11 @@
 #include "pls/internal/base/system_details.h"
 #include "pls/internal/helpers/profiler.h"
-#include "pls/internal/base/thread.h"

 #include <random>
-#include <math.h>
+#include <thread>
+#include <chrono>
+#include <cmath>

 namespace pls::internal::base {

@@ -34,7 +35,8 @@ class backoff {
       if (current_ >= YELD_ITERS) {
         PROFILE_LOCK("Yield")
-        this_thread::sleep(5);
+        using namespace std::chrono_literals;
+        std::this_thread::sleep_for(5us);
       }
       current_ = std::min(current_ * 2, MAX_ITERS);
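For context, the pattern this hunk migrates to standard C++ looks roughly like the sketch below. Only YELD_ITERS, MAX_ITERS, current_ and the yield branch are taken from the diff; the constant values and the rest of the class are assumptions.

#include <algorithm>
#include <chrono>
#include <thread>

// Sketch of an exponential spin/yield backoff.
class backoff {
  static constexpr unsigned long YELD_ITERS = 1024;      // assumed value
  static constexpr unsigned long MAX_ITERS = 8 * 1024;   // assumed value
  unsigned long current_ = 1;

 public:
  void do_backoff() {
    // Busy-spin for the current number of iterations...
    for (volatile unsigned long i = 0; i < current_; i++) {
      // spin
    }
    // ...and once spinning gets long, yield the CPU by sleeping briefly.
    if (current_ >= YELD_ITERS) {
      using namespace std::chrono_literals;
      std::this_thread::sleep_for(5us);  // replaces the old this_thread::sleep(5)
    }
    current_ = std::min(current_ * 2, MAX_ITERS);
  }
};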
@@ -7,8 +7,6 @@
 #include <atomic>
 #include <iostream>

-#include "pls/internal/base/thread.h"
-
 namespace pls {
 namespace internal {
 namespace base {
@@ -5,7 +5,6 @@
 #include <atomic>
 #include <iostream>

-#include "pls/internal/base/thread.h"
 #include "pls/internal/base/backoff.h"

 namespace pls {
lib/pls/include/pls/internal/helpers/mini_benchmark.h deleted
#ifndef PLS_MINI_BENCHMARK_H
#define PLS_MINI_BENCHMARK_H

#include "pls/internal/scheduling/scheduler_memory.h"
#include "pls/internal/scheduling/scheduler.h"

#include <chrono>
#include <iostream>

namespace pls {
namespace internal {
namespace helpers {

// Runs `lambda` repeatedly on 1..max_threads scheduler threads. After a
// warmup phase, it prints the mean iteration time in microseconds (and the
// slowest iteration in parentheses) for each thread count.
// TODO: Clean up (separate into small functions and .cpp file)
template<typename Function>
void run_mini_benchmark(const Function &lambda,
                        size_t max_threads,
                        unsigned long max_runtime_ms = 1000,
                        unsigned long warmup_time_ms = 100) {
  using namespace std;
  using namespace pls::internal::scheduling;

  malloc_scheduler_memory scheduler_memory{max_threads, 2u << 17u};
  for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) {
    scheduler local_scheduler{&scheduler_memory, num_threads};

    chrono::high_resolution_clock::time_point start_time;
    chrono::high_resolution_clock::time_point end_time;
    long max_local_time = 0;
    long total_time = 0;
    long iterations = 0;
    local_scheduler.perform_work([&] {
      start_time = chrono::high_resolution_clock::now();
      end_time = start_time;
      chrono::high_resolution_clock::time_point planned_end_time = start_time + chrono::milliseconds(max_runtime_ms);
      chrono::high_resolution_clock::time_point planned_warmup_time = start_time + chrono::milliseconds(warmup_time_ms);

      while (end_time < planned_end_time) {
        if (end_time < planned_warmup_time) {
          lambda();
        } else {
          auto local_start_time = chrono::high_resolution_clock::now();
          lambda();
          auto local_end_time = chrono::high_resolution_clock::now();

          long local_time = chrono::duration_cast<chrono::microseconds>(local_end_time - local_start_time).count();
          total_time += local_time;
          max_local_time = std::max(local_time, max_local_time);
          iterations++;
        }
        end_time = chrono::high_resolution_clock::now();
      }
    });

    double time_per_iteration = (double) total_time / iterations;
    std::cout << (long) time_per_iteration << " (" << max_local_time << ")";
    if (num_threads < max_threads) {
      std::cout << "\t\t";
    }
  }
  std::cout << std::endl;
}

}
}
}

#endif //PLS_MINI_BENCHMARK_H
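A hypothetical call site, mirroring the deleted benchmark mains above; work() is a placeholder for the workload under test:

pls::internal::helpers::run_mini_benchmark([&] {
  work();  // placeholder workload, assumed
}, 8, 1000);

This prints one "<avg us> (<max us>)" column per thread count from 1 to 8, separated by tabs.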