Commit f0f3b80e by FritzFlorian

Add basic 'mini_benchmark_runner'.

parent 44ea144a
Pipeline #1136 passed with stages
in 3 minutes 27 seconds
...@@ -32,6 +32,7 @@ add_subdirectory(lib/pls) ...@@ -32,6 +32,7 @@ add_subdirectory(lib/pls)
add_subdirectory(app/playground) add_subdirectory(app/playground)
add_subdirectory(app/test_for_new) add_subdirectory(app/test_for_new)
add_subdirectory(app/invoke_parallel) add_subdirectory(app/invoke_parallel)
add_subdirectory(app/benchmark_fft)
# Add optional tests # Add optional tests
option(PACKAGE_TESTS "Build the tests" ON) option(PACKAGE_TESTS "Build the tests" ON)
......
add_executable(benchmark_fft main.cpp)
target_link_libraries(benchmark_fft pls)
if(EASY_PROFILER)
target_link_libraries(benchmark_fft easy_profiler)
endif()
#include <pls/pls.h>
#include <pls/internal/helpers/profiler.h>
#include <pls/internal/helpers/mini_benchmark.h>
#include <iostream>
#include <complex>
#include <vector>
static constexpr int CUTOFF = 10;
static constexpr int NUM_ITERATIONS = 1000;
static constexpr int INPUT_SIZE = 2064;
typedef std::vector<std::complex<double>> complex_vector;
void divide(complex_vector::iterator data, int n) {
complex_vector tmp_odd_elements(n / 2);
for (int i = 0; i < n / 2; i++) {
tmp_odd_elements[i] = data[i * 2 + 1];
}
for (int i = 0; i < n / 2; i++) {
data[i] = data[i * 2];
}
for (int i = 0; i < n / 2; i++) {
data[i + n / 2] = tmp_odd_elements[i];
}
}
void combine(complex_vector::iterator data, int n) {
for (int i = 0; i < n / 2; i++) {
std::complex<double> even = data[i];
std::complex<double> odd = data[i + n / 2];
// w is the "twiddle-factor".
// this could be cached, but we run the same 'base' algorithm parallel/serial,
// so it won't impact the performance comparison.
std::complex<double> w = exp(std::complex<double>(0, -2. * M_PI * i / n));
data[i] = even + w * odd;
data[i + n / 2] = even - w * odd;
}
}
void fft(complex_vector::iterator data, int n) {
if (n < 2) {
return;
}
divide(data, n);
if (n <= CUTOFF) {
fft(data, n / 2);
fft(data + n / 2, n / 2);
} else {
pls::invoke_parallel(
[&] { fft(data, n / 2); },
[&] { fft(data + n / 2, n / 2); }
);
}
combine(data, n);
}
complex_vector prepare_input(int input_size) {
std::vector<double> known_frequencies{2, 11, 52, 88, 256};
complex_vector data(input_size);
// Set our input data to match a time series of the known_frequencies.
// When applying fft to this time-series we should find these frequencies.
for (int i = 0; i < input_size; i++) {
data[i] = std::complex<double>(0.0, 0.0);
for (auto frequencie : known_frequencies) {
data[i] += sin(2 * M_PI * frequencie * i / input_size);
}
}
return data;
}
int main() {
PROFILE_ENABLE
complex_vector initial_input = prepare_input(INPUT_SIZE);
pls::internal::helpers::run_mini_benchmark([&] {
complex_vector input = initial_input;
fft(input.begin(), input.size());
}, 8);
PROFILE_SAVE("test_profile.prof")
}
...@@ -17,7 +17,8 @@ add_library(pls STATIC ...@@ -17,7 +17,8 @@ add_library(pls STATIC
src/algorithms/invoke_parallel.cpp include/pls/algorithms/invoke_parallel.h src/algorithms/invoke_parallel.cpp include/pls/algorithms/invoke_parallel.h
include/pls/internal/base/error_handling.h include/pls/internal/base/error_handling.h
include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp include/pls/internal/scheduling/scheduler_memory.h src/internal/scheduling/scheduler_memory.cpp
include/pls/internal/helpers/profiler.h) include/pls/internal/helpers/profiler.h
include/pls/internal/helpers/mini_benchmark.h)
# Add everything in `./include` to be in the include path of this project # Add everything in `./include` to be in the include path of this project
target_include_directories(pls target_include_directories(pls
......
#ifndef PLS_MINI_BENCHMARK_H
#define PLS_MINI_BENCHMARK_H
#include "pls/internal/scheduling/scheduler_memory.h"
#include "pls/internal/scheduling/scheduler.h"
#include <chrono>
#include <iostream>
namespace pls {
namespace internal {
namespace helpers {
// TODO: Clean up (separate into small functions and .cpp file)
template<typename Function>
void run_mini_benchmark(const Function& lambda, size_t max_threads, long max_runtime_ms=1000) {
using namespace std;
using namespace pls::internal::scheduling;
malloc_scheduler_memory scheduler_memory{max_threads};
for (unsigned int num_threads = 1; num_threads <= max_threads; num_threads++) {
scheduler local_scheduler{&scheduler_memory, num_threads};
chrono::high_resolution_clock::time_point start_time;
chrono::high_resolution_clock::time_point end_time;
unsigned long iterations = 0;
local_scheduler.perform_work([&] {
start_time = chrono::high_resolution_clock::now();
end_time = start_time;
chrono::high_resolution_clock::time_point planned_end_time = start_time + chrono::milliseconds(max_runtime_ms);
while (end_time < planned_end_time) {
lambda();
end_time = chrono::high_resolution_clock::now();
iterations++;
}
});
long time = chrono::duration_cast<chrono::microseconds>(end_time - start_time).count();
double time_per_iteration = (double)time / iterations;
std::cout << time_per_iteration;
if (num_threads < max_threads) {
std::cout << ", ";
}
}
std::cout << std::endl;
}
}
}
}
#endif //PLS_MINI_BENCHMARK_H
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment