Commit e2f584c4 by FritzFlorian

Allow flexible problem sizes and periodic execution for benchmarks.

parent d32bba10
Pipeline #1513 passed with stages
in 4 minutes 33 seconds
......@@ -31,35 +31,44 @@ constexpr int MAX_NUM_TASKS = 16;
constexpr int MAX_STACK_SIZE = 4096 * 1;
int main(int argc, char **argv) {
int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
auto settings = benchmark_runner::parse_parameters(argc, argv);
string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v3/";
fft::complex_vector data(settings.size_);
fft::complex_vector swap_array(settings.size_);
fft::fill_input(data);
string test_name = to_string(settings.num_threads_) + ".csv";
string full_directory = settings.output_directory_ + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
fft::complex_vector data(fft::SIZE);
fft::complex_vector swap_array(fft::SIZE);
fft::fill_input(data);
pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};
pls::scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
// scheduler.get_profiler().disable_memory_measure();
runner.run_iterations(fft::NUM_ITERATIONS, [&]() {
scheduler.perform_work([&]() {
pls_conquer(data.begin(), swap_array.begin(), fft::SIZE);;
runner.run_iterations(settings.iterations_, [&]() {
scheduler.perform_work([&]() {
pls_conquer(data.begin(), swap_array.begin(), settings.size_);;
});
}, [&]() {
fft::fill_input(data); // Reset data before each run
});
// scheduler.get_profiler().current_run().print_stats();
}, fft::NUM_WARMUP_ITERATIONS, [&]() {
fft::fill_input(data); // Reset data before each run
});
// scheduler.get_profiler().current_run().print_dag(std::cout);
// scheduler.get_profiler().current_run().print_stats();
runner.commit_results(true);
} else {
printf("Running periodic measurement...\n");
runner.enable_wall_time_stats();
runner.pre_allocate_stats();
runner.commit_results(true);
runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() {
scheduler.perform_work([&]() {
pls_conquer(data.begin(), swap_array.begin(), settings.size_);;
});
fft::fill_input(data); // Reset data before each run
});
runner.commit_results(true);
}
return 0;
}
......@@ -8,7 +8,7 @@ using namespace comparison_benchmarks::base;
constexpr int MAX_NUM_TASKS = 32;
constexpr int MAX_STACK_SIZE = 4096 * 1;
int pls_fib(int n, int d) {
int pls_fib(int n) {
if (n == 0) {
return 0;
}
......@@ -17,11 +17,11 @@ int pls_fib(int n, int d) {
}
int a, b;
pls::spawn([n, d, &a]() {
a = pls_fib(n - 1, d + 1);
pls::spawn([n, &a]() {
a = pls_fib(n - 1);
});
pls::spawn([n, d, &b]() {
b = pls_fib(n - 2, d + 1);
pls::spawn([n, &b]() {
b = pls_fib(n - 2);
});
pls::sync();
......@@ -29,29 +29,38 @@ int pls_fib(int n, int d) {
}
int main(int argc, char **argv) {
int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
auto settings = benchmark_runner::parse_parameters(argc, argv);
string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v3/";
string test_name = to_string(settings.num_threads_) + ".csv";
string full_directory = settings.output_directory_ + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
pls::scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};
volatile int res;
// scheduler.get_profiler().disable_memory_measure();
runner.run_iterations(fib::NUM_ITERATIONS, [&]() {
scheduler.perform_work([&]() {
res = pls_fib(fib::INPUT_N, 0);
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
runner.run_iterations(settings.iterations_, [&]() {
scheduler.perform_work([&]() {
res = pls_fib(settings.size_);
});
});
}, fib::NUM_WARMUP_ITERATIONS);
// scheduler.get_profiler().current_run().print_dag(std::cout);
// scheduler.get_profiler().current_run().print_stats();
runner.commit_results(true);
} else {
printf("Running periodic measurement...\n");
runner.enable_wall_time_stats();
runner.pre_allocate_stats();
runner.commit_results(true);
runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() {
scheduler.perform_work([&]() {
res = pls_fib(settings.size_);
});
});
runner.commit_results(true);
}
return 0;
}
......@@ -20,34 +20,44 @@ class pls_matrix : public matrix::matrix<T> {
}
};
constexpr int MAX_NUM_TASKS = 32;
constexpr int MAX_NUM_TASKS = 10;
constexpr int MAX_STACK_SIZE = 4096 * 1;
int main(int argc, char **argv) {
int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
auto settings = benchmark_runner::parse_parameters(argc, argv);
string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v3/";
pls_matrix<double> a{settings.size_};
pls_matrix<double> b{settings.size_};
pls_matrix<double> result{settings.size_};
string test_name = to_string(settings.num_threads_) + ".csv";
string full_directory = settings.output_directory_ + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
pls_matrix<double> a{matrix::MATRIX_SIZE};
pls_matrix<double> b{matrix::MATRIX_SIZE};
pls_matrix<double> result{matrix::MATRIX_SIZE};
pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};
scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
// scheduler.get_profiler().disable_memory_measure();
runner.run_iterations(matrix::NUM_ITERATIONS, [&]() {
scheduler.perform_work([&]() {
result.multiply(a, b);
runner.run_iterations(settings.iterations_, [&]() {
scheduler.perform_work([&]() {
result.multiply(a, b);
});
});
runner.commit_results(true);
} else {
printf("Running periodic measurement...\n");
runner.enable_wall_time_stats();
runner.pre_allocate_stats();
runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() {
scheduler.perform_work([&]() {
result.multiply(a, b);
});
});
}, matrix::WARMUP_ITERATIONS);
// scheduler.get_profiler().current_run().print_dag(std::cout);
// scheduler.get_profiler().current_run().print_stats();
runner.commit_results(true);
}
runner.commit_results(true);
}
......@@ -99,21 +99,13 @@ void multiply_div_conquer(const std::vector<std::vector<std::vector<std::unique_
}
}
constexpr int MAX_NUM_TASKS = 32;
constexpr int MAX_NUM_TASKS = 16;
constexpr int MAX_STACK_SIZE = 4096 * 2;
int main(int argc, char **argv) {
const size_t size = matrix_div_conquer::MATRIX_SIZE;
auto settings = benchmark_runner::parse_parameters(argc, argv);
const size_t size = settings.size_;
int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
// Only run on one version to avoid copy
std::unique_ptr<double[]> result_data{new double[size * size]};
......@@ -145,16 +137,36 @@ int main(int argc, char **argv) {
max_depth++;
remaining_size = remaining_size / 2;
}
pls::strain_local_resource local_indices{(unsigned) num_threads, (unsigned) max_depth};
pls::strain_local_resource local_indices{(unsigned) settings.num_threads_, (unsigned) max_depth};
string test_name = to_string(settings.num_threads_) + ".csv";
string full_directory = settings.output_directory_ + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};
runner.run_iterations(1, [&]() {
scheduler.perform_work([&]() {
multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, result, a, b);
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
runner.run_iterations(settings.iterations_, [&]() {
scheduler.perform_work([&]() {
multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, result, a, b);
});
});
runner.commit_results(true);
} else {
printf("Running periodic measurement...\n");
runner.enable_wall_time_stats();
runner.pre_allocate_stats();
runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() {
scheduler.perform_work([&]() {
multiply_div_conquer(div_conquer_temp_arrays, local_indices, size, 0, result, a, b);
});
});
}, 0);
runner.commit_results(true);
runner.commit_results(true);
}
scheduler.terminate();
}
#include "pls/internal/scheduling/scheduler.h"
#include "pls/pls.h"
using namespace pls::internal::scheduling;
......@@ -16,12 +16,12 @@ int count_child_nodes(unbalanced::node &node) {
std::atomic<int> count{1};
for (int i = 0; i < node.get_num_children(); i++) {
scheduler::spawn([i, &count, &node] {
pls::spawn([i, &count, &node] {
unbalanced::node child_node = node.spawn_child_node(i);
count.fetch_add(count_child_nodes(child_node));
});
}
scheduler::sync();
pls::sync();
return count;
}
......@@ -32,49 +32,47 @@ int unbalanced_tree_search(int seed, int root_children, double q, int normal_chi
}
constexpr int MAX_NUM_TASKS = 256;
constexpr int MAX_STACK_SIZE = 1024 * 2;
constexpr int MAX_STACK_SIZE = 4096 * 1;
int main(int argc, char **argv) {
int num_threads;
string directory;
benchmark_runner::read_args(argc, argv, num_threads, directory);
auto settings = benchmark_runner::parse_parameters(argc, argv);
string test_name = to_string(num_threads) + ".csv";
string full_directory = directory + "/PLS_v3/";
string test_name = to_string(settings.num_threads_) + ".csv";
string full_directory = settings.output_directory_ + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
// scheduler.get_profiler().disable_memory_measure();
runner.run_iterations(1, [&]() {
scheduler.perform_work([&]() {
unbalanced_tree_search(unbalanced::SEED,
unbalanced::ROOT_CHILDREN,
unbalanced::Q,
unbalanced::NORMAL_CHILDREN);
pls::scheduler scheduler{(unsigned) settings.num_threads_, MAX_NUM_TASKS, MAX_STACK_SIZE};
if (settings.type_ == benchmark_runner::benchmark_settings::ISOLATED) {
printf("Running isolated measurement...\n");
runner.enable_memory_stats();
runner.pre_allocate_stats();
runner.run_iterations(settings.iterations_, [&]() {
scheduler.perform_work([&]() {
unbalanced_tree_search(unbalanced::SEED,
unbalanced::ROOT_CHILDREN,
unbalanced::Q,
unbalanced::NORMAL_CHILDREN);
});
});
}, 0);
// scheduler.get_profiler().current_run().print_dag(std::cout);
// scheduler.get_profiler().current_run().print_stats();
runner.commit_results(true);
runner.commit_results(true);
} else {
printf("Running periodic measurement...\n");
runner.enable_wall_time_stats();
runner.pre_allocate_stats();
runner.run_periodic(settings.iterations_, settings.interval_period_, settings.interval_deadline_, [&]() {
scheduler.perform_work([&]() {
unbalanced_tree_search(unbalanced::SEED,
unbalanced::ROOT_CHILDREN,
unbalanced::Q,
unbalanced::NORMAL_CHILDREN);
});
});
runner.commit_results(true);
}
}
//int main() {
// PROFILE_ENABLE
// pls::malloc_scheduler_memory my_scheduler_memory{8, 2u << 18};
// pls::scheduler scheduler{&my_scheduler_memory, 8};
//
// scheduler.perform_work([&] {
// PROFILE_MAIN_THREAD
// for (int i = 0; i < 50; i++) {
// PROFILE_WORK_BLOCK("Top Level")
// int result = unbalanced_tree_search(SEED, ROOT_CHILDREN, Q, NORMAL_CHILDREN);
// std::cout << result << std::endl;
// }
// });
//
// PROFILE_SAVE("test_profile.prof")
//}
......@@ -12,6 +12,7 @@
#include <bits/stdc++.h>
#include <thread>
#include <map>
#include <time.h>
#include <tuple>
#include <unistd.h>
......@@ -35,6 +36,12 @@ class benchmark_runner {
unsigned long memory_pre_run_;
unsigned long memory_post_run_;
bool wall_time_enabled_{false};
const string WALL_TIME_PRE_RUN = "wall_time_pre_run_us";
const string WALL_TIME_POST_RUN = "wall_time_post_run_us";
unsigned long wall_time_pre_run_;
unsigned long wall_time_post_run_;
map<string, vector<long>> custom_stats_;
void print_statistics() {
......@@ -108,6 +115,12 @@ class benchmark_runner {
add_custom_stats_field(MEMORY_POST_RUN);
}
// Opts in to per-iteration wall-clock timestamps.
// Sets the flag that start_iteration()/end_iteration() check before sampling
// std::chrono::system_clock, and announces the two custom stats fields
// (WALL_TIME_PRE_RUN / WALL_TIME_POST_RUN) that those samples are stored under.
// NOTE(review): add_custom_stats_field() presumably creates the per-field
// storage in custom_stats_ - confirm against its definition.
void enable_wall_time_stats() {
wall_time_enabled_ = true;
add_custom_stats_field(WALL_TIME_PRE_RUN);
add_custom_stats_field(WALL_TIME_POST_RUN);
}
void pre_allocate_stats(size_t num = 100000) {
times_.reserve(num);
memset(times_.data(), 'a', num * sizeof(long));
......@@ -117,16 +130,54 @@ class benchmark_runner {
}
}
static void read_args(int argc, char **argv, int &num_threads, string &path) {
if (argc < 3) {
cout << "Must Specifiy concurrency and output directory! (usage: `benchmark <output_directory> <num_threads>`)"
<< endl;
/**
 * Command line configuration shared by the benchmark executables.
 *
 * All numeric fields are value-initialized so that settings of an ISOLATED run
 * (which never parses a period/deadline) do not carry indeterminate values.
 */
struct benchmark_settings {
  enum TYPE { ISOLATED, PERIODIC };

  std::string output_directory_;
  size_t size_{0};
  unsigned num_threads_{0};

  TYPE type_{ISOLATED};
  size_t iterations_{0};
  unsigned long interval_period_{0};
  unsigned long interval_deadline_{0};
};

/**
 * Parses the benchmark command line.
 *
 * Accepted forms (argv[0] is the program name):
 *   benchmark <output_directory> <size> <num_threads> <iterations>                      -> ISOLATED
 *   benchmark <output_directory> <size> <num_threads> <iterations> <period> <deadline>  -> PERIODIC
 *
 * @param argc Number of entries in argv; must be 5 or 7.
 * @param argv Raw argument strings.
 * @return The populated settings. interval_period_ and interval_deadline_ stay 0 for ISOLATED runs.
 *
 * Prints the usage text and calls exit(1) on a wrong argument count or when a
 * numeric argument is not a plain non-negative decimal number that fits the
 * target field.
 */
static benchmark_settings parse_parameters(int argc, char **argv) {
  const auto fail_with_usage = []() {
    printf("usage 1: `benchmark <output_directory> <size> <num_threads> <iterations>`\n");
    printf("usage 2: `benchmark <output_directory> <size> <num_threads> <iterations> <period> <deadline>`\n");
    exit(1);
  };

  if (argc != 5 && argc != 7) {
    fail_with_usage();
  }

  // Strict unsigned parse: the fields are unsigned, so a leading '-', trailing
  // garbage or a value above the field's maximum is rejected instead of being
  // silently wrapped/truncated (or terminating via an uncaught exception).
  const auto parse_number = [&](const char *text, unsigned long long max_value) {
    const std::string token{text};
    size_t consumed = 0;
    unsigned long long value = 0;

    bool valid = !token.empty() && std::isdigit(static_cast<unsigned char>(token.front())) != 0;
    if (valid) {
      try {
        value = std::stoull(token, &consumed);
      } catch (const std::exception &) {
        valid = false;
      }
    }

    if (!valid || consumed != token.size() || value > max_value) {
      printf("Invalid numeric argument: `%s`\n", text);
      fail_with_usage();
    }
    return value;
  };

  benchmark_settings result;
  result.output_directory_ = argv[1];
  result.size_ = static_cast<size_t>(parse_number(argv[2], std::numeric_limits<size_t>::max()));
  result.num_threads_ = static_cast<unsigned>(parse_number(argv[3], std::numeric_limits<unsigned>::max()));
  result.iterations_ = static_cast<size_t>(parse_number(argv[4], std::numeric_limits<size_t>::max()));

  if (argc == 5) {
    result.type_ = benchmark_settings::ISOLATED;
  } else {
    result.type_ = benchmark_settings::PERIODIC;
    result.interval_period_ =
        static_cast<unsigned long>(parse_number(argv[5], std::numeric_limits<unsigned long>::max()));
    result.interval_deadline_ =
        static_cast<unsigned long>(parse_number(argv[6], std::numeric_limits<unsigned long>::max()));
  }

  return result;
}
void start_iteration() {
......@@ -134,6 +185,11 @@ class benchmark_runner {
auto memory_stats = query_process_memory_pages();
memory_pre_run_ = memory_stats.first;
}
if (wall_time_enabled_) {
wall_time_pre_run_ =
std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::time_point_cast<std::chrono::microseconds>(
std::chrono::system_clock::now()).time_since_epoch()).count();
}
last_start_time_ = chrono::steady_clock::now();
}
......@@ -155,6 +211,13 @@ class benchmark_runner {
custom_stats_[MEMORY_PRE_RUN][iteration_index] = memory_pre_run_;
custom_stats_[MEMORY_POST_RUN][iteration_index] = memory_post_run_;
}
if (wall_time_enabled_) {
wall_time_post_run_ =
std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::time_point_cast<std::chrono::microseconds>(
std::chrono::system_clock::now()).time_since_epoch()).count();
custom_stats_[WALL_TIME_PRE_RUN][iteration_index] = wall_time_pre_run_;
custom_stats_[WALL_TIME_POST_RUN][iteration_index] = wall_time_post_run_;
}
}
void store_custom_stat(const string &name, long value) {
......@@ -164,14 +227,8 @@ class benchmark_runner {
void run_iterations(int count,
const function<void(void)> measure,
int warmup_count,
const function<void(void)> prepare = []() {},
const function<void(void)> finish = []() {}) {
for (int i = 0; i < warmup_count; i++) {
prepare();
measure();
}
for (int i = 0; i < count; i++) {
using namespace std::literals;
this_thread::sleep_for(100us);
......@@ -183,6 +240,90 @@ class benchmark_runner {
}
}
/**
 * Advances a timespec in place by the given offset.
 *
 * @param timespec    Time point to modify; on return tv_nsec is below one second
 *                    (given a non-negative tv_nsec on entry).
 * @param seconds     Whole seconds to add.
 * @param nanoseconds Nanoseconds to add; may itself exceed one second.
 */
void add_to_timespec(timespec &timespec, size_t seconds, size_t nanoseconds) {
  constexpr long NS_PER_SECOND = 1000l * 1000l * 1000l;

  // Fold whole seconds contained in `nanoseconds` first so that the addition to
  // the (long) tv_nsec field stays below two seconds worth of nanoseconds.
  const size_t carried_seconds = nanoseconds / static_cast<size_t>(NS_PER_SECOND);
  const size_t remaining_nanoseconds = nanoseconds % static_cast<size_t>(NS_PER_SECOND);

  timespec.tv_sec += static_cast<time_t>(seconds + carried_seconds);
  timespec.tv_nsec += static_cast<long>(remaining_nanoseconds);

  // `>=` (not `>`): tv_nsec == 1'000'000'000 is already out of range for a
  // timespec and must be carried into tv_sec.
  while (timespec.tv_nsec >= NS_PER_SECOND) {
    timespec.tv_nsec -= NS_PER_SECOND;
    timespec.tv_sec++;
  }
}
/**
 * Runs `measure` on a fixed period using absolute CLOCK_MONOTONIC wake-up times.
 *
 * The first release happens one period after the call. For each executed
 * iteration the time from its release point to its completion (i.e. including
 * any wake-up latency) is written to times_. Release points that have already
 * passed when an iteration finishes are skipped and recorded with a time of 0.
 *
 * @param count       Total number of iteration slots (executed + skipped).
 * @param period_us   Distance between two release points in microseconds.
 * @param deadline_us Relative deadline of each iteration in microseconds.
 * @param measure     The workload to execute once per iteration.
 *
 * NOTE(review): indexing times_ with the iteration counter assumes that every
 * end_iteration() call appends exactly one entry to times_ - confirm.
 */
void run_periodic(size_t count,
                  size_t period_us,
                  size_t deadline_us,
                  const std::function<void(void)> measure) {
  constexpr size_t NS_PER_SECOND = 1000lu * 1000lu * 1000lu;

  const size_t period_total_ns = 1000lu * period_us;
  const size_t period_seconds = period_total_ns / NS_PER_SECOND;
  const size_t period_nanoseconds = period_total_ns % NS_PER_SECOND;

  const size_t deadline_total_ns = 1000lu * deadline_us;
  const size_t deadline_seconds = deadline_total_ns / NS_PER_SECOND;
  const size_t deadline_nanoseconds = deadline_total_ns % NS_PER_SECOND;

  // Strict "lhs is later than rhs". Seconds decide first; nanoseconds only
  // break ties. (Comparing both fields independently misses every overrun
  // where the nanosecond part happens to be smaller.)
  const auto is_after = [](const timespec &lhs, const timespec &rhs) {
    return lhs.tv_sec > rhs.tv_sec || (lhs.tv_sec == rhs.tv_sec && lhs.tv_nsec > rhs.tv_nsec);
  };

  // Prepare basic time spec for first iteration
  timespec iteration_start;
  if (clock_gettime(CLOCK_MONOTONIC, &iteration_start) == -1) {
    perror("clock_gettime");
    exit(1);
  }
  add_to_timespec(iteration_start, period_seconds, period_nanoseconds);

  size_t current_iteration = 0;
  while (current_iteration < count) {
    // Sleep until the next iteration
    long sleep_error = clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &iteration_start, nullptr);
    if (sleep_error) {
      printf("Sleep Error %ld\n", sleep_error);
    }

    // Invoke iteration
    start_iteration();
    measure();
    end_iteration();

    // Calculate all relevant time points for this iteration
    timespec finish_time;
    if (clock_gettime(CLOCK_MONOTONIC, &finish_time) == -1) {
      perror("clock_gettime");
      exit(1);
    }
    timespec iteration_end = iteration_start;
    add_to_timespec(iteration_end, period_seconds, period_nanoseconds);
    timespec deadline_end = iteration_start;
    add_to_timespec(deadline_end, deadline_seconds, deadline_nanoseconds);

    // Store 'actual' wall time instead of iteration time (we want to include sleeping here!)
    long wall_time_us = 0;
    wall_time_us += (finish_time.tv_sec - iteration_start.tv_sec) * 1000l * 1000l;
    wall_time_us += ((long) finish_time.tv_nsec - (long) iteration_start.tv_nsec) / 1000l;

    // Debug output: gap between release-to-finish time and the value end_iteration() stored.
    printf("Difference: %ld\n", static_cast<long>(wall_time_us - times_[current_iteration]));
    times_[current_iteration] = wall_time_us;

    if (is_after(finish_time, deadline_end)) {
      printf("Deadline Miss!\n"); // TODO: Remove
    }

    // Skip iterations whose release point already passed. Bounded by `count`
    // so skipped slots never produce more than `count` records in total.
    while (current_iteration + 1 < count && is_after(finish_time, iteration_end)) {
      iteration_start = iteration_end;
      add_to_timespec(iteration_end, period_seconds, period_nanoseconds);

      current_iteration++;
      // Record an empty slot so the result rows stay aligned with release points.
      start_iteration();
      end_iteration();
      times_[current_iteration] = 0;
    }

    // Progress to next iteration (normally)
    current_iteration++;
    iteration_start = iteration_end;
  }
}
void commit_results(bool print_stats) {
if (print_stats) {
print_statistics();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment