Commit b79c00dd by FritzFlorian

Add pre-allocation to memory measures.

This prevents reallocations of the measurement vectors (which double their capacity as they grow) from distorting the memory measurements.
parent e3f8205e
Pipeline #1505 passed with stages
in 4 minutes 21 seconds
......@@ -39,6 +39,7 @@ int main(int argc, char **argv) {
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
fft::complex_vector data(fft::SIZE);
fft::complex_vector swap_array(fft::SIZE);
......@@ -47,12 +48,12 @@ int main(int argc, char **argv) {
pls::scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
// scheduler.get_profiler().disable_memory_measure();
runner.run_iterations(10, [&]() {
runner.run_iterations(fft::NUM_ITERATIONS, [&]() {
scheduler.perform_work([&]() {
pls_conquer(data.begin(), swap_array.begin(), fft::SIZE);;
});
// scheduler.get_profiler().current_run().print_stats();
}, 1, [&]() {
}, fft::NUM_WARMUP_ITERATIONS, [&]() {
fft::fill_input(data); // Reset data before each run
});
// scheduler.get_profiler().current_run().print_dag(std::cout);
......
......@@ -37,6 +37,7 @@ int main(int argc, char **argv) {
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
pls::scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
......
......@@ -32,6 +32,7 @@ int main(int argc, char **argv) {
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
pls_matrix<double, matrix::MATRIX_SIZE> a;
pls_matrix<double, matrix::MATRIX_SIZE> b;
......
......@@ -113,6 +113,7 @@ int main(int argc, char **argv) {
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
// Only run on one version to avoid copy
std::unique_ptr<double[]> result_data{new double[size * size]};
......
......@@ -43,6 +43,7 @@ int main(int argc, char **argv) {
string full_directory = directory + "/PLS_v3/";
benchmark_runner runner{full_directory, test_name};
runner.enable_memory_stats();
runner.pre_allocate_stats();
scheduler scheduler{(unsigned) num_threads, MAX_NUM_TASKS, MAX_STACK_SIZE};
......
......@@ -11,7 +11,7 @@ namespace fft {
const int SIZE = 8192;
const int NUM_ITERATIONS = 1000;
const int NUM_WARMUP_ITERATIONS = 100;
const int NUM_WARMUP_ITERATIONS = 0;
const int RECURSIVE_CUTOFF = 32;
typedef std::vector<std::complex<double>> complex_vector;
......
......@@ -108,6 +108,15 @@ class benchmark_runner {
add_custom_stats_field(MEMORY_POST_RUN);
}
/**
 * Pre-allocates and touches the storage of all measurement vectors.
 *
 * Makes sure `times_` and every vector currently registered in `custom_stats_`
 * can hold at least `num` samples without reallocating, and writes to that
 * storage once so the memory is already in use before the measured runs start.
 * Samples that were recorded before this call are kept.
 *
 * Only custom stats fields that exist at the time of the call are covered, so
 * call this after the fields have been registered (e.g. after
 * enable_memory_stats()).
 *
 * @param num Minimum number of samples each vector should have room for.
 */
void pre_allocate_stats(size_t num = 100000) {
  // Growing to `num` elements constructs (i.e. writes) every slot through the
  // vector's own interface; shrinking back afterwards never releases capacity.
  // This replaces a raw memset() into reserved-but-unconstructed storage that
  // also assumed the element size to be sizeof(long).
  const size_t recorded_times = times_.size();
  if (num > recorded_times) {
    times_.resize(num, 1);  // non-zero fill value, it is discarded right away
    times_.resize(recorded_times);
  }

  for (auto &iter : custom_stats_) {
    auto &samples = iter.second;
    const size_t recorded_samples = samples.size();
    if (num > recorded_samples) {
      samples.resize(num, 1);
      samples.resize(recorded_samples);
    }
  }
}
static void read_args(int argc, char **argv, int &num_threads, string &path) {
if (argc < 3) {
cout << "Must Specifiy concurrency and output directory! (usage: `benchmark <output_directory> <num_threads>`)"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment