Commit 7796022f by FritzFlorian

Fix matrix multiplication benchmark for new scheduler.

parent 01596ff3
Pipeline #1393 failed with stages
in 26 seconds
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/scheduling/parallel_result.h"
#include "pls/internal/scheduling/scheduler_memory.h"
#include "pls/internal/scheduling/static_scheduler_memory.h"
#include "pls/algorithms/for_each.h"
using namespace pls::internal::scheduling;
......@@ -15,17 +14,20 @@ class pls_matrix : public matrix::matrix<T, SIZE> {
public:
pls_matrix() : matrix::matrix<T, SIZE>() {}
parallel_result<int> pls_multiply(const matrix::matrix<T, SIZE> &a, const matrix::matrix<T, SIZE> &b) {
return pls::algorithm::for_each_range(0, SIZE, [this, &a, &b](int i) {
void pls_multiply(const matrix::matrix<T, SIZE> &a, const matrix::matrix<T, SIZE> &b) {
pls::algorithm::for_each_range(0, SIZE, [this, &a, &b](int i) {
this->multiply_column(i, a, b);
});
}
};
// Scheduler sizing for this benchmark. These are the post-commit values for
// the new scheduler; the old MAX_NUM_CONTS / MAX_CONT_SIZE parameters were
// removed with the continuation API (the scraped diff showed both conflicting
// sets of constants interleaved — only the new set is kept).
constexpr int MAX_NUM_THREADS = 8;        // worker threads the scheduler may use
constexpr int MAX_NUM_TASKS = 32;         // maximum concurrently pending tasks
constexpr int MAX_STACK_SIZE = 1024 * 1;  // per-task stack size (1 KiB — presumably bytes, TODO confirm)

// Statically allocated scheduler memory shared by the whole benchmark run.
static_scheduler_memory<MAX_NUM_THREADS,
                        MAX_NUM_TASKS,
                        MAX_STACK_SIZE> global_scheduler_memory;
// NOTE(review): scraped commit-diff view of main() — pre-commit ("old") and
// post-commit ("new") lines are interleaved with no +/- markers, and the "@@"
// hunk marker below hides the argument parsing / benchmark-runner setup.
// Annotated only; the hidden lines cannot be reconstructed safely.
int main(int argc, char **argv) {
int num_threads;
......@@ -40,40 +42,20 @@ int main(int argc, char **argv) {
// Benchmark matrices: inputs a, b and the destination `result`.
pls_matrix<double, matrix::MATRIX_SIZE> b;
pls_matrix<double, matrix::MATRIX_SIZE> result;
// OLD: function-local scheduler memory sized with the removed
// CONTS/CONT_SIZE parameters, plus the old (unsigned int) cast.
static_scheduler_memory<MAX_NUM_THREADS,
MAX_NUM_TASKS,
MAX_NUM_CONTS,
MAX_CONT_SIZE> static_scheduler_memory;
scheduler scheduler{static_scheduler_memory, (unsigned int) num_threads};
for (int i = 0; i < matrix::WARMUP_ITERATIONS; i++) {
// NEW: scheduler backed by the file-level global_scheduler_memory.
scheduler scheduler{global_scheduler_memory, (unsigned) num_threads};
scheduler.perform_work([&]() {
// OLD: continuation-style warmup via scheduler::par(...).then(...),
// one perform_work call per warmup iteration.
return scheduler::par([&]() {
return result.pls_multiply(a, b);
}, []() {
return parallel_result<int>{0};
}).then([&](int, int) {
return parallel_result<int>{0};
});
});
}
// NEW: whole warmup loop runs plainly inside a single perform_work call.
scheduler.perform_work([&]() {
for (int i = 0; i < matrix::WARMUP_ITERATIONS; i++) {
result.pls_multiply(a, b);
}
});
// OLD: one perform_work per timed iteration, continuation-style.
for (int i = 0; i < matrix::NUM_ITERATIONS; i++) {
scheduler.perform_work([&]() {
// NEW: all timed iterations inside a single perform_work call,
// bracketed by runner.start_iteration()/end_iteration().
scheduler.perform_work([&]() {
for (int i = 0; i < matrix::NUM_ITERATIONS; i++) {
runner.start_iteration();
return scheduler::par([&]() {
return result.pls_multiply(a, b);
}, []() {
return parallel_result<int>{0};
}).then([&](int, int) {
runner.end_iteration();
return parallel_result<int>{0};
});
});
}
result.pls_multiply(a, b);
runner.end_iteration();
}
});
// Shared by both versions: report the collected timings — presumably to the
// benchmark runner set up in the hidden lines above; TODO confirm.
runner.commit_results(true);
}
......@@ -2,8 +2,6 @@
#ifndef PLS_PARALLEL_FOR_H
#define PLS_PARALLEL_FOR_H
#include "pls/internal/scheduling/parallel_result.h"
namespace pls {
namespace algorithm {
......@@ -11,26 +9,26 @@ class fixed_strategy;
class dynamic_strategy;
// Public declarations of the parallel for-each algorithms. The scraped diff
// interleaved the old parallel_result<int>-returning declarations with the
// new void ones (conflicting redeclarations); only the new set is kept.

// Parallel for-each over the integer index range [first, last), using the
// given execution strategy to choose task granularity.
// NOTE(review): declared taking ExecutionStrategy by reference, but the
// definition in the implementation header takes it by value — as written
// these are two different templates; confirm and unify.
template<typename Function, typename ExecutionStrategy>
void for_each_range(unsigned long first,
                    unsigned long last,
                    const Function &function,
                    ExecutionStrategy &execution_strategy);

// Index-range overload using the default (dynamic) execution strategy.
template<typename Function>
void for_each_range(unsigned long first,
                    unsigned long last,
                    const Function &function);

// Parallel for-each over the iterator range [first, last).
template<typename RandomIt, typename Function, typename ExecutionStrategy>
void for_each(RandomIt first,
              RandomIt last,
              const Function &function,
              ExecutionStrategy execution_strategy);

// Iterator-range overload using the default (dynamic) execution strategy.
template<typename RandomIt, typename Function>
void for_each(RandomIt first,
              RandomIt last,
              const Function &function);
}
}
......
......@@ -11,10 +11,10 @@ namespace algorithm {
namespace internal {
// NOTE(review): scraped diff view — the old (parallel_result-returning,
// par/then) and new (void, spawn/sync) versions of internal::for_each are
// interleaved below without +/- markers, and the "@@" marker hides the
// sequential-cutoff `if` line. Annotated only; a rewrite would have to guess
// the hidden lines.
template<typename RandomIt, typename Function>
// OLD signature (continuation API):
pls::internal::scheduling::parallel_result<int> for_each(const RandomIt first,
const RandomIt last,
const Function function,
const long min_elements) {
// NEW signature (plain fork-join):
void for_each(const RandomIt first,
const RandomIt last,
const Function function,
const long min_elements) {
using namespace ::pls::internal::scheduling;
const long num_elements = std::distance(first, last);
......@@ -23,25 +23,23 @@ pls::internal::scheduling::parallel_result<int> for_each(const RandomIt first,
// Sequential base case (the guarding `if` is hidden by the hunk marker
// above — presumably `num_elements <= min_elements`, TODO confirm):
for (auto current = first; current != last; current++) {
function(*current);
}
// OLD: base case returned a dummy continuation result.
return parallel_result<int>{0};
} else {
// Cut in half recursively
const long middle_index = num_elements / 2;
// OLD: fork both halves via scheduler::par, join via .then.
return scheduler::par([first, middle_index, last, function, min_elements] {
// NEW: fork the left half with spawn (function captured by reference).
scheduler::spawn([first, middle_index, last, &function, min_elements] {
return internal::for_each(first,
first + middle_index,
function,
min_elements);
}, [first, middle_index, last, function, min_elements] {
});
// NEW: fork the right half with spawn.
scheduler::spawn([first, middle_index, last, &function, min_elements] {
return internal::for_each(first + middle_index,
last,
function,
min_elements);
// OLD: join + dummy result via .then.
}).then([](int, int) {
return parallel_result<int>{0};
});
// NEW: join both spawned halves.
scheduler::sync();
}
}
......@@ -52,7 +50,7 @@ class dynamic_strategy {
explicit dynamic_strategy(const unsigned int tasks_per_thread = 4) : tasks_per_thread_{tasks_per_thread} {};
// Minimum number of elements a task should process: split the range into
// (num_threads * tasks_per_thread_) chunks so each worker gets several tasks
// for load balancing. The scraped diff had the old `scheduler_->` accessor
// line interleaved with the new `get_scheduler()` one (redeclaring
// num_threads); only the new line is kept.
long calculate_min_elements(long num_elements) const {
  const long num_threads = pls::internal::scheduling::thread_state::get().get_scheduler().num_threads();
  return num_elements / (num_threads * tasks_per_thread_);
}
private:
......@@ -71,32 +69,38 @@ class fixed_strategy {
};
template<typename RandomIt, typename Function, typename ExecutionStrategy>
pls::internal::scheduling::parallel_result<int> for_each(RandomIt first,
RandomIt last,
const Function &function,
ExecutionStrategy execution_strategy) {
void for_each(RandomIt
first,
RandomIt last,
const Function &function,
ExecutionStrategy
execution_strategy) {
long num_elements = std::distance(first, last);
return internal::for_each(first, last, function, execution_strategy.calculate_min_elements(num_elements));
return
internal::for_each(first, last, function, execution_strategy
.
calculate_min_elements(num_elements)
);
}
template<typename RandomIt, typename Function>
pls::internal::scheduling::parallel_result<int> for_each(RandomIt first, RandomIt last, const Function &function) {
void for_each(RandomIt first, RandomIt last, const Function &function) {
return for_each(first, last, function, dynamic_strategy{4});
}
template<typename Function, typename ExecutionStrategy>
pls::internal::scheduling::parallel_result<int> for_each_range(unsigned long first,
unsigned long last,
const Function &function,
ExecutionStrategy execution_strategy) {
void for_each_range(unsigned long first,
unsigned long last,
const Function &function,
ExecutionStrategy execution_strategy) {
auto range = pls::internal::helpers::range(first, last);
return for_each(range.begin(), range.end(), function, execution_strategy);
}
template<typename Function>
pls::internal::scheduling::parallel_result<int> for_each_range(unsigned long first,
unsigned long last,
const Function &function) {
void for_each_range(unsigned long first,
unsigned long last,
const Function &function) {
auto range = pls::internal::helpers::range(first, last);
return for_each(range.begin(), range.end(), function);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment