Commit 7c227cd8 by FritzFlorian

Unify interface of algorithms.

Now all algorithms are used without the "parallel" prefix/suffix, and the for_each method has a specialization for integer ranges.
parent 9cf034e4
Pipeline #1265 passed with stages
in 3 minutes 59 seconds
......@@ -49,7 +49,7 @@ void fft(complex_vector::iterator data, int n) {
fft(data, n / 2);
fft(data + n / 2, n / 2);
} else {
pls::invoke_parallel(
pls::invoke(
[&] { fft(data, n / 2); },
[&] { fft(data + n / 2, n / 2); }
);
......
......@@ -9,12 +9,12 @@ class matrix {
public:
T data[SIZE][SIZE];
matrix(T i = 1) {
explicit matrix(T i = 1) {
std::fill(&data[0][0], &data[0][0] + SIZE * SIZE, i);
}
void multiply(const matrix<T, SIZE> &a, const matrix<T, SIZE> &b) {
pls::algorithm::parallel_for(0, SIZE, [&](int i) {
pls::algorithm::for_each_range(0, SIZE, [&](int i) {
this->multiply_column(i, a, b);
});
}
......
......@@ -18,7 +18,7 @@ int main() {
}
pls::internal::helpers::run_mini_benchmark([&] {
pls::parallel_scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0);
pls::scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0);
}, 8, 1000);
PROFILE_SAVE("test_profile.prof")
......
......@@ -22,9 +22,11 @@ int count_child_nodes(uts::node &node) {
std::vector<int> results(children.size());
for (size_t i = 0; i < children.size(); i++) {
size_t index = i;
auto lambda = [&, index] { results[index] = count_child_nodes(children[index]); };
pls::lambda_task_by_value<typeof(lambda)> sub_task(lambda);
pls::scheduler::spawn_child(sub_task);
auto lambda = [&, index] {
results[index] = count_child_nodes(children[index]);
};
using child_type = pls::lambda_task_by_value<typeof(lambda)>;
pls::scheduler::spawn_child<child_type>(lambda);
}
pls::scheduler::wait_for_all();
for (auto result : results) {
......@@ -41,8 +43,8 @@ int unbalanced_tree_search(int seed, int root_children, double q, int normal_chi
uts::node root(seed, root_children, q, normal_children);
result = count_child_nodes(root);
};
pls::lambda_task_by_reference<typeof(lambda)> sub_task(lambda);
pls::scheduler::spawn_child(sub_task);
using child_type = pls::lambda_task_by_reference<typeof(lambda)>;
pls::scheduler::spawn_child<child_type>(lambda);
pls::scheduler::wait_for_all();
return result;
......
......@@ -54,7 +54,7 @@ void fft(complex_vector::iterator data, int n) {
fft(data, n / 2);
fft(data + n / 2, n / 2);
} else {
pls::invoke_parallel(
pls::invoke(
[&] { fft(data, n / 2); },
[&] { fft(data + n / 2, n / 2); }
);
......
......@@ -10,5 +10,7 @@
#include <pls/pls.h>
int main() {
auto range = boost::irange(0, 10);
bool test = std::is_integral<typename std::remove_reference<decltype(range.begin())>::type>::value;
std::cout << test << std::endl;
}
......@@ -2,12 +2,12 @@
add_library(pls STATIC
include/pls/pls.h src/pls.cpp
include/pls/algorithms/invoke_parallel.h
include/pls/algorithms/invoke_parallel_impl.h
include/pls/algorithms/parallel_for.h
include/pls/algorithms/parallel_for_impl.h
include/pls/algorithms/parallel_scan.h
include/pls/algorithms/parallel_scan_impl.h
include/pls/algorithms/invoke.h
include/pls/algorithms/invoke_impl.h
include/pls/algorithms/for_each.h
include/pls/algorithms/for_each_impl.h
include/pls/algorithms/scan.h
include/pls/algorithms/scan_impl.h
include/pls/internal/base/spin_lock.h
include/pls/internal/base/tas_spin_lock.h src/internal/base/tas_spin_lock.cpp
......
......@@ -2,23 +2,17 @@
#ifndef PLS_PARALLEL_FOR_H
#define PLS_PARALLEL_FOR_H
// TODO: Replace with own integer iterator to remove dependency
#include <boost/range/irange.hpp>
namespace pls {
namespace algorithm {
template<typename RandomIt, typename Function>
void parallel_for(RandomIt first, RandomIt last, const Function &function);
template<typename Function>
void parallel_for(size_t first, size_t last, const Function &function) {
auto range = boost::irange(first, last);
parallel_for(range.begin(), range.end(), function);
}
void for_each_range(size_t first, size_t last, const Function &function);
template<typename RandomIt, typename Function>
void for_each(RandomIt first, RandomIt last, const Function &function);
}
}
#include "parallel_for_impl.h"
#include "for_each_impl.h"
#endif //PLS_PARALLEL_FOR_H
......@@ -7,11 +7,15 @@
#include "pls/internal/helpers/unique_id.h"
// TODO: Replace with own integer iterator to remove dependency
#include <boost/range/irange.hpp>
namespace pls {
namespace algorithm {
namespace internal {
template<typename RandomIt, typename Function>
void parallel_for(RandomIt first, RandomIt last, const Function &function) {
void for_each(RandomIt first, RandomIt last, const Function &function) {
using namespace ::pls::internal::scheduling;
constexpr long min_elements = 4;
......@@ -26,18 +30,31 @@ void parallel_for(RandomIt first, RandomIt last, const Function &function) {
long middle_index = num_elements / 2;
auto second_half_body =
[first, middle_index, last, &function] { parallel_for(first + middle_index, last, function); };
[first, middle_index, last, &function] { internal::for_each(first + middle_index, last, function); };
using second_half_t = lambda_task_by_reference<decltype(second_half_body)>;
scheduler::spawn_child<second_half_t>(std::move(second_half_body));
auto first_half_body =
[first, middle_index, last, &function] { parallel_for(first, first + middle_index, function); };
[first, middle_index, last, &function] { internal::for_each(first, first + middle_index, function); };
using first_half_t = lambda_task_by_reference<decltype(first_half_body)>;
scheduler::spawn_child_and_wait<first_half_t>(std::move(first_half_body));
}
}
}
/**
 * Integer-range flavour of for_each.
 *
 * Builds a boost::irange from the two bounds and hands its begin/end
 * iterators, together with the callable, to internal::for_each.
 * NOTE(review): boost::irange is documented as the half-open range
 * [first, last), so `last` itself should not be visited — confirm.
 *
 * @param first    Lower bound passed to boost::irange.
 * @param last     Upper bound passed to boost::irange.
 * @param function Callable forwarded (by const reference) to internal::for_each.
 */
template<typename Function>
void for_each_range(size_t first, size_t last, const Function &function) {
  auto indices = boost::irange(first, last);
  auto index_begin = indices.begin();
  auto index_end = indices.end();
  internal::for_each(index_begin, index_end, function);
}
/**
 * Public iterator-based entry point of the for_each algorithm.
 *
 * Simply forwards all arguments unchanged to internal::for_each, which holds
 * the actual implementation.
 *
 * @param first    Iterator to the start of the range.
 * @param last     Iterator to the end of the range.
 * @param function Callable forwarded (by const reference) to internal::for_each.
 */
template<typename RandomIt, typename Function>
void for_each(RandomIt first, RandomIt last, const Function &function) {
internal::for_each(first, last, function);
}
}
}
#endif //PLS_INVOKE_PARALLEL_IMPL_H
......@@ -9,15 +9,15 @@ namespace pls {
namespace algorithm {
template<typename Function1, typename Function2>
void invoke_parallel(const Function1 &function1, const Function2 &function2);
void invoke(const Function1 &function1, const Function2 &function2);
template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3);
void invoke(const Function1 &function1, const Function2 &function2, const Function3 &function3);
// ...and so on, add more if we decide to keep this design
}
}
#include "invoke_parallel_impl.h"
#include "invoke_impl.h"
#endif //PLS_PARALLEL_INVOKE_H
......@@ -11,7 +11,7 @@ namespace pls {
namespace algorithm {
template<typename Function1, typename Function2>
void invoke_parallel(Function1 &&function1, Function2 &&function2) {
void invoke(Function1 &&function1, Function2 &&function2) {
using namespace ::pls::internal::scheduling;
using task_1_t = lambda_task_by_value<Function1>;
......@@ -22,7 +22,7 @@ void invoke_parallel(Function1 &&function1, Function2 &&function2) {
}
template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(Function1 &&function1, Function2 &&function2, Function3 &&function3) {
void invoke(Function1 &&function1, Function2 &&function2, Function3 &&function3) {
using namespace ::pls::internal::scheduling;
using task_1_t = lambda_task_by_value<Function1>;
......
......@@ -6,10 +6,10 @@ namespace pls {
namespace algorithm {
template<typename InIter, typename OutIter, typename BinaryOp, typename Type>
void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem);
void scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem);
}
}
#include "parallel_scan_impl.h"
#include "scan_impl.h"
#endif //PLS_PARALLEL_SCAN_H_
......@@ -28,7 +28,7 @@ void serial_scan(InIter input_start, const InIter input_end, OutIter output, Bin
}
template<typename InIter, typename OutIter, typename BinaryOp, typename Type>
void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem) {
void scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem) {
constexpr auto chunks_per_thread = 4;
using namespace pls::internal::scheduling;
......@@ -43,7 +43,7 @@ void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp o
Type *chunk_sums = reinterpret_cast<Type *>(memory);
// First Pass = calculate each chunks individual prefix sum
parallel_for(0, chunks, [&](int i) {
for_each_range(0, chunks, [&](int i) {
auto chunk_start = in_start + items_per_chunk * i;
auto chunk_end = std::min(in_end, chunk_start + items_per_chunk);
auto chunk_output = out + items_per_chunk * i;
......@@ -59,7 +59,7 @@ void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp o
// Second Pass = Use results from first pass to correct each chunks sum
auto output_start = out;
auto output_end = out + size;
parallel_for(1, chunks, [&](int i) {
for_each_range(1, chunks, [&](int i) {
auto chunk_start = output_start + items_per_chunk * i;
auto chunk_end = std::min(output_end, chunk_start + items_per_chunk);
......
#ifndef PLS_LIBRARY_H
#define PLS_LIBRARY_H
#include "pls/algorithms/invoke_parallel.h"
#include "pls/algorithms/parallel_for.h"
#include "pls/algorithms/parallel_scan.h"
#include "pls/algorithms/invoke.h"
#include "pls/algorithms/for_each.h"
#include "pls/algorithms/scan.h"
#include "pls/internal/scheduling/task.h"
#include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/helpers/unique_id.h"
......@@ -22,9 +22,9 @@ using internal::scheduling::lambda_task_by_reference;
using internal::scheduling::lambda_task_by_value;
using internal::scheduling::task;
using algorithm::invoke_parallel;
using algorithm::parallel_for;
using algorithm::parallel_scan;
using algorithm::invoke;
using algorithm::for_each;
using algorithm::scan;
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment