Commit 7c227cd8 by FritzFlorian

Unify interface of algorithms.

Now all algorithms are used without the "parallel" prefix/suffix, and the for_each method has a specialization for integer ranges.
parent 9cf034e4
Pipeline #1265 passed with stages
in 3 minutes 59 seconds
...@@ -49,7 +49,7 @@ void fft(complex_vector::iterator data, int n) { ...@@ -49,7 +49,7 @@ void fft(complex_vector::iterator data, int n) {
fft(data, n / 2); fft(data, n / 2);
fft(data + n / 2, n / 2); fft(data + n / 2, n / 2);
} else { } else {
pls::invoke_parallel( pls::invoke(
[&] { fft(data, n / 2); }, [&] { fft(data, n / 2); },
[&] { fft(data + n / 2, n / 2); } [&] { fft(data + n / 2, n / 2); }
); );
......
...@@ -9,12 +9,12 @@ class matrix { ...@@ -9,12 +9,12 @@ class matrix {
public: public:
T data[SIZE][SIZE]; T data[SIZE][SIZE];
matrix(T i = 1) { explicit matrix(T i = 1) {
std::fill(&data[0][0], &data[0][0] + SIZE * SIZE, i); std::fill(&data[0][0], &data[0][0] + SIZE * SIZE, i);
} }
void multiply(const matrix<T, SIZE> &a, const matrix<T, SIZE> &b) { void multiply(const matrix<T, SIZE> &a, const matrix<T, SIZE> &b) {
pls::algorithm::parallel_for(0, SIZE, [&](int i) { pls::algorithm::for_each_range(0, SIZE, [&](int i) {
this->multiply_column(i, a, b); this->multiply_column(i, a, b);
}); });
} }
......
...@@ -18,7 +18,7 @@ int main() { ...@@ -18,7 +18,7 @@ int main() {
} }
pls::internal::helpers::run_mini_benchmark([&] { pls::internal::helpers::run_mini_benchmark([&] {
pls::parallel_scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0); pls::scan(vec.begin(), vec.end(), out.begin(), std::plus<double>(), 0.0);
}, 8, 1000); }, 8, 1000);
PROFILE_SAVE("test_profile.prof") PROFILE_SAVE("test_profile.prof")
......
...@@ -22,9 +22,11 @@ int count_child_nodes(uts::node &node) { ...@@ -22,9 +22,11 @@ int count_child_nodes(uts::node &node) {
std::vector<int> results(children.size()); std::vector<int> results(children.size());
for (size_t i = 0; i < children.size(); i++) { for (size_t i = 0; i < children.size(); i++) {
size_t index = i; size_t index = i;
auto lambda = [&, index] { results[index] = count_child_nodes(children[index]); }; auto lambda = [&, index] {
pls::lambda_task_by_value<typeof(lambda)> sub_task(lambda); results[index] = count_child_nodes(children[index]);
pls::scheduler::spawn_child(sub_task); };
using child_type = pls::lambda_task_by_value<typeof(lambda)>;
pls::scheduler::spawn_child<child_type>(lambda);
} }
pls::scheduler::wait_for_all(); pls::scheduler::wait_for_all();
for (auto result : results) { for (auto result : results) {
...@@ -41,8 +43,8 @@ int unbalanced_tree_search(int seed, int root_children, double q, int normal_chi ...@@ -41,8 +43,8 @@ int unbalanced_tree_search(int seed, int root_children, double q, int normal_chi
uts::node root(seed, root_children, q, normal_children); uts::node root(seed, root_children, q, normal_children);
result = count_child_nodes(root); result = count_child_nodes(root);
}; };
pls::lambda_task_by_reference<typeof(lambda)> sub_task(lambda); using child_type = pls::lambda_task_by_reference<typeof(lambda)>;
pls::scheduler::spawn_child(sub_task); pls::scheduler::spawn_child<child_type>(lambda);
pls::scheduler::wait_for_all(); pls::scheduler::wait_for_all();
return result; return result;
......
...@@ -54,7 +54,7 @@ void fft(complex_vector::iterator data, int n) { ...@@ -54,7 +54,7 @@ void fft(complex_vector::iterator data, int n) {
fft(data, n / 2); fft(data, n / 2);
fft(data + n / 2, n / 2); fft(data + n / 2, n / 2);
} else { } else {
pls::invoke_parallel( pls::invoke(
[&] { fft(data, n / 2); }, [&] { fft(data, n / 2); },
[&] { fft(data + n / 2, n / 2); } [&] { fft(data + n / 2, n / 2); }
); );
......
...@@ -10,5 +10,7 @@ ...@@ -10,5 +10,7 @@
#include <pls/pls.h> #include <pls/pls.h>
int main() { int main() {
auto range = boost::irange(0, 10);
bool test = std::is_integral<typename std::remove_reference<decltype(range.begin())>::type>::value;
std::cout << test << std::endl;
} }
...@@ -2,12 +2,12 @@ ...@@ -2,12 +2,12 @@
add_library(pls STATIC add_library(pls STATIC
include/pls/pls.h src/pls.cpp include/pls/pls.h src/pls.cpp
include/pls/algorithms/invoke_parallel.h include/pls/algorithms/invoke.h
include/pls/algorithms/invoke_parallel_impl.h include/pls/algorithms/invoke_impl.h
include/pls/algorithms/parallel_for.h include/pls/algorithms/for_each.h
include/pls/algorithms/parallel_for_impl.h include/pls/algorithms/for_each_impl.h
include/pls/algorithms/parallel_scan.h include/pls/algorithms/scan.h
include/pls/algorithms/parallel_scan_impl.h include/pls/algorithms/scan_impl.h
include/pls/internal/base/spin_lock.h include/pls/internal/base/spin_lock.h
include/pls/internal/base/tas_spin_lock.h src/internal/base/tas_spin_lock.cpp include/pls/internal/base/tas_spin_lock.h src/internal/base/tas_spin_lock.cpp
......
...@@ -2,23 +2,17 @@ ...@@ -2,23 +2,17 @@
#ifndef PLS_PARALLEL_FOR_H #ifndef PLS_PARALLEL_FOR_H
#define PLS_PARALLEL_FOR_H #define PLS_PARALLEL_FOR_H
// TODO: Replace with own integer iterator to remove dependency
#include <boost/range/irange.hpp>
namespace pls { namespace pls {
namespace algorithm { namespace algorithm {
template<typename RandomIt, typename Function>
void parallel_for(RandomIt first, RandomIt last, const Function &function);
template<typename Function> template<typename Function>
void parallel_for(size_t first, size_t last, const Function &function) { void for_each_range(size_t first, size_t last, const Function &function);
auto range = boost::irange(first, last);
parallel_for(range.begin(), range.end(), function); template<typename RandomIt, typename Function>
} void for_each(RandomIt first, RandomIt last, const Function &function);
} }
} }
#include "parallel_for_impl.h" #include "for_each_impl.h"
#endif //PLS_PARALLEL_FOR_H #endif //PLS_PARALLEL_FOR_H
...@@ -7,11 +7,15 @@ ...@@ -7,11 +7,15 @@
#include "pls/internal/helpers/unique_id.h" #include "pls/internal/helpers/unique_id.h"
// TODO: Replace with own integer iterator to remove dependency
#include <boost/range/irange.hpp>
namespace pls { namespace pls {
namespace algorithm { namespace algorithm {
namespace internal {
template<typename RandomIt, typename Function> template<typename RandomIt, typename Function>
void parallel_for(RandomIt first, RandomIt last, const Function &function) { void for_each(RandomIt first, RandomIt last, const Function &function) {
using namespace ::pls::internal::scheduling; using namespace ::pls::internal::scheduling;
constexpr long min_elements = 4; constexpr long min_elements = 4;
...@@ -26,18 +30,31 @@ void parallel_for(RandomIt first, RandomIt last, const Function &function) { ...@@ -26,18 +30,31 @@ void parallel_for(RandomIt first, RandomIt last, const Function &function) {
long middle_index = num_elements / 2; long middle_index = num_elements / 2;
auto second_half_body = auto second_half_body =
[first, middle_index, last, &function] { parallel_for(first + middle_index, last, function); }; [first, middle_index, last, &function] { internal::for_each(first + middle_index, last, function); };
using second_half_t = lambda_task_by_reference<decltype(second_half_body)>; using second_half_t = lambda_task_by_reference<decltype(second_half_body)>;
scheduler::spawn_child<second_half_t>(std::move(second_half_body)); scheduler::spawn_child<second_half_t>(std::move(second_half_body));
auto first_half_body = auto first_half_body =
[first, middle_index, last, &function] { parallel_for(first, first + middle_index, function); }; [first, middle_index, last, &function] { internal::for_each(first, first + middle_index, function); };
using first_half_t = lambda_task_by_reference<decltype(first_half_body)>; using first_half_t = lambda_task_by_reference<decltype(first_half_body)>;
scheduler::spawn_child_and_wait<first_half_t>(std::move(first_half_body)); scheduler::spawn_child_and_wait<first_half_t>(std::move(first_half_body));
} }
} }
} }
template<typename Function>
void for_each_range(size_t first, size_t last, const Function &function) {
auto range = boost::irange(first, last);
internal::for_each(range.begin(), range.end(), function);
}
template<typename RandomIt, typename Function>
void for_each(RandomIt first, RandomIt last, const Function &function) {
internal::for_each(first, last, function);
}
}
} }
#endif //PLS_INVOKE_PARALLEL_IMPL_H #endif //PLS_INVOKE_PARALLEL_IMPL_H
...@@ -9,15 +9,15 @@ namespace pls { ...@@ -9,15 +9,15 @@ namespace pls {
namespace algorithm { namespace algorithm {
template<typename Function1, typename Function2> template<typename Function1, typename Function2>
void invoke_parallel(const Function1 &function1, const Function2 &function2); void invoke(const Function1 &function1, const Function2 &function2);
template<typename Function1, typename Function2, typename Function3> template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(const Function1 &function1, const Function2 &function2, const Function3 &function3); void invoke(const Function1 &function1, const Function2 &function2, const Function3 &function3);
// ...and so on, add more if we decide to keep this design // ...and so on, add more if we decide to keep this design
} }
} }
#include "invoke_parallel_impl.h" #include "invoke_impl.h"
#endif //PLS_PARALLEL_INVOKE_H #endif //PLS_PARALLEL_INVOKE_H
...@@ -11,7 +11,7 @@ namespace pls { ...@@ -11,7 +11,7 @@ namespace pls {
namespace algorithm { namespace algorithm {
template<typename Function1, typename Function2> template<typename Function1, typename Function2>
void invoke_parallel(Function1 &&function1, Function2 &&function2) { void invoke(Function1 &&function1, Function2 &&function2) {
using namespace ::pls::internal::scheduling; using namespace ::pls::internal::scheduling;
using task_1_t = lambda_task_by_value<Function1>; using task_1_t = lambda_task_by_value<Function1>;
...@@ -22,7 +22,7 @@ void invoke_parallel(Function1 &&function1, Function2 &&function2) { ...@@ -22,7 +22,7 @@ void invoke_parallel(Function1 &&function1, Function2 &&function2) {
} }
template<typename Function1, typename Function2, typename Function3> template<typename Function1, typename Function2, typename Function3>
void invoke_parallel(Function1 &&function1, Function2 &&function2, Function3 &&function3) { void invoke(Function1 &&function1, Function2 &&function2, Function3 &&function3) {
using namespace ::pls::internal::scheduling; using namespace ::pls::internal::scheduling;
using task_1_t = lambda_task_by_value<Function1>; using task_1_t = lambda_task_by_value<Function1>;
......
...@@ -6,10 +6,10 @@ namespace pls { ...@@ -6,10 +6,10 @@ namespace pls {
namespace algorithm { namespace algorithm {
template<typename InIter, typename OutIter, typename BinaryOp, typename Type> template<typename InIter, typename OutIter, typename BinaryOp, typename Type>
void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem); void scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem);
} }
} }
#include "parallel_scan_impl.h" #include "scan_impl.h"
#endif //PLS_PARALLEL_SCAN_H_ #endif //PLS_PARALLEL_SCAN_H_
...@@ -28,7 +28,7 @@ void serial_scan(InIter input_start, const InIter input_end, OutIter output, Bin ...@@ -28,7 +28,7 @@ void serial_scan(InIter input_start, const InIter input_end, OutIter output, Bin
} }
template<typename InIter, typename OutIter, typename BinaryOp, typename Type> template<typename InIter, typename OutIter, typename BinaryOp, typename Type>
void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem) { void scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp op, Type neutral_elem) {
constexpr auto chunks_per_thread = 4; constexpr auto chunks_per_thread = 4;
using namespace pls::internal::scheduling; using namespace pls::internal::scheduling;
...@@ -43,7 +43,7 @@ void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp o ...@@ -43,7 +43,7 @@ void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp o
Type *chunk_sums = reinterpret_cast<Type *>(memory); Type *chunk_sums = reinterpret_cast<Type *>(memory);
// First Pass = calculate each chunks individual prefix sum // First Pass = calculate each chunks individual prefix sum
parallel_for(0, chunks, [&](int i) { for_each_range(0, chunks, [&](int i) {
auto chunk_start = in_start + items_per_chunk * i; auto chunk_start = in_start + items_per_chunk * i;
auto chunk_end = std::min(in_end, chunk_start + items_per_chunk); auto chunk_end = std::min(in_end, chunk_start + items_per_chunk);
auto chunk_output = out + items_per_chunk * i; auto chunk_output = out + items_per_chunk * i;
...@@ -59,7 +59,7 @@ void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp o ...@@ -59,7 +59,7 @@ void parallel_scan(InIter in_start, const InIter in_end, OutIter out, BinaryOp o
// Second Pass = Use results from first pass to correct each chunks sum // Second Pass = Use results from first pass to correct each chunks sum
auto output_start = out; auto output_start = out;
auto output_end = out + size; auto output_end = out + size;
parallel_for(1, chunks, [&](int i) { for_each_range(1, chunks, [&](int i) {
auto chunk_start = output_start + items_per_chunk * i; auto chunk_start = output_start + items_per_chunk * i;
auto chunk_end = std::min(output_end, chunk_start + items_per_chunk); auto chunk_end = std::min(output_end, chunk_start + items_per_chunk);
......
#ifndef PLS_LIBRARY_H #ifndef PLS_LIBRARY_H
#define PLS_LIBRARY_H #define PLS_LIBRARY_H
#include "pls/algorithms/invoke_parallel.h" #include "pls/algorithms/invoke.h"
#include "pls/algorithms/parallel_for.h" #include "pls/algorithms/for_each.h"
#include "pls/algorithms/parallel_scan.h" #include "pls/algorithms/scan.h"
#include "pls/internal/scheduling/task.h" #include "pls/internal/scheduling/task.h"
#include "pls/internal/scheduling/scheduler.h" #include "pls/internal/scheduling/scheduler.h"
#include "pls/internal/helpers/unique_id.h" #include "pls/internal/helpers/unique_id.h"
...@@ -22,9 +22,9 @@ using internal::scheduling::lambda_task_by_reference; ...@@ -22,9 +22,9 @@ using internal::scheduling::lambda_task_by_reference;
using internal::scheduling::lambda_task_by_value; using internal::scheduling::lambda_task_by_value;
using internal::scheduling::task; using internal::scheduling::task;
using algorithm::invoke_parallel; using algorithm::invoke;
using algorithm::parallel_for; using algorithm::for_each;
using algorithm::parallel_scan; using algorithm::scan;
} }
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment