Commit 1ed7535d by Tobias Fuchs

algorithms_cpp: minimized amount of tasks spawned recursively in ForEach, Reduce, Scan.

parent 7a2f4425
...@@ -41,6 +41,8 @@ namespace internal { ...@@ -41,6 +41,8 @@ namespace internal {
template<typename RAI, typename Function> template<typename RAI, typename Function>
class ForEachFunctor { class ForEachFunctor {
private:
typedef ForEachFunctor<RAI, Function> self_t;
public: public:
/** /**
* Constructs a for-each functor with arguments. * Constructs a for-each functor with arguments.
...@@ -51,29 +53,33 @@ class ForEachFunctor { ...@@ -51,29 +53,33 @@ class ForEachFunctor {
block_size_(block_size) { block_size_(block_size) {
} }
void Action(mtapi::TaskContext&) { void Action(mtapi::TaskContext& context) {
size_t distance = static_cast<size_t>(std::distance(first_, last_)); size_t distance = static_cast<size_t>(std::distance(first_, last_));
if (distance == 0) return; if (distance == 0) return;
if (distance <= block_size_) { // leaf case -> do work if (distance <= block_size_) { // leaf case -> do work
for (RAI curIter(first_); curIter != last_; ++curIter) { for (RAI it = first_; it != last_; ++it) {
unary_(*curIter); unary_(*it);
} }
} else { // recurse further } else { // recurse further
ChunkPartitioner<RAI> partitioner(first_, last_, 2); ChunkPartitioner<RAI> partitioner(first_, last_, 2);
ForEachFunctor<RAI, Function> functorL(partitioner[0].GetFirst(), ChunkDescriptor<RAI> chunk_l = partitioner[0];
partitioner[0].GetLast(), unary_, policy_, block_size_); ChunkDescriptor<RAI> chunk_r = partitioner[1];
ForEachFunctor<RAI, Function> functorR(partitioner[1].GetFirst(), self_t functor_l(chunk_l.GetFirst(),
partitioner[1].GetLast(), unary_, policy_, block_size_); chunk_l.GetLast(),
unary_, policy_, block_size_);
mtapi::Node& node = mtapi::Node::GetInstance(); self_t functor_r(chunk_r.GetFirst(),
mtapi::Task taskL = node.Spawn(mtapi::Action(base::MakeFunction( chunk_r.GetLast(),
functorL, &ForEachFunctor<RAI, Function>::Action), unary_, policy_, block_size_);
// Spawn tasks for right partition first:
mtapi::Task task_r = mtapi::Node::GetInstance().Spawn(
mtapi::Action(
base::MakeFunction(
functor_r, &ForEachFunctor<RAI, Function>::Action),
policy_)); policy_));
mtapi::Task taskR = node.Spawn(mtapi::Action(base::MakeFunction( // Recurse on left partition:
functorR, &ForEachFunctor<RAI, Function>::Action), functor_l.Action(context);
policy_)); // Wait for tasks on right partition to complete:
taskL.Wait(MTAPI_INFINITE); task_r.Wait(MTAPI_INFINITE);
taskR.Wait(MTAPI_INFINITE);
} }
} }
...@@ -95,7 +101,9 @@ void ForEachRecursive(RAI first, RAI last, Function unary, ...@@ -95,7 +101,9 @@ void ForEachRecursive(RAI first, RAI last, Function unary,
const embb::mtapi::ExecutionPolicy& policy, size_t block_size) { const embb::mtapi::ExecutionPolicy& policy, size_t block_size) {
typedef typename std::iterator_traits<RAI>::difference_type difference_type; typedef typename std::iterator_traits<RAI>::difference_type difference_type;
difference_type distance = std::distance(first, last); difference_type distance = std::distance(first, last);
assert(distance > 0); if (distance == 0) {
EMBB_THROW(embb::base::ErrorException, "Distance for ForEach is 0");
}
mtapi::Node& node = mtapi::Node::GetInstance(); mtapi::Node& node = mtapi::Node::GetInstance();
// Determine actually used block size // Determine actually used block size
if (block_size == 0) { if (block_size == 0) {
...@@ -127,7 +135,7 @@ void ForEachIteratorCheck(RAI first, RAI last, Function unary, ...@@ -127,7 +135,7 @@ void ForEachIteratorCheck(RAI first, RAI last, Function unary,
} // namespace internal } // namespace internal
template<typename RAI, typename Function> template<typename RAI, typename Function>
void ForEach(RAI first, RAI last, Function unary, void ForEach(RAI first, const RAI last, Function unary,
const embb::mtapi::ExecutionPolicy& policy, size_t block_size) { const embb::mtapi::ExecutionPolicy& policy, size_t block_size) {
typename std::iterator_traits<RAI>::iterator_category category; typename std::iterator_traits<RAI>::iterator_category category;
internal::ForEachIteratorCheck(first, last, unary, policy, block_size, internal::ForEachIteratorCheck(first, last, unary, policy, block_size,
......
...@@ -71,8 +71,8 @@ const ChunkDescriptor<ForwardIterator> ...@@ -71,8 +71,8 @@ const ChunkDescriptor<ForwardIterator>
ForwardIterator last_new = first_new; ForwardIterator last_new = first_new;
if (index == elements_count / chunkSize) { if (index >= chunks - 1) {
std::advance(last_new, elements_count % chunkSize); last_new = last;
} else { } else {
std::advance(last_new, chunkSize); std::advance(last_new, chunkSize);
} }
......
...@@ -53,7 +53,7 @@ class ReduceFunctor { ...@@ -53,7 +53,7 @@ class ReduceFunctor {
block_size_(block_size), result_(result) { block_size_(block_size), result_(result) {
} }
void Action(mtapi::TaskContext&) { void Action(mtapi::TaskContext& context) {
if (first_ == last_) { if (first_ == last_) {
return; return;
} }
...@@ -66,22 +66,26 @@ class ReduceFunctor { ...@@ -66,22 +66,26 @@ class ReduceFunctor {
result_ = result; result_ = result;
} else { // recurse further } else { // recurse further
internal::ChunkPartitioner<RAI> partitioner(first_, last_, 2); internal::ChunkPartitioner<RAI> partitioner(first_, last_, 2);
ChunkDescriptor<RAI> chunk_l = partitioner[0];
ChunkDescriptor<RAI> chunk_r = partitioner[1];
ReturnType result_l(neutral_); ReturnType result_l(neutral_);
ReturnType result_r(neutral_); ReturnType result_r(neutral_);
ReduceFunctor functor_l(partitioner[0].GetFirst(), ReduceFunctor functor_l(chunk_l.GetFirst(),
partitioner[0].GetLast(), chunk_l.GetLast(),
neutral_, reduction_, transformation_, policy_, neutral_, reduction_, transformation_, policy_,
block_size_, result_l); block_size_, result_l);
ReduceFunctor functor_r(partitioner[1].GetFirst(), ReduceFunctor functor_r(chunk_r.GetFirst(),
partitioner[1].GetLast(), chunk_r.GetLast(),
neutral_, reduction_, transformation_, policy_, neutral_, reduction_, transformation_, policy_,
block_size_, result_r); block_size_, result_r);
mtapi::Node& node = mtapi::Node::GetInstance(); // Spawn tasks for right partition first:
mtapi::Task task_l = node.Spawn(mtapi::Action(base::MakeFunction( mtapi::Task task_r = mtapi::Node::GetInstance().Spawn(
functor_l, &ReduceFunctor::Action), policy_)); mtapi::Action(base::MakeFunction(
mtapi::Task task_r = node.Spawn(mtapi::Action(base::MakeFunction( functor_r, &ReduceFunctor::Action),
functor_r, &ReduceFunctor::Action), policy_)); policy_));
task_l.Wait(MTAPI_INFINITE); // Recurse on left partition:
functor_l.Action(context);
// Wait for tasks on right partition to complete:
task_r.Wait(MTAPI_INFINITE); task_r.Wait(MTAPI_INFINITE);
result_ = reduction_(result_l, result_r); result_ = reduction_(result_l, result_r);
} }
...@@ -97,6 +101,9 @@ class ReduceFunctor { ...@@ -97,6 +101,9 @@ class ReduceFunctor {
size_t block_size_; size_t block_size_;
ReturnType& result_; ReturnType& result_;
/**
* Disables assignment and copy-construction.
*/
ReduceFunctor& operator=(const ReduceFunctor&); ReduceFunctor& operator=(const ReduceFunctor&);
ReduceFunctor(const ReduceFunctor&); ReduceFunctor(const ReduceFunctor&);
}; };
...@@ -110,21 +117,22 @@ ReturnType ReduceRecursive(RAI first, RAI last, ReturnType neutral, ...@@ -110,21 +117,22 @@ ReturnType ReduceRecursive(RAI first, RAI last, ReturnType neutral,
size_t block_size) { size_t block_size) {
typedef typename std::iterator_traits<RAI>::difference_type difference_type; typedef typename std::iterator_traits<RAI>::difference_type difference_type;
difference_type distance = std::distance(first, last); difference_type distance = std::distance(first, last);
assert(distance > 0); if (distance == 0) {
EMBB_THROW(embb::base::ErrorException, "Distance for Reduce is 0");
}
// Determine actually used block size
mtapi::Node& node = mtapi::Node::GetInstance(); mtapi::Node& node = mtapi::Node::GetInstance();
size_t used_block_size = block_size; size_t used_block_size = block_size;
if (used_block_size == 0) { if (used_block_size == 0) {
used_block_size = static_cast<size_t>(distance) / node.GetCoreCount(); used_block_size = static_cast<size_t>(distance) / node.GetCoreCount();
if (used_block_size == 0) used_block_size = 1; if (used_block_size == 0) used_block_size = 1;
} }
// Perform check of task number sufficiency
if (((distance / used_block_size) * 2) + 1 > MTAPI_NODE_MAX_TASKS_DEFAULT) { if (((distance / used_block_size) * 2) + 1 > MTAPI_NODE_MAX_TASKS_DEFAULT) {
EMBB_THROW(embb::base::ErrorException, EMBB_THROW(embb::base::ErrorException,
"Number of computation tasks required in reduction would " "Number of computation tasks required in reduction would "
"exceed MTAPI maximum number of tasks"); "exceed MTAPI maximum number of tasks");
} }
ReturnType result = neutral; ReturnType result = neutral;
typedef ReduceFunctor<RAI, ReturnType, ReductionFunction, typedef ReduceFunctor<RAI, ReturnType, ReductionFunction,
TransformationFunction> Functor; TransformationFunction> Functor;
......
...@@ -54,7 +54,7 @@ class ScanFunctor { ...@@ -54,7 +54,7 @@ class ScanFunctor {
node_id_(node_id), parent_value_(neutral), is_first_pass_(going_down) { node_id_(node_id), parent_value_(neutral), is_first_pass_(going_down) {
} }
void Action(mtapi::TaskContext&) { void Action(mtapi::TaskContext& context) {
if (first_ == last_) { if (first_ == last_) {
return; return;
} }
...@@ -67,23 +67,19 @@ class ScanFunctor { ...@@ -67,23 +67,19 @@ class ScanFunctor {
*iter_out = result; *iter_out = result;
++iter_in; ++iter_in;
++iter_out; ++iter_out;
while (iter_in != last_) { for (; iter_in != last_; ++iter_in, ++iter_out) {
result = scan_(result, transformation_(*iter_in)); result = scan_(result, transformation_(*iter_in));
*iter_out = result; *iter_out = result;
++iter_in;
++iter_out;
} }
SetTreeValue(result); SetTreeValue(result);
} else { // Second pass } else { // Second pass
RAIIn iter_in = first_; RAIIn iter_in = first_;
RAIOut iter_out = output_iterator_; RAIOut iter_out = output_iterator_;
while (iter_in != last_) { for (; iter_in != last_; ++iter_in, ++iter_out) {
*iter_out = scan_(parent_value_, *iter_out); *iter_out = scan_(parent_value_, *iter_out);
++iter_in;
++iter_out;
} }
} }
} else { } else { // recurse further
internal::ChunkPartitioner<RAIIn> partitioner(first_, last_, 2); internal::ChunkPartitioner<RAIIn> partitioner(first_, last_, 2);
ScanFunctor functor_l(partitioner[0].GetFirst(), partitioner[0].GetLast(), ScanFunctor functor_l(partitioner[0].GetFirst(), partitioner[0].GetLast(),
output_iterator_, neutral_, scan_, transformation_, output_iterator_, neutral_, scan_, transformation_,
...@@ -102,13 +98,13 @@ class ScanFunctor { ...@@ -102,13 +98,13 @@ class ScanFunctor {
functor_r.parent_value_ = functor_l.GetTreeValue() + parent_value_; functor_r.parent_value_ = functor_l.GetTreeValue() + parent_value_;
} }
mtapi::Node& node = mtapi::Node::GetInstance(); mtapi::Node& node = mtapi::Node::GetInstance();
mtapi::Task task_l = node.Spawn(mtapi::Action(base::MakeFunction( // Spawn tasks for right partition first:
functor_l, &ScanFunctor::Action), mtapi::Task task_r = node.Spawn(mtapi::Action(base::MakeFunction(
functor_r, &ScanFunctor::Action),
policy_)); policy_));
mtapi::Task task_r = node.Spawn(mtapi::Action(base::MakeFunction( // Recurse on left partition:
functor_r, &ScanFunctor::Action), functor_l.Action(context);
policy_)); // Wait for tasks on right partition to complete:
task_l.Wait(MTAPI_INFINITE);
task_r.Wait(MTAPI_INFINITE); task_r.Wait(MTAPI_INFINITE);
SetTreeValue(scan_(functor_l.GetTreeValue(), functor_r.GetTreeValue())); SetTreeValue(scan_(functor_l.GetTreeValue(), functor_r.GetTreeValue()));
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment