Commit 8a46bcb3 by Tobias Fuchs

algorithms_cpp: using block size partitioner in Scan

parent 9a3b71bb
...@@ -50,7 +50,7 @@ class ForEachFunctor { ...@@ -50,7 +50,7 @@ class ForEachFunctor {
ForEachFunctor(size_t chunk_first, size_t chunk_last, Function unary, ForEachFunctor(size_t chunk_first, size_t chunk_last, Function unary,
const embb::mtapi::ExecutionPolicy& policy, const embb::mtapi::ExecutionPolicy& policy,
const BlockSizePartitioner<RAI>& partitioner) const BlockSizePartitioner<RAI>& partitioner)
: chunk_first_(chunk_first), chunk_last_(chunk_last), : chunk_first_(chunk_first), chunk_last_(chunk_last),
unary_(unary), policy_(policy), partitioner_(partitioner) { unary_(unary), policy_(policy), partitioner_(partitioner) {
} }
...@@ -76,13 +76,11 @@ class ForEachFunctor { ...@@ -76,13 +76,11 @@ class ForEachFunctor {
unary_, policy_, partitioner_); unary_, policy_, partitioner_);
mtapi::Task task_l = mtapi::Node::GetInstance().Spawn( mtapi::Task task_l = mtapi::Node::GetInstance().Spawn(
mtapi::Action( mtapi::Action(
base::MakeFunction( base::MakeFunction(functor_l, &self_t::Action),
functor_l, &self_t::Action),
policy_)); policy_));
mtapi::Task task_r = mtapi::Node::GetInstance().Spawn( mtapi::Task task_r = mtapi::Node::GetInstance().Spawn(
mtapi::Action( mtapi::Action(
base::MakeFunction( base::MakeFunction(functor_r, &self_t::Action),
functor_r, &self_t::Action),
policy_)); policy_));
task_l.Wait(MTAPI_INFINITE); task_l.Wait(MTAPI_INFINITE);
task_r.Wait(MTAPI_INFINITE); task_r.Wait(MTAPI_INFINITE);
...@@ -120,8 +118,8 @@ void ForEachRecursive(RAI first, RAI last, Function unary, ...@@ -120,8 +118,8 @@ void ForEachRecursive(RAI first, RAI last, Function unary,
} }
// Perform check of task number sufficiency // Perform check of task number sufficiency
if (((distance / block_size) * 2) + 1 > MTAPI_NODE_MAX_TASKS_DEFAULT) { if (((distance / block_size) * 2) + 1 > MTAPI_NODE_MAX_TASKS_DEFAULT) {
EMBB_THROW(embb::base::ErrorException, "Not enough MTAPI tasks available " EMBB_THROW(embb::base::ErrorException,
"to perform the parallel foreach loop"); "Not enough MTAPI tasks available for parallel foreach");
} }
BlockSizePartitioner<RAI> partitioner(first, last, block_size); BlockSizePartitioner<RAI> partitioner(first, last, block_size);
......
...@@ -41,70 +41,86 @@ template<typename RAIIn, typename RAIOut, typename ReturnType, ...@@ -41,70 +41,86 @@ template<typename RAIIn, typename RAIOut, typename ReturnType,
typename ScanFunction, typename TransformationFunction> typename ScanFunction, typename TransformationFunction>
class ScanFunctor { class ScanFunctor {
public: public:
ScanFunctor(RAIIn first, RAIIn last, RAIOut output_iterator, ScanFunctor(size_t chunk_first, size_t chunk_last, RAIOut output_iterator,
ReturnType neutral, ScanFunction scan, ReturnType neutral, ScanFunction scan,
TransformationFunction transformation, TransformationFunction transformation,
const embb::mtapi::ExecutionPolicy& policy, const embb::mtapi::ExecutionPolicy& policy,
size_t block_size, ReturnType* tree_values, size_t node_id, const BlockSizePartitioner<RAIIn>& partitioner,
ReturnType* tree_values, size_t node_id,
bool going_down) bool going_down)
: policy_(policy), first_(first), last_(last), : policy_(policy), chunk_first_(chunk_first), chunk_last_(chunk_last),
output_iterator_(output_iterator), scan_(scan), output_iterator_(output_iterator), scan_(scan),
transformation_(transformation), transformation_(transformation),
neutral_(neutral), block_size_(block_size), tree_values_(tree_values), neutral_(neutral), partitioner_(partitioner), tree_values_(tree_values),
node_id_(node_id), parent_value_(neutral), is_first_pass_(going_down) { node_id_(node_id), parent_value_(neutral), is_first_pass_(going_down) {
} }
void Action(mtapi::TaskContext& context) { void Action(mtapi::TaskContext&) {
if (first_ == last_) { if (chunk_first_ == chunk_last_) {
return; // leaf case -> do work
}
size_t distance = static_cast<size_t>(std::distance(first_, last_));
if (distance <= block_size_) { // leaf case -> do work
if (is_first_pass_) { if (is_first_pass_) {
RAIIn iter_in = first_; ChunkDescriptor<RAIIn> chunk = partitioner_[chunk_first_];
RAIIn iter_in = chunk.GetFirst();
RAIIn last_in = chunk.GetLast();
RAIOut iter_out = output_iterator_; RAIOut iter_out = output_iterator_;
ReturnType result = transformation_(*first_); ReturnType result = transformation_(*iter_in);
*iter_out = result; *iter_out = result;
++iter_in; ++iter_in;
++iter_out; ++iter_out;
for (; iter_in != last_; ++iter_in, ++iter_out) { for (; iter_in != last_in; ++iter_in, ++iter_out) {
result = scan_(result, transformation_(*iter_in)); result = scan_(result, transformation_(*iter_in));
*iter_out = result; *iter_out = result;
} }
SetTreeValue(result); SetTreeValue(result);
} else { // Second pass }
RAIIn iter_in = first_; else {
// Second pass
ChunkDescriptor<RAIIn> chunk = partitioner_[chunk_first_];
RAIIn iter_in = chunk.GetFirst();
RAIIn last_in = chunk.GetLast();
RAIOut iter_out = output_iterator_; RAIOut iter_out = output_iterator_;
for (; iter_in != last_; ++iter_in, ++iter_out) { for (; iter_in != last_in; ++iter_in, ++iter_out) {
*iter_out = scan_(parent_value_, *iter_out); *iter_out = scan_(parent_value_, *iter_out);
} }
} }
} else { // recurse further }
internal::ChunkPartitioner<RAIIn> partitioner(first_, last_, 2); else {
ScanFunctor functor_l(partitioner[0].GetFirst(), partitioner[0].GetLast(), // recurse further
output_iterator_, neutral_, scan_, transformation_, size_t chunk_split_index = (chunk_first_ + chunk_last_) / 2;
policy_, block_size_, tree_values_, node_id_, // Split chunks into left / right branches:
is_first_pass_); ScanFunctor functor_l(
ScanFunctor functor_r(partitioner[1].GetFirst(), partitioner[1].GetLast(), chunk_first_, chunk_split_index,
output_iterator_, neutral_, scan_, transformation_, output_iterator_, neutral_, scan_, transformation_,
policy_, block_size_, tree_values_, node_id_, policy_, partitioner_, tree_values_, node_id_,
is_first_pass_); is_first_pass_);
functor_l.SetID(1); ScanFunctor functor_r(
functor_r.SetID(2); chunk_split_index + 1, chunk_last_,
std::advance(functor_r.output_iterator_, output_iterator_, neutral_, scan_, transformation_,
std::distance(functor_l.first_, functor_r.first_)); policy_, partitioner_, tree_values_, node_id_,
is_first_pass_);
functor_l.SetID(LEFT);
functor_r.SetID(RIGHT);
// Advance output iterator of right branch:
ChunkDescriptor<RAIIn> chunk_left = partitioner_[chunk_first_];
ChunkDescriptor<RAIIn> chunk_right = partitioner_[chunk_split_index + 1];
long long dist = std::distance(chunk_left.GetFirst(), chunk_right.GetFirst());
std::advance(functor_r.output_iterator_, dist);
if (!is_first_pass_) { if (!is_first_pass_) {
functor_l.parent_value_ = parent_value_; functor_l.parent_value_ = parent_value_;
functor_r.parent_value_ = functor_l.GetTreeValue() + parent_value_; functor_r.parent_value_ = functor_l.GetTreeValue() + parent_value_;
} }
mtapi::Node& node = mtapi::Node::GetInstance(); // Spawn tasks to recurse:
// Spawn tasks for right partition first: mtapi::Node& node = mtapi::Node::GetInstance();
mtapi::Task task_r = node.Spawn(mtapi::Action(base::MakeFunction( mtapi::Task task_l = node.Spawn(
functor_r, &ScanFunctor::Action), mtapi::Action(
policy_)); base::MakeFunction(functor_l, &ScanFunctor::Action),
// Recurse on left partition: policy_));
functor_l.Action(context); mtapi::Task task_r = node.Spawn(
// Wait for tasks on right partition to complete: mtapi::Action(
base::MakeFunction(functor_r, &ScanFunctor::Action),
policy_));
// Wait for tasks to complete:
task_l.Wait(MTAPI_INFINITE);
task_r.Wait(MTAPI_INFINITE); task_r.Wait(MTAPI_INFINITE);
SetTreeValue(scan_(functor_l.GetTreeValue(), functor_r.GetTreeValue())); SetTreeValue(scan_(functor_l.GetTreeValue(), functor_r.GetTreeValue()));
} }
...@@ -119,23 +135,26 @@ class ScanFunctor { ...@@ -119,23 +135,26 @@ class ScanFunctor {
} }
private: private:
static const int LEFT = 1;
static const int RIGHT = 2;
const embb::mtapi::ExecutionPolicy& policy_; const embb::mtapi::ExecutionPolicy& policy_;
RAIIn first_; size_t chunk_first_;
RAIIn last_; size_t chunk_last_;
RAIOut output_iterator_; RAIOut output_iterator_;
ScanFunction scan_; ScanFunction scan_;
TransformationFunction transformation_; TransformationFunction transformation_;
ReturnType neutral_; ReturnType neutral_;
size_t block_size_; const BlockSizePartitioner<RAIIn>& partitioner_;
ReturnType* tree_values_; ReturnType* tree_values_;
size_t node_id_; size_t node_id_;
ReturnType parent_value_; ReturnType parent_value_;
bool is_first_pass_; bool is_first_pass_;
void SetID(int is_left) { void SetID(int branch) {
if (is_left == 1) { if (branch == LEFT) {
node_id_ = 2 * node_id_ + 1; node_id_ = 2 * node_id_ + 1;
} else if (is_left == 2) { }
else if (branch == RIGHT) {
node_id_ = 2 * node_id_ + 2; node_id_ = 2 * node_id_ + 2;
} }
} }
...@@ -166,23 +185,25 @@ void ScanIteratorCheck(RAIIn first, RAIIn last, RAIOut output_iterator, ...@@ -166,23 +185,25 @@ void ScanIteratorCheck(RAIIn first, RAIIn last, RAIOut output_iterator,
} }
mtapi::Node& node = mtapi::Node::GetInstance(); mtapi::Node& node = mtapi::Node::GetInstance();
ReturnType values[MTAPI_NODE_MAX_TASKS_DEFAULT]; ReturnType values[MTAPI_NODE_MAX_TASKS_DEFAULT];
size_t used_block_size = block_size;
if (block_size == 0) { if (block_size == 0) {
used_block_size = static_cast<size_t>(distance) / node.GetCoreCount(); block_size = static_cast<size_t>(distance) / node.GetCoreCount();
if (used_block_size == 0) used_block_size = 1; if (block_size == 0) {
block_size = 1;
}
} }
if (((distance / used_block_size) * 2) + 1 > MTAPI_NODE_MAX_TASKS_DEFAULT) { if (((distance / block_size) * 2) + 1 > MTAPI_NODE_MAX_TASKS_DEFAULT) {
EMBB_THROW(embb::base::ErrorException, EMBB_THROW(embb::base::ErrorException,
"Number of computation tasks required in scan " "Not enough MTAPI tasks available for parallel scan");
"exceeds MTAPI maximum number of tasks");
} }
// first pass. Calculates prefix sums for leaves and when recursion returns // first pass. Calculates prefix sums for leaves and when recursion returns
// it creates the tree. // it creates the tree.
typedef ScanFunctor<RAIIn, RAIOut, ReturnType, ScanFunction, typedef ScanFunctor<RAIIn, RAIOut, ReturnType, ScanFunction,
TransformationFunction> Functor; TransformationFunction> Functor;
Functor functor_down(first, last, output_iterator, neutral, scan,
transformation, policy, used_block_size, values, 0, BlockSizePartitioner<RAIIn> partitioner_down(first, last, block_size);
Functor functor_down(0, partitioner_down.Size() - 1, output_iterator, neutral,
scan, transformation, policy, partitioner_down, values, 0,
true); true);
mtapi::Task task_down = node.Spawn(mtapi::Action(base::MakeFunction( mtapi::Task task_down = node.Spawn(mtapi::Action(base::MakeFunction(
functor_down, &Functor::Action), functor_down, &Functor::Action),
...@@ -190,8 +211,10 @@ void ScanIteratorCheck(RAIIn first, RAIIn last, RAIOut output_iterator, ...@@ -190,8 +211,10 @@ void ScanIteratorCheck(RAIIn first, RAIIn last, RAIOut output_iterator,
task_down.Wait(MTAPI_INFINITE); task_down.Wait(MTAPI_INFINITE);
// Second pass. Gives to each leaf the part of the prefix missing // Second pass. Gives to each leaf the part of the prefix missing
Functor functor_up(first, last, output_iterator, neutral, scan, BlockSizePartitioner<RAIIn> partitioner_up(first, last, block_size);
transformation, policy, used_block_size, values, 0, false); Functor functor_up(0, partitioner_up.Size() - 1, output_iterator, neutral,
scan, transformation, policy, partitioner_up, values, 0,
false);
mtapi::Task task_up = node.Spawn(mtapi::Action(base::MakeFunction( mtapi::Task task_up = node.Spawn(mtapi::Action(base::MakeFunction(
functor_up, &Functor::Action), functor_up, &Functor::Action),
policy)); policy));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment