From c47fcd25fba53d966b62e0f77885383848d589e2 Mon Sep 17 00:00:00 2001 From: Tobias Fuchs Date: Fri, 27 Feb 2015 12:45:50 +0100 Subject: [PATCH] mtapi_cpp, algorithms_cpp: resolving number of cores from affinity now --- algorithms_cpp/include/embb/algorithms/internal/for_each-inl.h | 8 ++++++-- algorithms_cpp/include/embb/algorithms/internal/merge_sort-inl.h | 62 +++++++++++++++++++++++++++++++++----------------------------- algorithms_cpp/include/embb/algorithms/internal/partition-inl.h | 2 +- algorithms_cpp/include/embb/algorithms/internal/quick_sort-inl.h | 13 ++++++++++--- algorithms_cpp/include/embb/algorithms/internal/reduce-inl.h | 6 +++++- algorithms_cpp/include/embb/algorithms/internal/scan-inl.h | 9 +++++++-- algorithms_cpp/test/count_test.cc | 2 -- algorithms_cpp/test/for_each_test.cc | 8 +------- algorithms_cpp/test/merge_sort_test.cc | 7 ------- algorithms_cpp/test/quick_sort_test.cc | 7 ------- algorithms_cpp/test/reduce_test.cc | 2 -- algorithms_cpp/test/scan_test.cc | 9 --------- mtapi_cpp/include/embb/mtapi/execution_policy.h | 7 +++++++ mtapi_cpp/include/embb/mtapi/node.h | 10 ++++++++++ mtapi_cpp/src/execution_policy.cc | 5 +++++ mtapi_cpp/src/node.cc | 1 + 16 files changed, 86 insertions(+), 72 deletions(-) diff --git a/algorithms_cpp/include/embb/algorithms/internal/for_each-inl.h b/algorithms_cpp/include/embb/algorithms/internal/for_each-inl.h index cd945fc..a6cd15f 100644 --- a/algorithms_cpp/include/embb/algorithms/internal/for_each-inl.h +++ b/algorithms_cpp/include/embb/algorithms/internal/for_each-inl.h @@ -106,12 +106,16 @@ void ForEachRecursive(RAI first, RAI last, Function unary, typedef typename std::iterator_traits::difference_type difference_type; difference_type distance = std::distance(first, last); if (distance == 0) { - EMBB_THROW(embb::base::ErrorException, "Distance for ForEach is 0"); + return; + } + unsigned int num_cores = policy.GetCoreCount(); + if (num_cores == 0) { + EMBB_THROW(embb::base::ErrorException, "No cores in execution policy"); } mtapi::Node& node = mtapi::Node::GetInstance(); // Determine actually used block size if (block_size == 0) { - block_size = (static_cast(distance) / node.GetCoreCount()); + block_size = (static_cast(distance) / num_cores); if (block_size == 0) { block_size = 1; } diff --git a/algorithms_cpp/include/embb/algorithms/internal/merge_sort-inl.h b/algorithms_cpp/include/embb/algorithms/internal/merge_sort-inl.h index 39bcd31..7e7b0f7 100644 --- a/algorithms_cpp/include/embb/algorithms/internal/merge_sort-inl.h +++ b/algorithms_cpp/include/embb/algorithms/internal/merge_sort-inl.h @@ -89,21 +89,21 @@ class MergeSortFunctor { task_l.Wait(MTAPI_INFINITE); task_r.Wait(MTAPI_INFINITE); - ChunkDescriptor chunk_f = partitioner_[chunk_first_]; - ChunkDescriptor chunk_m = partitioner_[chunk_split_index + 1]; - ChunkDescriptor chunk_l = partitioner_[chunk_last_]; + ChunkDescriptor ck_f = partitioner_[chunk_first_]; + ChunkDescriptor ck_m = partitioner_[chunk_split_index + 1]; + ChunkDescriptor ck_l = partitioner_[chunk_last_]; if(CloneBackToInput(depth_)) { // Merge from temp into input: - difference_type first = std::distance(global_first_, chunk_f.GetFirst()); - difference_type mid = std::distance(global_first_, chunk_m.GetFirst()); - difference_type last = std::distance(global_first_, chunk_l.GetLast()); + difference_type first = std::distance(global_first_, ck_f.GetFirst()); + difference_type mid = std::distance(global_first_, ck_m.GetFirst()); + difference_type last = std::distance(global_first_, ck_l.GetLast()); SerialMerge(temp_first_ + first, temp_first_ + mid, temp_first_ + last, - chunk_f.GetFirst(), + ck_f.GetFirst(), comparison_); } else { // Merge from input into temp: - SerialMerge(chunk_f.GetFirst(), chunk_m.GetFirst(), chunk_l.GetLast(), - temp_first_ + std::distance(global_first_, chunk_f.GetFirst()), + SerialMerge(ck_f.GetFirst(), ck_m.GetFirst(), ck_l.GetLast(), + temp_first_ + std::distance(global_first_, ck_f.GetFirst()), comparison_); } } @@ -129,31 +129,30 @@ class MergeSortFunctor { // Recurse further. Use binary split, ignoring chunk size as this // recursion is serial and has leaf size 1: ChunkPartitioner partitioner(first, last, 2); - ChunkDescriptor chunk_l = partitioner[0]; - ChunkDescriptor chunk_r = partitioner[1]; + ChunkDescriptor ck_l = partitioner[0]; + ChunkDescriptor ck_r = partitioner[1]; MergeSortChunk( - chunk_l.GetFirst(), - chunk_l.GetLast(), + ck_l.GetFirst(), + ck_l.GetLast(), depth + 1); MergeSortChunk( - chunk_r.GetFirst(), - chunk_r.GetLast(), + ck_r.GetFirst(), + ck_r.GetLast(), depth + 1); if (CloneBackToInput(depth)) { // Merge from temp into input: - difference_type d_first = std::distance(global_first_, chunk_l.GetFirst()); - difference_type d_mid = std::distance(global_first_, chunk_r.GetFirst()); - difference_type d_last = std::distance(global_first_, chunk_r.GetLast()); + difference_type d_first = std::distance(global_first_, ck_l.GetFirst()); + difference_type d_mid = std::distance(global_first_, ck_r.GetFirst()); + difference_type d_last = std::distance(global_first_, ck_r.GetLast()); SerialMerge( temp_first_ + d_first, temp_first_ + d_mid, temp_first_ + d_last, - chunk_l.GetFirst(), + ck_l.GetFirst(), comparison_); - } - else { + } else { // Merge from input into temp: SerialMerge( - chunk_l.GetFirst(), chunk_r.GetFirst(), chunk_r.GetLast(), - temp_first_ + std::distance(global_first_, chunk_l.GetFirst()), + ck_l.GetFirst(), ck_r.GetFirst(), ck_r.GetLast(), + temp_first_ + std::distance(global_first_, ck_l.GetFirst()), comparison_); } } @@ -226,15 +225,19 @@ void MergeSort( size_t block_size ) { typedef typename std::iterator_traits::difference_type difference_type; - typedef internal::MergeSortFunctor functor_t; + typedef internal::MergeSortFunctor + functor_t; difference_type distance = std::distance(first, last); if (distance == 0) { EMBB_THROW(embb::base::ErrorException, "Distance for ForEach is 0"); } - embb::mtapi::Node &node = embb::mtapi::Node::GetInstance(); + unsigned int num_cores = policy.GetCoreCount(); + if (num_cores == 0) { + EMBB_THROW(embb::base::ErrorException, "No cores in execution policy"); + } // Determine actually used block size if (block_size == 0) { - block_size = (static_cast(distance) / node.GetCoreCount()); + block_size = (static_cast(distance) / num_cores); if (block_size == 0) block_size = 1; } @@ -253,9 +256,10 @@ void MergeSort( partitioner, first, 0); - mtapi::Task task = node.Spawn(mtapi::Action(base::MakeFunction(functor, - &functor_t::Action), - policy)); + mtapi::Task task = embb::mtapi::Node::GetInstance().Spawn( + mtapi::Action( + base::MakeFunction(functor, &functor_t::Action), + policy)); task.Wait(MTAPI_INFINITE); } diff --git a/algorithms_cpp/include/embb/algorithms/internal/partition-inl.h b/algorithms_cpp/include/embb/algorithms/internal/partition-inl.h index cdb35bc..434af54 100644 --- a/algorithms_cpp/include/embb/algorithms/internal/partition-inl.h +++ b/algorithms_cpp/include/embb/algorithms/internal/partition-inl.h @@ -94,7 +94,7 @@ ChunkPartitioner::ChunkPartitioner(ForwardIterator first, } else { // if no concrete chunk size was given, use number of cores... mtapi::Node& node = mtapi::Node::GetInstance(); - size = node.GetCoreCount(); + size = node.GetWorkerThreadCount(); } elements_count = static_cast(std::distance(first, last)); diff --git a/algorithms_cpp/include/embb/algorithms/internal/quick_sort-inl.h b/algorithms_cpp/include/embb/algorithms/internal/quick_sort-inl.h index dc4f64a..4ce296f 100644 --- a/algorithms_cpp/include/embb/algorithms/internal/quick_sort-inl.h +++ b/algorithms_cpp/include/embb/algorithms/internal/quick_sort-inl.h @@ -192,10 +192,17 @@ template void QuickSort(RAI first, RAI last, ComparisonFunction comparison, const embb::mtapi::ExecutionPolicy& policy, size_t block_size) { embb::mtapi::Node& node = embb::mtapi::Node::GetInstance(); - typename std::iterator_traits::difference_type distance = last - first; - assert(distance > 0); + typedef typename std::iterator_traits::difference_type difference_type; + difference_type distance = std::distance(first, last); + if (distance <= 0) { + return; + } + unsigned int num_cores = policy.GetCoreCount(); + if (num_cores == 0) { + EMBB_THROW(embb::base::ErrorException, "No cores in execution policy"); + } if (block_size == 0) { - block_size = (static_cast(distance) / node.GetCoreCount()); + block_size = (static_cast(distance) / num_cores); if (block_size == 0) block_size = 1; } diff --git a/algorithms_cpp/include/embb/algorithms/internal/reduce-inl.h b/algorithms_cpp/include/embb/algorithms/internal/reduce-inl.h index c16a513..e8fb907 100644 --- a/algorithms_cpp/include/embb/algorithms/internal/reduce-inl.h +++ b/algorithms_cpp/include/embb/algorithms/internal/reduce-inl.h @@ -131,10 +131,14 @@ ReturnType ReduceRecursive(RAI first, RAI last, ReturnType neutral, if (distance == 0) { EMBB_THROW(embb::base::ErrorException, "Distance for Reduce is 0"); } + unsigned int num_cores = policy.GetCoreCount(); + if (num_cores == 0) { + EMBB_THROW(embb::base::ErrorException, "No cores in execution policy"); + } mtapi::Node& node = mtapi::Node::GetInstance(); // Determine actually used block size if (block_size == 0) { - block_size = (static_cast(distance) / node.GetCoreCount()); + block_size = (static_cast(distance) / num_cores); if (block_size == 0) { block_size = 1; } diff --git a/algorithms_cpp/include/embb/algorithms/internal/scan-inl.h b/algorithms_cpp/include/embb/algorithms/internal/scan-inl.h index 6964f52..ab2a21b 100644 --- a/algorithms_cpp/include/embb/algorithms/internal/scan-inl.h +++ b/algorithms_cpp/include/embb/algorithms/internal/scan-inl.h @@ -176,10 +176,14 @@ void ScanIteratorCheck(RAIIn first, RAIIn last, RAIOut output_iterator, if (distance <= 0) { return; } - mtapi::Node& node = mtapi::Node::GetInstance(); + unsigned int num_cores = policy.GetCoreCount(); + if (num_cores == 0) { + EMBB_THROW(embb::base::ErrorException, "No cores in execution policy"); + } + ReturnType values[MTAPI_NODE_MAX_TASKS_DEFAULT]; if (block_size == 0) { - block_size = static_cast(distance) / node.GetCoreCount(); + block_size = static_cast(distance) / num_cores; if (block_size == 0) { block_size = 1; } @@ -193,6 +197,7 @@ void ScanIteratorCheck(RAIIn first, RAIIn last, RAIOut output_iterator, // it creates the tree. typedef ScanFunctor Functor; + mtapi::Node& node = mtapi::Node::GetInstance(); BlockSizePartitioner partitioner_down(first, last, block_size); Functor functor_down(0, partitioner_down.Size() - 1, output_iterator, diff --git a/algorithms_cpp/test/count_test.cc b/algorithms_cpp/test/count_test.cc index e28acd0..fdc5993 100644 --- a/algorithms_cpp/test/count_test.cc +++ b/algorithms_cpp/test/count_test.cc @@ -128,8 +128,6 @@ void CountTest::TestPolicy() { PT_EXPECT_EQ(Count(vector.begin(), vector.end(), 10, ExecutionPolicy()), 3); PT_EXPECT_EQ(Count(vector.begin(), vector.end(), 10, ExecutionPolicy(true)), 3); - PT_EXPECT_EQ(Count(vector.begin(), vector.end(), 10, ExecutionPolicy(false)), - 3); PT_EXPECT_EQ(Count(vector.begin(), vector.end(), 10, ExecutionPolicy(true, 1)), 3); } diff --git a/algorithms_cpp/test/for_each_test.cc b/algorithms_cpp/test/for_each_test.cc index 1242244..3ea68aa 100644 --- a/algorithms_cpp/test/for_each_test.cc +++ b/algorithms_cpp/test/for_each_test.cc @@ -205,13 +205,7 @@ void ForEachTest::TestPolicy() { for (size_t i = 0; i < count; i++) { PT_EXPECT_EQ(vector[i], init[i]*init[i]); } - - vector = init; - ForEach(vector.begin(), vector.end(), Square(), ExecutionPolicy(false)); - for (size_t i = 0; i < count; i++) { - PT_EXPECT_EQ(vector[i], init[i]*init[i]); - } - + vector = init; ForEach(vector.begin(), vector.end(), Square(), ExecutionPolicy(true, 1)); for (size_t i = 0; i < count; i++) { diff --git a/algorithms_cpp/test/merge_sort_test.cc b/algorithms_cpp/test/merge_sort_test.cc index 85b671c..cbcbf0b 100644 --- a/algorithms_cpp/test/merge_sort_test.cc +++ b/algorithms_cpp/test/merge_sort_test.cc @@ -208,13 +208,6 @@ void MergeSortTest::TestPolicy() { vector = init; MergeSortAllocate(vector.begin(), vector.end(), std::less(), - ExecutionPolicy(false)); - for (size_t i = 0; i < count; i++) { - PT_EXPECT_EQ(vector_copy[i], vector[i]); - } - - vector = init; - MergeSortAllocate(vector.begin(), vector.end(), std::less(), ExecutionPolicy(true, 1)); for (size_t i = 0; i < count; i++) { PT_EXPECT_EQ(vector_copy[i], vector[i]); diff --git a/algorithms_cpp/test/quick_sort_test.cc b/algorithms_cpp/test/quick_sort_test.cc index 04a5a3e..8f724ca 100644 --- a/algorithms_cpp/test/quick_sort_test.cc +++ b/algorithms_cpp/test/quick_sort_test.cc @@ -214,13 +214,6 @@ void QuickSortTest::TestPolicy() { vector = init; QuickSort(vector.begin(), vector.end(), std::greater(), - ExecutionPolicy(false)); - for (size_t i = 0; i < count; i++) { - PT_EXPECT_EQ(vector_copy[i], vector[i]); - } - - vector = init; - QuickSort(vector.begin(), vector.end(), std::greater(), ExecutionPolicy(true, 1)); for (size_t i = 0; i < count; i++) { PT_EXPECT_EQ(vector_copy[i], vector[i]); diff --git a/algorithms_cpp/test/reduce_test.cc b/algorithms_cpp/test/reduce_test.cc index c5395a6..2ebebe5 100644 --- a/algorithms_cpp/test/reduce_test.cc +++ b/algorithms_cpp/test/reduce_test.cc @@ -179,8 +179,6 @@ void ReduceTest::TestPolicy() { Identity(), ExecutionPolicy()), sum); PT_EXPECT_EQ(Reduce(vector.begin(), vector.end(), 0, std::plus(), Identity(), ExecutionPolicy(true)), sum); - PT_EXPECT_EQ(Reduce(vector.begin(), vector.end(), 0, - std::plus(), Identity(), ExecutionPolicy(false)), sum); PT_EXPECT_EQ(Reduce(vector.begin(), vector.end(), 0, std::plus(), Identity(), ExecutionPolicy(true, 1)), sum); } diff --git a/algorithms_cpp/test/scan_test.cc b/algorithms_cpp/test/scan_test.cc index 112095e..e88effd 100644 --- a/algorithms_cpp/test/scan_test.cc +++ b/algorithms_cpp/test/scan_test.cc @@ -284,15 +284,6 @@ void ScanTest::TestPolicy() { outputVector = init; Scan(vector.begin(), vector.end(), outputVector.begin(), 0, std::plus(), - Identity(), ExecutionPolicy(false)); - expected = 0; - for (size_t i = 0; i < count; i++) { - expected += vector[i]; - PT_EXPECT_EQ(expected, outputVector[i]); - } - - outputVector = init; - Scan(vector.begin(), vector.end(), outputVector.begin(), 0, std::plus(), Identity(), ExecutionPolicy(true, 1)); expected = 0; for (size_t i = 0; i < count; i++) { diff --git a/mtapi_cpp/include/embb/mtapi/execution_policy.h b/mtapi_cpp/include/embb/mtapi/execution_policy.h index adccfee..85727f3 100644 --- a/mtapi_cpp/include/embb/mtapi/execution_policy.h +++ b/mtapi_cpp/include/embb/mtapi/execution_policy.h @@ -106,6 +106,13 @@ class ExecutionPolicy{ ); /** + * Returns the number of cores the policy is affine to. + * + * \return the number of cores + */ + unsigned int GetCoreCount() const; + + /** * Returns the affinity * * \return the affinity diff --git a/mtapi_cpp/include/embb/mtapi/node.h b/mtapi_cpp/include/embb/mtapi/node.h index 442ebd0..a7c5d99 100644 --- a/mtapi_cpp/include/embb/mtapi/node.h +++ b/mtapi_cpp/include/embb/mtapi/node.h @@ -129,6 +129,15 @@ class Node { } /** + * Returns the number of worker threads. + * \return The number of worker threads. + * \waitfree + */ + mtapi_uint_t GetWorkerThreadCount() const { + return worker_thread_count_; + } + + /** * Creates a Group to launch \link Task Tasks \endlink in. * \return A reference to the created Group * \throws ErrorException if the Group object could not be constructed. @@ -210,6 +219,7 @@ class Node { mtapi_task_context_t * context); mtapi_uint_t core_count_; + mtapi_uint_t worker_thread_count_; mtapi_action_hndl_t action_handle_; std::list queues_; std::list groups_; diff --git a/mtapi_cpp/src/execution_policy.cc b/mtapi_cpp/src/execution_policy.cc index 570b843..157d0ac 100644 --- a/mtapi_cpp/src/execution_policy.cc +++ b/mtapi_cpp/src/execution_policy.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include namespace embb { @@ -105,6 +106,10 @@ bool ExecutionPolicy::IsSetWorker(mtapi_uint_t worker) { return MTAPI_TRUE == aff; } +unsigned int ExecutionPolicy::GetCoreCount() const { + return embb_bitset_count(&affinity_); +} + const mtapi_affinity_t &ExecutionPolicy::GetAffinity() const { return affinity_; } diff --git a/mtapi_cpp/src/node.cc b/mtapi_cpp/src/node.cc index 0deb322..0ffe21a 100644 --- a/mtapi_cpp/src/node.cc +++ b/mtapi_cpp/src/node.cc @@ -75,6 +75,7 @@ Node::Node( "mtapi::Node could not initialize mtapi"); } core_count_ = info.hardware_concurrency; + worker_thread_count_ = embb_core_set_count(&attr->core_affinity); action_handle_ = mtapi_action_create(MTAPI_CPP_TASK_JOB, action_func, MTAPI_NULL, 0, MTAPI_NULL, &status); if (MTAPI_SUCCESS != status) { -- libgit2 0.26.0