diff --git a/CMakeLists.txt b/CMakeLists.txt index ecd1a6d..837e52c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ cmake_minimum_required (VERSION 2.8.9) # Version number set (EMBB_BASE_VERSION_MAJOR 0) set (EMBB_BASE_VERSION_MINOR 2) -set (EMBB_BASE_VERSION_PATCH 3) +set (EMBB_BASE_VERSION_PATCH 4) # Fix compilation for CMake versions >= 3.1 # diff --git a/README.md b/README.md index 5498983..7996d94 100644 --- a/README.md +++ b/README.md @@ -92,13 +92,14 @@ contact us: embb-dev@googlegroups.com. Directory Structure ------------------- -EMB² is a technology stack consisting of various building blocks. For some of -them, there exist C and C++ versions, others are only implemented in C++. The -directory names are postfixed with either "_cpp" or "_c" for the C++ and C -versions, respectively. Currently, EMB² contains the following components: +EMB² consists of various building blocks. For some of them, there exist C and +C++ versions, others are only implemented in C++. The directory names are +postfixed with either "_cpp" or "_c" for the C++ and C versions, respectively. +Currently, EMB² contains the following components: - base: base_c, base_cpp - - mtapi: mtapi_c, mtapi_cpp + - mtapi: mtapi_c, mtapi_network_c, mtapi_opencl_c, mtapi_cpp + - tasks: tasks_cpp - algorithms: algorithms_cpp - dataflow: dataflow_cpp - containers: containers_cpp @@ -109,12 +110,14 @@ the header files, source files, and unit tests, respectively. Component base_c contains abstractions for threading, synchronization, atomic operations, and other functionalities. As the name indicates, the code is implemented in C. Component base_cpp is mainly a C++ wrapper around the base_c -functionalities. Component mtapi_c is a task scheduler written in C and -mtapi_cpp a C++ wrapper for the scheduler. 
Component algorithms_cpp provides -high-level constructs for typical parallelization task in C++, and -dataflow_cpp generic skeletons for the development of parallel stream-based -applications. Finally, component containers_cpp provides containers, i.e., -data structures for storing object in an organized and thread-safe way. +functions. Component mtapi_c is a task scheduler written in C and mtapi_cpp a +C++ wrapper for the scheduler (mtapi_network_c and mtapi_opencl_c are scheduler +plugins for distributed and OpenCL-based heterogeneous systems, respectively). +To simplify programming of homogeneous systems, tasks_cpp contains abstractions +to the MTAPI interfaces. Component algorithms_cpp provides high-level constructs +for typical parallelization tasks in C++, and dataflow_cpp generic skeletons for +the development of parallel stream-based applications. Finally, containers_cpp +provides data structures for storing objects in a thread-safe way. Build and Installation diff --git a/algorithms_cpp/include/embb/algorithms/internal/partition-inl.h b/algorithms_cpp/include/embb/algorithms/internal/partition-inl.h index 48cdb9f..231ccb4 100644 --- a/algorithms_cpp/include/embb/algorithms/internal/partition-inl.h +++ b/algorithms_cpp/include/embb/algorithms/internal/partition-inl.h @@ -31,109 +31,104 @@ namespace embb { namespace algorithms { namespace internal { -template -ChunkDescriptor::ChunkDescriptor(ForwardIterator first, - ForwardIterator last) : - first(first), last(last) { +template +ChunkDescriptor::ChunkDescriptor( + RAI first, RAI last) : + first_(first), last_(last) { } -template -ForwardIterator ChunkDescriptor::GetFirst() const { - return first; +template +RAI ChunkDescriptor::GetFirst() const { + return first_; } -template -ForwardIterator ChunkDescriptor::GetLast() const { - return last; +template +RAI ChunkDescriptor::GetLast() const { + return last_; } -template -BlockSizePartitioner::BlockSizePartitioner( - ForwardIterator first, ForwardIterator 
last, size_t chunkSize) : - first(first), last(last), chunkSize(chunkSize) { - elements_count = static_cast(std::distance(first, last)); - chunks = elements_count / chunkSize; - if (elements_count % chunkSize != 0) - chunks++; +template +BlockSizePartitioner::BlockSizePartitioner( + RAI first, RAI last, size_t chunkSize) : + first_(first), last_(last), chunk_size_(chunkSize) { + elements_count_ = static_cast(std::distance(first_, last_)); + chunks_ = elements_count_ / chunk_size_; + if (elements_count_ % chunk_size_ != 0) { + chunks_++; + } } -template -size_t BlockSizePartitioner::Size() { - return chunks; +template +size_t BlockSizePartitioner::Size() { + return chunks_; } -template -const ChunkDescriptor - BlockSizePartitioner::operator[]( - size_t const& index) const { - ForwardIterator first_new = first; - std::advance(first_new, index * chunkSize); - - ForwardIterator last_new = first_new; - - if (index >= chunks - 1) { - last_new = last; +template +const ChunkDescriptor + BlockSizePartitioner::operator[]( + size_t const & index) const { + typedef typename std::iterator_traits::difference_type + difference_type; + RAI first_new(first_); + first_new += static_cast(chunk_size_ * index); + RAI last_new(first_new); + if (index >= chunks_ - 1) { + last_new = last_; } else { - std::advance(last_new, chunkSize); + last_new += static_cast(chunk_size_); } - - return ChunkDescriptor(first_new, last_new); + return ChunkDescriptor(first_new, last_new); } -template -size_t ChunkPartitioner::Size() { - return size; +template +size_t ChunkPartitioner::Size() { + return size_; } -template -ChunkPartitioner::ChunkPartitioner(ForwardIterator first, - ForwardIterator last, size_t amountChunks) : - first(first), last(last) { +template +ChunkPartitioner::ChunkPartitioner( + RAI first, RAI last, size_t amountChunks) : + first_(first), last_(last) { if (amountChunks > 0) { - size = amountChunks; + size_ = amountChunks; } else { - // if no concrete chunk size was given, use number 
of cores... + // if no concrete number of chunks was given, use number of worker threads embb::tasks::Node& node = embb::tasks::Node::GetInstance(); - size = node.GetWorkerThreadCount(); + size_ = node.GetWorkerThreadCount(); } - - elements_count = static_cast(std::distance(first, last)); - if (size > elements_count) { + elements_count_ = static_cast(std::distance(first_, last_)); + if (size_ > elements_count_) { // if we want to make more chunks than we have elements, correct // the number of chunks - size = elements_count; + size_ = elements_count_; } - standard_chunk_size = elements_count / size; - bigger_chunk_count = elements_count % size; + standard_chunk_size_ = elements_count_ / size_; + bigger_chunk_count_ = elements_count_ % size_; } -template -const ChunkDescriptor - ChunkPartitioner::operator[]( +template +const ChunkDescriptor + ChunkPartitioner::operator[]( size_t const& index) const { - typedef typename std::iterator_traits::difference_type + typedef typename std::iterator_traits::difference_type difference_type; + // Number of elements preceding the given chunk size_t prec_elements_count = 0; - - if (index <= bigger_chunk_count) { - prec_elements_count = index * (standard_chunk_size + 1); + if (index <= bigger_chunk_count_) { + prec_elements_count = index * (standard_chunk_size_ + 1); } else { - prec_elements_count = (standard_chunk_size + 1) * bigger_chunk_count - + standard_chunk_size * (index - bigger_chunk_count); + prec_elements_count = + (standard_chunk_size_ + 1) * bigger_chunk_count_ + + (standard_chunk_size_ * (index - bigger_chunk_count_)); } - - size_t cur_elements_count = - (index < bigger_chunk_count) ?
- (standard_chunk_size + 1) : standard_chunk_size; - - ForwardIterator first_new = first; - std::advance(first_new, prec_elements_count); - - first_new = first + static_cast(prec_elements_count); - ForwardIterator last_new = first_new; - std::advance(last_new, cur_elements_count); - - return ChunkDescriptor(first_new, last_new); + size_t cur_elements_count = (index < bigger_chunk_count_) + ? (standard_chunk_size_ + 1) + : standard_chunk_size_; + RAI first_new(first_); + first_new += static_cast(prec_elements_count); + RAI last_new(first_new); + last_new += static_cast(cur_elements_count); + return ChunkDescriptor(first_new, last_new); } } // namespace internal diff --git a/algorithms_cpp/include/embb/algorithms/internal/partition.h b/algorithms_cpp/include/embb/algorithms/internal/partition.h index 7e7548e..400923d 100644 --- a/algorithms_cpp/include/embb/algorithms/internal/partition.h +++ b/algorithms_cpp/include/embb/algorithms/internal/partition.h @@ -38,14 +38,14 @@ namespace internal { * Describes a single partition of a 1-dimensional * partitioning, using first and last iterator. * - * \tparam ForwardIterator Type of the iterator. + * \tparam RAI Type of the iterator. */ -template +template class ChunkDescriptor { private: - ForwardIterator first; - ForwardIterator last; + RAI first_; + RAI last_; public: /** @@ -54,7 +54,7 @@ class ChunkDescriptor { * \param first The first iterator. * \param last The last iterator */ - ChunkDescriptor(ForwardIterator first, ForwardIterator last); + ChunkDescriptor(RAI first, RAI last); /** * Gets the first iterator. @@ -63,7 +63,7 @@ class ChunkDescriptor { * * \waitfree */ - ForwardIterator GetFirst() const; + RAI GetFirst() const; /** * Gets the last iterator. @@ -72,7 +72,7 @@ class ChunkDescriptor { * * \waitfree */ - ForwardIterator GetLast() const; + RAI GetLast() const; }; /** @@ -80,9 +80,9 @@ class ChunkDescriptor { * * Describes the interface for accessing a 1-dimensional partitioning. 
* - * \tparam ForwardIterator Type of the iterator. + * \tparam RAI Type of the iterator. */ -template +template class IPartitioner { public: virtual ~IPartitioner() {} @@ -106,7 +106,7 @@ class IPartitioner { * * \waitfree */ - virtual const ChunkDescriptor operator[]( + virtual const ChunkDescriptor operator[]( size_t const& index) const = 0; }; @@ -129,16 +129,16 @@ class IPartitioner { * 2: [6,7,8,9,10] * 3: [11,12,13] * - * \tparam ForwardIterator Type of the iterator. + * \tparam RAI Type of the iterator. */ -template -class BlockSizePartitioner : IPartitioner < ForwardIterator > { +template +class BlockSizePartitioner : IPartitioner < RAI > { private: - ForwardIterator first; - ForwardIterator last; - size_t chunkSize; - size_t elements_count; - size_t chunks; + RAI first_; + RAI last_; + size_t chunk_size_; + size_t elements_count_; + size_t chunks_; public: /** @@ -150,7 +150,7 @@ class BlockSizePartitioner : IPartitioner < ForwardIterator > { * \param chunkSize (Optional) size of the chunk. */ BlockSizePartitioner( - ForwardIterator first, ForwardIterator last, size_t chunkSize = 1); + RAI first, RAI last, size_t chunkSize = 1); /** * See IPartitioner @@ -164,7 +164,7 @@ class BlockSizePartitioner : IPartitioner < ForwardIterator > { * * \waitfree */ - virtual const ChunkDescriptor operator[]( + virtual const ChunkDescriptor operator[]( size_t const& index) const; }; @@ -196,17 +196,17 @@ class BlockSizePartitioner : IPartitioner < ForwardIterator > { * 4: [10,11] * 5: [12,13] * - * \tparam ForwardIterator Type of the iterator. + * \tparam RAI Type of the iterator. 
*/ -template -class ChunkPartitioner : IPartitioner < ForwardIterator > { +template +class ChunkPartitioner : IPartitioner < RAI > { private: - size_t size; - size_t elements_count; - ForwardIterator first; - ForwardIterator last; - size_t standard_chunk_size; - size_t bigger_chunk_count; + size_t size_; + size_t elements_count_; + RAI first_; + RAI last_; + size_t standard_chunk_size_; + size_t bigger_chunk_count_; public: /** @@ -227,7 +227,7 @@ class ChunkPartitioner : IPartitioner < ForwardIterator > { * \param last The last. * \param amountChunks (Optional) the amount chunks. */ - ChunkPartitioner(ForwardIterator first, ForwardIterator last, + ChunkPartitioner(RAI first, RAI last, size_t amountChunks = 0); /** @@ -235,7 +235,7 @@ class ChunkPartitioner : IPartitioner < ForwardIterator > { * * \waitfree */ - virtual const ChunkDescriptor operator[]( + virtual const ChunkDescriptor operator[]( size_t const& index) const; }; diff --git a/algorithms_cpp/test/partitioner_test.cc b/algorithms_cpp/test/partitioner_test.cc index bed03dc..d17cc77 100644 --- a/algorithms_cpp/test/partitioner_test.cc +++ b/algorithms_cpp/test/partitioner_test.cc @@ -33,9 +33,15 @@ #include #include -PartitionerTest::PartitionerTest() { - CreateUnit("algorithms partitioner test"). 
- Add(&PartitionerTest::TestBasic, this); +PartitionerTest::PartitionerTest() +: partitioned_array_size_(16384) { + // Size of array to be partitioned should be power of 2 + CreateUnit("TestBasic") + .Add(&PartitionerTest::TestBasic, this); + CreateUnit("TestLargeRange") + .Pre(&PartitionerTest::TestLargeRangePre, this) + .Add(&PartitionerTest::TestLargeRange, this) + .Post(&PartitionerTest::TestLargeRangePost, this); } void PartitionerTest::TestBasic() { @@ -70,3 +76,62 @@ void PartitionerTest::TestBasic() { PT_EXPECT_EQ_MSG(partitioner2.Size(), size_t(3), "Check count of partitions"); } +void PartitionerTest::TestLargeRangePre() { + partitioned_array_ = new int[partitioned_array_size_]; + for (size_t i = 0; i < partitioned_array_size_; ++i) { + partitioned_array_[i] = static_cast(i); + } +} + +void PartitionerTest::TestLargeRangePost() { + delete[] partitioned_array_; +} + +void PartitionerTest::TestLargeRange() { + // Test chunk partitioner with increasing number of chunks: + for (size_t num_chunks = 2; + num_chunks < partitioned_array_size_; + num_chunks *= 2) { + embb::algorithms::internal::ChunkPartitioner + chunk_partitioner( + partitioned_array_, + partitioned_array_ + partitioned_array_size_, + num_chunks); + int last_value_prev = -1; + PT_EXPECT_EQ(num_chunks, chunk_partitioner.Size()); + // Iterate over chunks in partition: + for (size_t chunk = 0; chunk < chunk_partitioner.Size(); ++chunk) { + int first_value = *(chunk_partitioner[chunk].GetFirst()); + int last_value = *(chunk_partitioner[chunk].GetLast() - 1); + PT_EXPECT_LT(first_value, last_value); + // Test seams between chunks: chunk[i].last + 1 == chunk[i+1].first + PT_EXPECT_EQ((last_value_prev + 1), first_value); + last_value_prev = last_value; + } + } + // Test block size partitioner with increasing chunk size: + for (size_t block_size = 1; + block_size < partitioned_array_size_; + block_size *= 2) { + embb::algorithms::internal::BlockSizePartitioner + chunk_partitioner( + partitioned_array_, + 
partitioned_array_ + partitioned_array_size_, + block_size); + int last_value_prev = -1; + // Iterate over chunks in partition: + for (size_t chunk = 0; chunk < chunk_partitioner.Size(); ++chunk) { + int first_value = *(chunk_partitioner[chunk].GetFirst()); + int last_value = *(chunk_partitioner[chunk].GetLast() - 1); + if (block_size == 1) { + PT_EXPECT_EQ(first_value, last_value); + } else { + PT_EXPECT_LT(first_value, last_value); + } + // Test seams between chunks: chunk[i].last + 1 == chunk[i+1].first + PT_EXPECT_EQ((last_value_prev + 1), first_value); + last_value_prev = last_value; + } + } +} + diff --git a/algorithms_cpp/test/partitioner_test.h b/algorithms_cpp/test/partitioner_test.h index b188740..ecea402 100644 --- a/algorithms_cpp/test/partitioner_test.h +++ b/algorithms_cpp/test/partitioner_test.h @@ -35,6 +35,13 @@ class PartitionerTest : public partest::TestCase { private: void TestBasic(); + + void TestLargeRangePre(); + void TestLargeRangePost(); + void TestLargeRange(); + + int * partitioned_array_; + size_t partitioned_array_size_; }; #endif // ALGORITHMS_CPP_TEST_PARTITIONER_TEST_H_ diff --git a/containers_cpp/include/embb/containers/internal/wait_free_spsc_queue-inl.h b/containers_cpp/include/embb/containers/internal/wait_free_spsc_queue-inl.h index 05638db..352e6c8 100644 --- a/containers_cpp/include/embb/containers/internal/wait_free_spsc_queue-inl.h +++ b/containers_cpp/include/embb/containers/internal/wait_free_spsc_queue-inl.h @@ -37,11 +37,19 @@ namespace embb { namespace containers { template -WaitFreeSPSCQueue::WaitFreeSPSCQueue(size_t capacity) : -capacity(capacity), - head_index(0), - tail_index(0) { - queue_array = allocator.allocate(capacity); +size_t WaitFreeSPSCQueue:: +AlignCapacityToPowerOfTwo(size_t capacity) { + size_t result = 1; + while (result < capacity) result <<= 1; + return result; +} + +template +WaitFreeSPSCQueue::WaitFreeSPSCQueue(size_t capacity) + : capacity(AlignCapacityToPowerOfTwo(capacity)), + head_index(0), + 
tail_index(0) { + queue_array = allocator.allocate(this->capacity); } template @@ -51,7 +59,7 @@ size_t WaitFreeSPSCQueue::GetCapacity() { template bool WaitFreeSPSCQueue::TryEnqueue(Type const & element) { - if (head_index - tail_index == capacity) + if (tail_index - head_index == capacity) return false; queue_array[tail_index % capacity] = element; diff --git a/containers_cpp/include/embb/containers/wait_free_spsc_queue.h b/containers_cpp/include/embb/containers/wait_free_spsc_queue.h index a0f0cdc..1fabf3f 100644 --- a/containers_cpp/include/embb/containers/wait_free_spsc_queue.h +++ b/containers_cpp/include/embb/containers/wait_free_spsc_queue.h @@ -68,7 +68,7 @@ * \code{.cpp} Queue(capacity) \endcode * Nothing * - * Constructs a queue with capacity \c capacity that holds elements of + * Constructs a queue with minimal capacity \c capacity that holds elements of * type \c T. * * @@ -145,11 +145,17 @@ class WaitFreeSPSCQueue { */ embb::base::Atomic tail_index; + /** + * Rounds capacity up to the smallest power of two not less than capacity + */ + static size_t AlignCapacityToPowerOfTwo(size_t capacity); + public: /** - * Creates a queue with the specified capacity. + * Creates a queue with at least the specified capacity. * - * \memory Allocates \c capacity elements of type \c Type. + * \memory Allocates \c 2^k elements of type \c Type, where \c k is the + * smallest number such that capacity <= 2^k holds.
* * \notthreadsafe * diff --git a/containers_cpp/test/queue_test-inl.h b/containers_cpp/test/queue_test-inl.h index 97207b2..b871965 100644 --- a/containers_cpp/test/queue_test-inl.h +++ b/containers_cpp/test/queue_test-inl.h @@ -276,16 +276,43 @@ QueueTestSingleProducerSingleConsumer_ThreadMethod() { template void QueueTest:: QueueTestSingleThreadEnqueueDequeue_ThreadMethod() { + // Enqueue the expected amount of elements for (int i = 0; i != n_queue_size; ++i) { bool success = queue->TryEnqueue(element_t(0, i * 133)); PT_ASSERT(success == true); } + + // Some queues may allow enqueueing more elements than their capacity + // permits, so try to enqueue additional elements until the queue is full + int oversized_count = n_queue_size; + while ( queue->TryEnqueue(element_t(0, oversized_count * 133)) ) { + ++oversized_count; + } + // Oversized amount should not be larger than the original capacity + PT_ASSERT_LT(oversized_count, 2 * n_queue_size); + + // Dequeue the expected amount of elements for (int i = 0; i != n_queue_size; ++i) { element_t dequ(0, -1); bool success = queue->TryDequeue(dequ); PT_ASSERT(success == true); PT_ASSERT(dequ.second == i * 133); } + + // Dequeue any elements enqueued above the original capacity + for (int i = n_queue_size; i != oversized_count; ++i) { + element_t dequ(0, -1); + bool success = queue->TryDequeue(dequ); + PT_ASSERT(success == true); + PT_ASSERT(dequ.second == i * 133); + } + + // Ensure the queue is now empty + { + element_t dequ; + bool success = queue->TryDequeue(dequ); + PT_ASSERT(success == false); + } } template diff --git a/mtapi_opencl_c/src/CL/cl_gl_ext.h b/mtapi_opencl_c/src/CL/cl_gl_ext.h index 90996f2..5555dd1 100644 --- a/mtapi_opencl_c/src/CL/cl_gl_ext.h +++ b/mtapi_opencl_c/src/CL/cl_gl_ext.h @@ -41,7 +41,8 @@ extern "C" { /* * For each extension, follow this template - * /* cl_VEN_extname extension */ + * // cl_VEN_extname extension + */ /* #define cl_VEN_extname 1 * ... define new types, if any * ... 
define new tokens, if any