Commit 5f3e1399 by Tobias Fuchs

Merge branch 'development' into embb327_llx_scx

parents 56a22c45 c291e97a
# Taken from CMake Version 3.2.1, modified to work on older versions
#.rst:
# FindOpenCL
# ----------
#
# Try to find OpenCL
#
# Once done this will define::
#
# OpenCL_FOUND - True if OpenCL was found
# OpenCL_INCLUDE_DIRS - include directories for OpenCL
# OpenCL_LIBRARIES - link against this library to use OpenCL
# OpenCL_VERSION_STRING - Highest supported OpenCL version (eg. 1.2)
# OpenCL_VERSION_MAJOR - The major version of the OpenCL implementation
# OpenCL_VERSION_MINOR - The minor version of the OpenCL implementation
#
# The module will also define two cache variables::
#
# OpenCL_INCLUDE_DIR - the OpenCL include directory
# OpenCL_LIBRARY - the path to the OpenCL library
#
#=============================================================================
# Copyright 2014 Matthaeus G. Chajdas
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# _FIND_OPENCL_VERSION()
#
# Determines the newest OpenCL version advertised by the OpenCL header below
# OpenCL_INCLUDE_DIR by probing for the CL_VERSION_<major>_<minor> macros,
# newest first. On the first hit these variables are set in the caller's scope:
#   OpenCL_VERSION_STRING - "<major>.<minor>"
#   OpenCL_VERSION_MAJOR  - major component
#   OpenCL_VERSION_MINOR  - minor component
# Nothing is set when no header directory is known or no macro is found.
function(_FIND_OPENCL_VERSION)
  # Without a header directory every probe below is bound to fail, and
  # check_symbol_exists() stores its results in the cache, so the failures
  # would stick even after OpenCL gets installed later. Skip the probing.
  if(NOT OpenCL_INCLUDE_DIR)
    return()
  endif()

  include(CheckSymbolExists)
  include(CMakePushCheckState)
  set(CMAKE_REQUIRED_QUIET ${OpenCL_FIND_QUIETLY})

  # On Apple platforms the header is looked up as OpenCL/cl.h instead of
  # CL/cl.h.
  if(APPLE)
    set(cl_header "${OpenCL_INCLUDE_DIR}/OpenCL/cl.h")
  else()
    set(cl_header "${OpenCL_INCLUDE_DIR}/CL/cl.h")
  endif()

  cmake_push_check_state()
  set(CMAKE_REQUIRED_INCLUDES "${OpenCL_INCLUDE_DIR}")
  foreach(cl_version "2_0" "1_2" "1_1" "1_0")
    check_symbol_exists(
      CL_VERSION_${cl_version}
      "${cl_header}"
      OPENCL_VERSION_${cl_version})
    if(OPENCL_VERSION_${cl_version})
      # "1_2" -> "1.2", then split into its numeric components.
      string(REPLACE "_" "." dotted_version "${cl_version}")
      set(OpenCL_VERSION_STRING ${dotted_version} PARENT_SCOPE)
      string(REGEX MATCHALL "[0-9]+" version_components "${dotted_version}")
      list(GET version_components 0 major_version)
      list(GET version_components 1 minor_version)
      set(OpenCL_VERSION_MAJOR ${major_version} PARENT_SCOPE)
      set(OpenCL_VERSION_MINOR ${minor_version} PARENT_SCOPE)
      # Versions are probed newest first, so the first hit is the answer.
      break()
    endif()
  endforeach()
  cmake_pop_check_state()
endfunction()
# Locate the directory containing the OpenCL headers (CL/cl.h or OpenCL/cl.h).
# In addition to the default search locations, the directories named by the
# environment variables listed below are searched, each combined with the
# given path suffixes. The result is stored in the cache variable
# OpenCL_INCLUDE_DIR.
find_path(OpenCL_INCLUDE_DIR
  NAMES CL/cl.h OpenCL/cl.h
  PATHS
    ENV "PROGRAMFILES(X86)"
    ENV AMDAPPSDKROOT
    ENV INTELOCLSDKROOT
    ENV NVSDKCOMPUTE_ROOT
    ENV CUDA_PATH
    ENV ATISTREAMSDKROOT
  PATH_SUFFIXES
    include
    OpenCL/common/inc
    "AMD APP/include")

# Derive OpenCL_VERSION_STRING / _MAJOR / _MINOR from the located header.
_FIND_OPENCL_VERSION()
# Locate the OpenCL library; the result is stored in the cache variable
# OpenCL_LIBRARY.
if(NOT WIN32)
  find_library(OpenCL_LIBRARY NAMES OpenCL)
else()
  # On Windows the library directory below each SDK root depends on the
  # pointer size of the target, so pick the matching set of suffixes first.
  unset(_opencl_lib_suffixes)
  if(CMAKE_SIZEOF_VOID_P EQUAL 4)
    set(_opencl_lib_suffixes
      "AMD APP/lib/x86"
      lib/x86
      lib/Win32
      OpenCL/common/lib/Win32)
  elseif(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(_opencl_lib_suffixes
      "AMD APP/lib/x86_64"
      lib/x86_64
      lib/x64
      OpenCL/common/lib/x64)
  endif()

  # No search is performed for any other pointer size.
  if(_opencl_lib_suffixes)
    find_library(OpenCL_LIBRARY
      NAMES OpenCL
      PATHS
        ENV "PROGRAMFILES(X86)"
        ENV AMDAPPSDKROOT
        ENV INTELOCLSDKROOT
        ENV CUDA_PATH
        ENV NVSDKCOMPUTE_ROOT
        ENV ATISTREAMSDKROOT
      PATH_SUFFIXES
        ${_opencl_lib_suffixes})
  endif()
  unset(_opencl_lib_suffixes)
endif()
# Publish the result variables documented at the top of this module.
set(OpenCL_INCLUDE_DIRS ${OpenCL_INCLUDE_DIR})
set(OpenCL_LIBRARIES ${OpenCL_LIBRARY})

# Hand-written replacement for find_package_handle_standard_args(), which is
# not used here so that the module keeps working with older CMake versions:
# OpenCL counts as found only when both the library and the include directory
# were located.
set(OpenCL_FOUND 0)
if(OpenCL_LIBRARIES AND OpenCL_INCLUDE_DIRS)
  set(OpenCL_FOUND 1)
endif()
......@@ -28,7 +28,38 @@ cmake_minimum_required (VERSION 2.8.9)
# Version number
set (EMBB_BASE_VERSION_MAJOR 0)
set (EMBB_BASE_VERSION_MINOR 2)
set (EMBB_BASE_VERSION_PATCH 3)
set (EMBB_BASE_VERSION_PATCH 4)
# Fix compilation for CMake versions >= 3.1
#
# New Policy 0054:
# CMake 3.1 and above no longer implicitly dereference variables
# or interpret keywords in an if() command argument when it is a
# Quoted Argument.
# See http://www.cmake.org/cmake/help/v3.1/policy/CMP0054.html
#
# New Policy 0053:
# CMake 3.1 introduced a faster implementation of the evaluation of
# variable references and escape sequences. This breaks compilation
# here.
# See http://www.cmake.org/cmake/help/v3.1/policy/CMP0053.html
#
# Set those policies to be treated the legacy (CMake < 3.1) way.
# Pin CMP0054 and CMP0053 to their legacy (OLD) behaviour, but only on CMake
# versions that know these policies.
foreach(legacy_policy CMP0054 CMP0053)
  if(POLICY ${legacy_policy})
    cmake_policy(SET ${legacy_policy} OLD)
  endif()
endforeach()

# Detect OpenCL; the module sets OpenCL_FOUND, which is checked right below.
include(CMakeCommon/FindOpenCL.cmake)
if(NOT OpenCL_FOUND)
  message(STATUS "OpenCL is not there, will build without MTAPI OpenCL Plugin.")
endif()
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING
......@@ -93,8 +124,26 @@ else()
endif()
message(" (set with command line option -DUSE_EXCEPTIONS=ON/OFF)")
# Names of the test executables the build is expected to produce.
set(EXPECTED_EMBB_TEST_EXECUTABLES
  embb_algorithms_cpp_test
  embb_base_c_test
  embb_base_cpp_test
  embb_containers_cpp_test
  embb_dataflow_cpp_test
  embb_mtapi_c_test
  embb_mtapi_cpp_test
  embb_mtapi_network_c_test
  embb_tasks_cpp_test)

# The MTAPI OpenCL test is only expected when OpenCL was found.
if(OpenCL_FOUND)
  list(APPEND EXPECTED_EMBB_TEST_EXECUTABLES embb_mtapi_opencl_c_test)
endif()
## Copy test execution script to local binaries folder
#
if (DEFINED CYGWIN)
set(test_script_in run_tests_cygwin.sh)
set(test_script_out run_tests.sh)
......@@ -105,9 +154,7 @@ else()
set(test_script_in run_tests_windows.bat)
set(test_script_out run_tests.bat)
endif()
execute_process(
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/scripts/${test_script_in} binaries/${test_script_out}
)
CONFIGURE_FILE( ${CMAKE_SOURCE_DIR}/scripts/${test_script_in}.cmake binaries/${test_script_out} )
## Test and Partest build
#
......@@ -126,7 +173,9 @@ add_subdirectory(base_c)
add_subdirectory(base_cpp)
add_subdirectory(mtapi_c)
add_subdirectory(mtapi_network_c)
add_subdirectory(mtapi_opencl_c)
if(OpenCL_FOUND)
add_subdirectory(mtapi_opencl_c)
endif()
add_subdirectory(tasks_cpp)
add_subdirectory(mtapi_cpp)
add_subdirectory(containers_cpp)
......
......@@ -92,13 +92,14 @@ contact us: embb-dev@googlegroups.com.
Directory Structure
-------------------
EMB² is a technology stack consisting of various building blocks. For some of
them, there exist C and C++ versions, others are only implemented in C++. The
directory names are postfixed with either "_cpp" or "_c" for the C++ and C
versions, respectively. Currently, EMB² contains the following components:
EMB² consists of various building blocks. For some of them, there exist C and
C++ versions, others are only implemented in C++. The directory names are
postfixed with either "_cpp" or "_c" for the C++ and C versions, respectively.
Currently, EMB² contains the following components:
- base: base_c, base_cpp
- mtapi: mtapi_c, mtapi_cpp
- mtapi: mtapi_c, mtapi_network_c, mtapi_opencl_c, mtapi_cpp
- tasks: tasks_cpp
- algorithms: algorithms_cpp
- dataflow: dataflow_cpp
- containers: containers_cpp
......@@ -109,12 +110,14 @@ the header files, source files, and unit tests, respectively.
Component base_c contains abstractions for threading, synchronization, atomic
operations, and other functionalities. As the name indicates, the code is
implemented in C. Component base_cpp is mainly a C++ wrapper around the base_c
functionalities. Component mtapi_c is a task scheduler written in C and
mtapi_cpp a C++ wrapper for the scheduler. Component algorithms_cpp provides
high-level constructs for typical parallelization task in C++, and
dataflow_cpp generic skeletons for the development of parallel stream-based
applications. Finally, component containers_cpp provides containers, i.e.,
data structures for storing object in an organized and thread-safe way.
functions. Component mtapi_c is a task scheduler written in C and mtapi_cpp a
C++ wrapper for the scheduler (mtapi_network_c and mtapi_opencl_c are scheduler
plugins for distributed and OpenCL-based heterogeneous systems, respectively).
To simplify programming of homogeneous systems, tasks_cpp contains abstractions
to the MTAPI interfaces. Component algorithms_cpp provides high-level constructs
for typical parallelization tasks in C++, and dataflow_cpp generic skeletons for
the development of parallel stream-based applications. Finally, containers_cpp
provides data structures for storing objects in a thread-safe way.
Build and Installation
......
......@@ -31,109 +31,104 @@ namespace embb {
namespace algorithms {
namespace internal {
template<typename ForwardIterator>
ChunkDescriptor<ForwardIterator>::ChunkDescriptor(ForwardIterator first,
ForwardIterator last) :
first(first), last(last) {
template<typename RAI>
ChunkDescriptor<RAI>::ChunkDescriptor(
RAI first, RAI last) :
first_(first), last_(last) {
}
template<typename ForwardIterator>
ForwardIterator ChunkDescriptor<ForwardIterator>::GetFirst() const {
return first;
template<typename RAI>
RAI ChunkDescriptor<RAI>::GetFirst() const {
return first_;
}
template<typename ForwardIterator>
ForwardIterator ChunkDescriptor<ForwardIterator>::GetLast() const {
return last;
template<typename RAI>
RAI ChunkDescriptor<RAI>::GetLast() const {
return last_;
}
template<typename ForwardIterator>
BlockSizePartitioner<ForwardIterator>::BlockSizePartitioner(
ForwardIterator first, ForwardIterator last, size_t chunkSize) :
first(first), last(last), chunkSize(chunkSize) {
elements_count = static_cast<size_t>(std::distance(first, last));
chunks = elements_count / chunkSize;
if (elements_count % chunkSize != 0)
chunks++;
template<typename RAI>
BlockSizePartitioner<RAI>::BlockSizePartitioner(
RAI first, RAI last, size_t chunkSize) :
first_(first), last_(last), chunk_size_(chunkSize) {
elements_count_ = static_cast<size_t>(std::distance(first_, last_));
chunks_ = elements_count_ / chunk_size_;
if (elements_count_ % chunk_size_ != 0) {
chunks_++;
}
}
template<typename ForwardIterator>
size_t BlockSizePartitioner<ForwardIterator>::Size() {
return chunks;
template<typename RAI>
size_t BlockSizePartitioner<RAI>::Size() {
return chunks_;
}
template<typename ForwardIterator>
const ChunkDescriptor<ForwardIterator>
BlockSizePartitioner<ForwardIterator>::operator[](
size_t const& index) const {
ForwardIterator first_new = first;
std::advance(first_new, index * chunkSize);
ForwardIterator last_new = first_new;
if (index >= chunks - 1) {
last_new = last;
template<typename RAI>
const ChunkDescriptor<RAI>
BlockSizePartitioner<RAI>::operator[](
size_t const & index) const {
typedef typename std::iterator_traits<RAI>::difference_type
difference_type;
RAI first_new(first_);
first_new += static_cast<difference_type>(chunk_size_ * index);
RAI last_new(first_new);
if (index >= chunks_ - 1) {
last_new = last_;
} else {
std::advance(last_new, chunkSize);
last_new += static_cast<difference_type>(chunk_size_);
}
return ChunkDescriptor<ForwardIterator>(first_new, last_new);
return ChunkDescriptor<RAI>(first_new, last_new);
}
template<typename ForwardIterator>
size_t ChunkPartitioner<ForwardIterator>::Size() {
return size;
template<typename RAI>
size_t ChunkPartitioner<RAI>::Size() {
return size_;
}
template<typename ForwardIterator>
ChunkPartitioner<ForwardIterator>::ChunkPartitioner(ForwardIterator first,
ForwardIterator last, size_t amountChunks) :
first(first), last(last) {
template<typename RAI>
ChunkPartitioner<RAI>::ChunkPartitioner(
RAI first, RAI last, size_t amountChunks) :
first_(first), last_(last) {
if (amountChunks > 0) {
size = amountChunks;
size_ = amountChunks;
} else {
// if no concrete chunk size was given, use number of cores...
// if no concrete chunk size was given, use number of cores
embb::tasks::Node& node = embb::tasks::Node::GetInstance();
size = node.GetWorkerThreadCount();
size_ = node.GetWorkerThreadCount();
}
elements_count = static_cast<size_t>(std::distance(first, last));
if (size > elements_count) {
elements_count_ = static_cast<size_t>(std::distance(first_, last_));
if (size_ > elements_count_) {
// if we want to make more chunks than we have elements, correct
// the number of chunks
size = elements_count;
size_ = elements_count_;
}
standard_chunk_size = elements_count / size;
bigger_chunk_count = elements_count % size;
standard_chunk_size_ = elements_count_ / size_;
bigger_chunk_count_ = elements_count_ % size_;
}
template<typename ForwardIterator>
const ChunkDescriptor<ForwardIterator>
ChunkPartitioner<ForwardIterator>::operator[](
template<typename RAI>
const ChunkDescriptor<RAI>
ChunkPartitioner<RAI>::operator[](
size_t const& index) const {
typedef typename std::iterator_traits<ForwardIterator>::difference_type
typedef typename std::iterator_traits<RAI>::difference_type
difference_type;
// Number of elements preceding the given chunk
size_t prec_elements_count = 0;
if (index <= bigger_chunk_count) {
prec_elements_count = index * (standard_chunk_size + 1);
if (index <= bigger_chunk_count_) {
prec_elements_count = index * (standard_chunk_size_ + 1);
} else {
prec_elements_count = (standard_chunk_size + 1) * bigger_chunk_count
+ standard_chunk_size * (index - bigger_chunk_count);
prec_elements_count =
(standard_chunk_size_ + 1) * bigger_chunk_count_ +
(standard_chunk_size_ * (index - bigger_chunk_count_));
}
size_t cur_elements_count =
(index < bigger_chunk_count) ?
(standard_chunk_size + 1) : standard_chunk_size;
ForwardIterator first_new = first;
std::advance(first_new, prec_elements_count);
first_new = first + static_cast<difference_type>(prec_elements_count);
ForwardIterator last_new = first_new;
std::advance(last_new, cur_elements_count);
return ChunkDescriptor<ForwardIterator>(first_new, last_new);
size_t cur_elements_count = (index < bigger_chunk_count_)
? (standard_chunk_size_ + 1)
: standard_chunk_size_;
RAI first_new(first_);
first_new += static_cast<difference_type>(prec_elements_count);
RAI last_new(first_new);
last_new += static_cast<difference_type>(cur_elements_count);
return ChunkDescriptor<RAI>(first_new, last_new);
}
} // namespace internal
......
......@@ -38,14 +38,14 @@ namespace internal {
* Describes a single partition of a 1-dimensional
* partitioning, using first and last iterator.
*
* \tparam ForwardIterator Type of the iterator.
* \tparam RAI Type of the iterator.
*/
template<typename ForwardIterator>
template<typename RAI>
class ChunkDescriptor {
private:
ForwardIterator first;
ForwardIterator last;
RAI first_;
RAI last_;
public:
/**
......@@ -54,7 +54,7 @@ class ChunkDescriptor {
* \param first The first iterator.
* \param last The last iterator
*/
ChunkDescriptor(ForwardIterator first, ForwardIterator last);
ChunkDescriptor(RAI first, RAI last);
/**
* Gets the first iterator.
......@@ -63,7 +63,7 @@ class ChunkDescriptor {
*
* \waitfree
*/
ForwardIterator GetFirst() const;
RAI GetFirst() const;
/**
* Gets the last iterator.
......@@ -72,7 +72,7 @@ class ChunkDescriptor {
*
* \waitfree
*/
ForwardIterator GetLast() const;
RAI GetLast() const;
};
/**
......@@ -80,9 +80,9 @@ class ChunkDescriptor {
*
* Describes the interface for accessing a 1-dimensional partitioning.
*
* \tparam ForwardIterator Type of the iterator.
* \tparam RAI Type of the iterator.
*/
template<typename ForwardIterator>
template<typename RAI>
class IPartitioner {
public:
virtual ~IPartitioner() {}
......@@ -106,7 +106,7 @@ class IPartitioner {
*
* \waitfree
*/
virtual const ChunkDescriptor<ForwardIterator> operator[](
virtual const ChunkDescriptor<RAI> operator[](
size_t const& index) const = 0;
};
......@@ -129,16 +129,16 @@ class IPartitioner {
* 2: [6,7,8,9,10]
* 3: [11,12,13]
*
* \tparam ForwardIterator Type of the iterator.
* \tparam RAI Type of the iterator.
*/
template<typename ForwardIterator>
class BlockSizePartitioner : IPartitioner < ForwardIterator > {
template<typename RAI>
class BlockSizePartitioner : IPartitioner < RAI > {
private:
ForwardIterator first;
ForwardIterator last;
size_t chunkSize;
size_t elements_count;
size_t chunks;
RAI first_;
RAI last_;
size_t chunk_size_;
size_t elements_count_;
size_t chunks_;
public:
/**
......@@ -150,7 +150,7 @@ class BlockSizePartitioner : IPartitioner < ForwardIterator > {
* \param chunkSize (Optional) size of the chunk.
*/
BlockSizePartitioner(
ForwardIterator first, ForwardIterator last, size_t chunkSize = 1);
RAI first, RAI last, size_t chunkSize = 1);
/**
* See IPartitioner
......@@ -164,7 +164,7 @@ class BlockSizePartitioner : IPartitioner < ForwardIterator > {
*
* \waitfree
*/
virtual const ChunkDescriptor<ForwardIterator> operator[](
virtual const ChunkDescriptor<RAI> operator[](
size_t const& index) const;
};
......@@ -196,17 +196,17 @@ class BlockSizePartitioner : IPartitioner < ForwardIterator > {
* 4: [10,11]
* 5: [12,13]
*
* \tparam ForwardIterator Type of the iterator.
* \tparam RAI Type of the iterator.
*/
template<typename ForwardIterator>
class ChunkPartitioner : IPartitioner < ForwardIterator > {
template<typename RAI>
class ChunkPartitioner : IPartitioner < RAI > {
private:
size_t size;
size_t elements_count;
ForwardIterator first;
ForwardIterator last;
size_t standard_chunk_size;
size_t bigger_chunk_count;
size_t size_;
size_t elements_count_;
RAI first_;
RAI last_;
size_t standard_chunk_size_;
size_t bigger_chunk_count_;
public:
/**
......@@ -227,7 +227,7 @@ class ChunkPartitioner : IPartitioner < ForwardIterator > {
* \param last The last.
* \param amountChunks (Optional) the amount chunks.
*/
ChunkPartitioner(ForwardIterator first, ForwardIterator last,
ChunkPartitioner(RAI first, RAI last,
size_t amountChunks = 0);
/**
......@@ -235,7 +235,7 @@ class ChunkPartitioner : IPartitioner < ForwardIterator > {
*
* \waitfree
*/
virtual const ChunkDescriptor<ForwardIterator> operator[](
virtual const ChunkDescriptor<RAI> operator[](
size_t const& index) const;
};
......
......@@ -33,9 +33,15 @@
#include <vector>
#include <list>
PartitionerTest::PartitionerTest() {
CreateUnit("algorithms partitioner test").
Add(&PartitionerTest::TestBasic, this);
PartitionerTest::PartitionerTest()
: partitioned_array_size_(16384) {
// Size of array to be partitioned should be power of 2
CreateUnit("TestBasic")
.Add(&PartitionerTest::TestBasic, this);
CreateUnit("TestLargeRange")
.Pre(&PartitionerTest::TestLargeRangePre, this)
.Add(&PartitionerTest::TestLargeRange, this)
.Post(&PartitionerTest::TestLargeRangePost, this);
}
void PartitionerTest::TestBasic() {
......@@ -70,3 +76,62 @@ void PartitionerTest::TestBasic() {
PT_EXPECT_EQ_MSG(partitioner2.Size(), size_t(3), "Check count of partitions");
}
// Allocates the array of partitioned_array_size_ ints used by the large
// range test and stores each element's index as its value.
void PartitionerTest::TestLargeRangePre() {
  int * const values = new int[partitioned_array_size_];
  size_t position = 0;
  while (position != partitioned_array_size_) {
    values[position] = static_cast<int>(position);
    ++position;
  }
  partitioned_array_ = values;
}
// Releases the array that TestLargeRangePre() allocated with new[].
void PartitionerTest::TestLargeRangePost() {
delete[] partitioned_array_;
}
// Partitions the int array [partitioned_array_, partitioned_array_ +
// partitioned_array_size_) with ChunkPartitioner and BlockSizePartitioner
// and checks the resulting chunks. The checks rely on element i holding the
// value i, which is how TestLargeRangePre() fills the array.
void PartitionerTest::TestLargeRange() {
// Test chunk partitioner with increasing number of chunks:
for (size_t num_chunks = 2;
num_chunks < partitioned_array_size_;
num_chunks *= 2) {
embb::algorithms::internal::ChunkPartitioner<int *>
chunk_partitioner(
partitioned_array_,
partitioned_array_ + partitioned_array_size_,
num_chunks);
// Last value of the previous chunk; -1 so that the first chunk is expected
// to start with value 0.
int last_value_prev = -1;
PT_EXPECT_EQ(num_chunks, chunk_partitioner.Size());
// Iterate over chunks in partition:
for (size_t chunk = 0; chunk < chunk_partitioner.Size(); ++chunk) {
int first_value = *(chunk_partitioner[chunk].GetFirst());
// GetLast() is dereferenced one position earlier, i.e. it is treated as
// pointing one past the chunk's final element.
int last_value = *(chunk_partitioner[chunk].GetLast() - 1);
PT_EXPECT_LT(first_value, last_value);
// Test seams between chunks: chunk[i].last + 1 == chunk[i+1].first
PT_EXPECT_EQ((last_value_prev + 1), first_value);
last_value_prev = last_value;
}
}
// Test block size partitioner with increasing chunk size:
for (size_t block_size = 1;
block_size < partitioned_array_size_;
block_size *= 2) {
embb::algorithms::internal::BlockSizePartitioner<int *>
chunk_partitioner(
partitioned_array_,
partitioned_array_ + partitioned_array_size_,
block_size);
// Last value of the previous chunk; -1 so that the first chunk is expected
// to start with value 0.
int last_value_prev = -1;
// Iterate over chunks in partition:
for (size_t chunk = 0; chunk < chunk_partitioner.Size(); ++chunk) {
int first_value = *(chunk_partitioner[chunk].GetFirst());
int last_value = *(chunk_partitioner[chunk].GetLast() - 1);
// With block size 1 the first and the last value of a chunk are expected
// to coincide; otherwise the first must be smaller than the last.
if (block_size == 1) {
PT_EXPECT_EQ(first_value, last_value);
} else {
PT_EXPECT_LT(first_value, last_value);
}
// Test seams between chunks: chunk[i].last + 1 == chunk[i+1].first
PT_EXPECT_EQ((last_value_prev + 1), first_value);
last_value_prev = last_value;
}
}
}
......@@ -35,6 +35,13 @@ class PartitionerTest : public partest::TestCase {
private:
void TestBasic();
void TestLargeRangePre();
void TestLargeRangePost();
void TestLargeRange();
int * partitioned_array_;
size_t partitioned_array_size_;
};
#endif // ALGORITHMS_CPP_TEST_PARTITIONER_TEST_H_
......@@ -37,11 +37,19 @@
namespace embb {
namespace containers {
template<typename Type, class Allocator>
WaitFreeSPSCQueue<Type, Allocator>::WaitFreeSPSCQueue(size_t capacity) :
capacity(capacity),
size_t WaitFreeSPSCQueue<Type, Allocator>::
AlignCapacityToPowerOfTwo(size_t capacity) {
size_t result = 1;
while (result < capacity) result <<= 1;
return result;
}
template<typename Type, class Allocator>
WaitFreeSPSCQueue<Type, Allocator>::WaitFreeSPSCQueue(size_t capacity)
: capacity(AlignCapacityToPowerOfTwo(capacity)),
head_index(0),
tail_index(0) {
queue_array = allocator.allocate(capacity);
queue_array = allocator.allocate(this->capacity);
}
template<typename Type, class Allocator>
......@@ -51,7 +59,7 @@ size_t WaitFreeSPSCQueue<Type, Allocator>::GetCapacity() {
template<typename Type, class Allocator>
bool WaitFreeSPSCQueue<Type, Allocator>::TryEnqueue(Type const & element) {
if (head_index - tail_index == capacity)
if (tail_index - head_index == capacity)
return false;
queue_array[tail_index % capacity] = element;
......
......@@ -68,7 +68,7 @@
* <td>\code{.cpp} Queue<Type>(capacity) \endcode</td>
* <td>Nothing</td>
* <td>
* Constructs a queue with capacity \c capacity that holds elements of
* Constructs a queue with minimal capacity \c capacity that holds elements of
* type \c T.
* </td>
* </tr>
......@@ -145,11 +145,17 @@ class WaitFreeSPSCQueue {
*/
embb::base::Atomic<size_t> tail_index;
/**
* Round capacity up to the smallest power of two that is not less than
* \c capacity
*/
static size_t AlignCapacityToPowerOfTwo(size_t capacity);
public:
/**
* Creates a queue with the specified capacity.
* Creates a queue with at least the specified capacity.
*
* \memory Allocates \c capacity elements of type \c Type.
* \memory Allocates \c 2^k elements of type \c Type, where \c k is the
* smallest number such that <tt>capacity <= 2^k</tt> holds.
*
* \notthreadsafe
*
......
......@@ -276,16 +276,43 @@ QueueTestSingleProducerSingleConsumer_ThreadMethod() {
// Single-threaded enqueue/dequeue round trip: enqueues n_queue_size elements,
// keeps enqueueing until the queue rejects further elements, then dequeues
// everything and checks that the values come back in insertion order and
// that the queue is empty afterwards.
template<typename Queue_t, bool MultipleProducers, bool MultipleConsumers>
void QueueTest<Queue_t, MultipleProducers, MultipleConsumers>::
QueueTestSingleThreadEnqueueDequeue_ThreadMethod() {
// Enqueue the expected amount of elements
for (int i = 0; i != n_queue_size; ++i) {
bool success = queue->TryEnqueue(element_t(0, i * 133));
PT_ASSERT(success == true);
}
// Some queues may allow enqueueing more elements than their capacity
// permits, so try to enqueue additional elements until the queue is full
int oversized_count = n_queue_size;
while ( queue->TryEnqueue(element_t(0, oversized_count * 133)) ) {
++oversized_count;
}
// Oversized amount should not be larger than the original capacity
// (i.e. the total number of enqueued elements stays below 2 * n_queue_size)
PT_ASSERT_LT(oversized_count, 2 * n_queue_size);
// Dequeue the expected amount of elements
for (int i = 0; i != n_queue_size; ++i) {
// -1 is never enqueued above, so a dequeue that leaves the element
// untouched fails the value check below
element_t dequ(0, -1);
bool success = queue->TryDequeue(dequ);
PT_ASSERT(success == true);
PT_ASSERT(dequ.second == i * 133);
}
// Dequeue any elements enqueued above the original capacity
for (int i = n_queue_size; i != oversized_count; ++i) {
element_t dequ(0, -1);
bool success = queue->TryDequeue(dequ);
PT_ASSERT(success == true);
PT_ASSERT(dequ.second == i * 133);
}
// Ensure the queue is now empty
{
element_t dequ;
bool success = queue->TryDequeue(dequ);
PT_ASSERT(success == false);
}
}
template<typename Queue_t, bool MultipleProducers, bool MultipleConsumers>
......
......@@ -36,6 +36,9 @@
#include <embb/dataflow/dataflow.h>
#define NUM_SLICES 8
#define TEST_COUNT 12
typedef embb::dataflow::Network<8> MyNetwork;
typedef MyNetwork::ConstantSource< int > MyConstantSource;
typedef MyNetwork::Source< int > MySource;
......@@ -49,8 +52,6 @@ typedef MyNetwork::Sink< int > MySink;
typedef MyNetwork::Switch< int > MySwitch;
typedef MyNetwork::Select< int > MySelect;
#define TEST_COUNT 12
embb::base::Atomic<int> source_counter;
int source_array[TEST_COUNT];
......@@ -142,8 +143,25 @@ SimpleTest::SimpleTest() {
CreateUnit("dataflow_cpp simple test").Add(&SimpleTest::TestBasic, this);
}
#define MTAPI_DOMAIN_ID 1
#define MTAPI_NODE_ID 1
void SimpleTest::TestBasic() {
embb::tasks::Node::Initialize(1, 1);
// All available cores
embb::base::CoreSet core_set(true);
int num_cores = core_set.Count();
embb::tasks::Node::Initialize(
MTAPI_DOMAIN_ID,
MTAPI_NODE_ID,
core_set,
1024, // max tasks (default: 1024)
128, // max groups (default: 128)
// Currently needs to be initialized
// with (max_queues + 1), see defect embb449
num_cores + 1, // max queues (default: 16)
1024, // queue capacity (default: 1024)
4 // num priorities (default: 4)
);
for (int ii = 0; ii < 10000; ii++) {
ArraySink<TEST_COUNT> asink;
......@@ -163,6 +181,7 @@ void SimpleTest::TestBasic() {
filter_array[kk] = -1;
mult_array[kk] = -1;
}
source_counter = 0;
pred_counter = 0;
mult_counter = 0;
......@@ -189,7 +208,11 @@ void SimpleTest::TestBasic() {
network.AddSource(constant);
network.AddSource(source);
try {
network();
} catch (embb::base::ErrorException & e) {
PT_ASSERT_MSG(false, e.What());
}
PT_EXPECT(asink.Check());
}
......@@ -198,3 +221,4 @@ void SimpleTest::TestBasic() {
PT_EXPECT(embb_get_bytes_allocated() == 0);
}
......@@ -2,6 +2,7 @@ project (project_embb_tutorials)
file(GLOB_RECURSE EXAMPLES_SOURCES "*.cc" "*.h")
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/
${CMAKE_CURRENT_BINARY_DIR}/
......@@ -12,7 +13,6 @@ include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_c/include
${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_c/src
${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_network_c/include
${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_opencl_c/include
${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_cpp/include
${CMAKE_CURRENT_SOURCE_DIR}/../../tasks_cpp/include
${CMAKE_CURRENT_BINARY_DIR}/../../tasks_cpp/include
......@@ -21,6 +21,21 @@ include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/../../dataflow_cpp/include
)
# Enable or disable the OpenCL parts of the examples depending on whether
# OpenCL was found (OpenCL_FOUND).
if(OpenCL_FOUND)
  # Checked in the example sources to compile the OpenCL-specific code.
  add_definitions(-DEMBB_WITH_OPENCL)
  # Headers of the MTAPI OpenCL plugin.
  include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_opencl_c/include
  )
  # Used further down to link the OpenCL plugin to the examples target; left
  # unset without OpenCL so that it expands to nothing there.
  set(EMBB_MTAPI_OPENCL_C_CONDITIONAL "embb_mtapi_opencl_c")
else()
  # Drop the OpenCL examples from the sources (they must not be built).
  file(GLOB_RECURSE EXAMPLES_SOURCES_OPENCL_TO_REMOVE "*opencl*")
  # list(REMOVE_ITEM) needs at least one item; older CMake releases reject
  # the call when the glob above matched nothing.
  if(EXAMPLES_SOURCES_OPENCL_TO_REMOVE)
    list(REMOVE_ITEM EXAMPLES_SOURCES ${EXAMPLES_SOURCES_OPENCL_TO_REMOVE})
  endif()
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "-std=c++11")
set (EXTRA_LIBS dl)
......@@ -32,7 +47,7 @@ ENDIF()
add_executable(examples ${EXAMPLES_SOURCES})
target_link_libraries(examples embb_dataflow_cpp embb_algorithms_cpp embb_tasks_cpp embb_mtapi_cpp
embb_mtapi_network_c embb_mtapi_opencl_c embb_mtapi_c
embb_mtapi_network_c ${EMBB_MTAPI_OPENCL_C_CONDITIONAL} embb_mtapi_c
embb_base_cpp embb_base_c embb_containers_cpp
${EXTRA_LIBS} ${compiler_libs})
CopyBin(BIN examples DEST ${local_install_dir})
......@@ -30,7 +30,9 @@
void RunMTAPI_C();
void RunMTAPI_C_Plugin();
void RunMTAPI_C_Network();
#ifdef EMBB_WITH_OPENCL
void RunMTAPI_C_OpenCL();
#endif
void RunMTAPI_CPP();
void RunTasks();
void RunDataflowLinear();
......@@ -66,9 +68,11 @@ int main() {
RunMTAPI_C_Network();
std::cout << "RunMTAPI_C_Network() ... done" << std::endl;
#ifdef EMBB_WITH_OPENCL
std::cout << "RunMTAPI_C_OpenCL() ..." << std::endl;
RunMTAPI_C_OpenCL();
std::cout << "RunMTAPI_C_OpenCL() ... done" << std::endl;
#endif
std::cout << "RunMTAPI_CPP() ..." << std::endl;
RunMTAPI_CPP();
......
......@@ -41,7 +41,8 @@ extern "C" {
/*
* For each extension, follow this template
* /* cl_VEN_extname extension */
* // cl_VEN_extname extension
*/
/* #define cl_VEN_extname 1
* ... define new types, if any
* ... define new tokens, if any
......
......@@ -46,8 +46,8 @@ struct embb_mtapi_opencl_plugin_struct {
cl_device_id device_id;
cl_context context;
cl_command_queue command_queue;
cl_uint work_group_size;
cl_uint work_item_sizes[3];
size_t work_group_size;
size_t work_item_sizes[3];
};
typedef struct embb_mtapi_opencl_plugin_struct embb_mtapi_opencl_plugin_t;
......@@ -270,12 +270,13 @@ void mtapi_opencl_plugin_initialize(
NULL, NULL, &err);
}
if (CL_SUCCESS == err) {
size_t work_group_size;
err = clGetDeviceInfo(plugin->device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(cl_uint), &plugin->work_group_size, NULL);
sizeof(size_t), &plugin->work_group_size, NULL);
}
if (CL_SUCCESS == err) {
err = clGetDeviceInfo(plugin->device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES,
3 * sizeof(cl_uint), &plugin->work_item_sizes[0], NULL);
3 * sizeof(size_t), &plugin->work_item_sizes[0], NULL);
}
if (CL_SUCCESS == err) {
plugin->command_queue = clCreateCommandQueue(plugin->context,
......
......@@ -27,6 +27,7 @@
# Needs to be located in the folder containing the tests!!
# Is copied automatically there when generating build files with cmake.
EMBB_TEST_EXECUTABLES="@EXPECTED_EMBB_TEST_EXECUTABLES@"
SCRIPT_LOCATION="$0"
# case we have symlinks...
......@@ -36,10 +37,6 @@ done
DIR=`dirname "$SCRIPT_LOCATION"`
TESTS="embb_base_c_test embb_base_cpp_test embb_mtapi_c_test \
embb_mtapi_cpp_test embb_tasks_cpp_test embb_algorithms_cpp_test \
embb_containers_cpp_test embb_dataflow_cpp_test"
for TEST in $TESTS; do
for TEST in $(echo $EMBB_TEST_EXECUTABLES | tr ";" " "); do
"$DIR/$TEST".exe;
done
......@@ -27,6 +27,7 @@
# Needs to be located in the folder containing the tests!!
# Is copied automatically there when generating build files with cmake.
EMBB_TEST_EXECUTABLES="@EXPECTED_EMBB_TEST_EXECUTABLES@"
SCRIPT_LOCATION="$0"
# case we have symlinks...
......@@ -36,10 +37,6 @@ done
DIR=`dirname "$SCRIPT_LOCATION"`
TESTS="embb_base_c_test embb_base_cpp_test embb_mtapi_c_test \
embb_mtapi_cpp_test embb_tasks_cpp_test embb_algorithms_cpp_test \
embb_containers_cpp_test embb_dataflow_cpp_test"
for TEST in $TESTS; do
for TEST in $(echo $EMBB_TEST_EXECUTABLES | tr ";" " "); do
$DIR/$TEST;
done
......@@ -28,29 +28,28 @@
setlocal EnableDelayedExpansion
SET NUM_ERRORS=0
SET DIR=%~dp0
"%DIR:~0,-1%\embb_base_c_test.exe"
if not !ERRORLEVEL! ==0 set /a NUM_ERRORS=!NUM_ERRORS!+1
echo.
"%DIR:~0,-1%\embb_base_cpp_test.exe"
if not !ERRORLEVEL! ==0 set /a NUM_ERRORS=!NUM_ERRORS!+1
echo.
"%DIR:~0,-1%\embb_mtapi_c_test.exe"
if not !ERRORLEVEL! ==0 set /a NUM_ERRORS=!NUM_ERRORS!+1
echo.
"%DIR:~0,-1%\embb_mtapi_cpp_test.exe"
if not !ERRORLEVEL! ==0 set /a NUM_ERRORS=!NUM_ERRORS!+1
echo.
"%DIR:~0,-1%\embb_tasks_cpp_test.exe"
if not !ERRORLEVEL! ==0 set /a NUM_ERRORS=!NUM_ERRORS!+1
echo.
"%DIR:~0,-1%\embb_algorithms_cpp_test.exe"
if not !ERRORLEVEL! ==0 set /a NUM_ERRORS=!NUM_ERRORS!+1
echo.
"%DIR:~0,-1%\embb_containers_cpp_test.exe"
if not !ERRORLEVEL! ==0 set /a NUM_ERRORS=!NUM_ERRORS!+1
echo.
"%DIR:~0,-1%\embb_dataflow_cpp_test.exe"
SET EMBB_EXECUTABLES=@EXPECTED_EMBB_TEST_EXECUTABLES@
call :parse "%EMBB_EXECUTABLES%"
goto :end
:parse
set list=%1
set list=%list:"=%
FOR /f "tokens=1* delims=;" %%a IN ("%list%") DO (
if not "%%a" == "" call :sub %%a
if not "%%b" == "" call :parse "%%b"
)
exit /b
:sub
call "%DIR:~0,-1%\%1.exe"
if not !ERRORLEVEL! ==0 set /a NUM_ERRORS=!NUM_ERRORS!+1
exit /b
:end
if not !NUM_ERRORS! ==0 (
echo.
SET ERRORLEVEL=1
......
......@@ -54,7 +54,18 @@ Queue::Queue(mtapi_uint_t priority, bool ordered) {
mtapi_job_hndl_t job = mtapi_job_get(TASKS_CPP_JOB, domain_id, &status);
assert(MTAPI_SUCCESS == status);
handle_ = mtapi_queue_create(MTAPI_QUEUE_ID_NONE, job, &attr, &status);
if (MTAPI_SUCCESS != status) {
// Handle MTAPI error status in appropriate exceptions
if (status == MTAPI_SUCCESS) {
return;
} else if (status == MTAPI_ERR_QUEUE_LIMIT) {
EMBB_THROW(embb::base::ErrorException,
"mtapi::Queue could not be constructed, "
"maximum number of queues exceeded");
} else if (status == MTAPI_ERR_JOB_INVALID) {
EMBB_THROW(embb::base::ErrorException,
"mtapi::Queue could not be constructed, "
"invalid job");
} else {
EMBB_THROW(embb::base::ErrorException,
"mtapi::Queue could not be constructed");
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment