Commit 8aa76fe9 by Marcus Winter

Merge branch 'development' into embb517_mutex_based_atomics

parents 3bac4c22 784baa28
# Copyright (c) 2014-2016, Siemens AG. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
language: cpp
compiler:
- gcc
- clang
install:
- sudo apt-get install -qq cppcheck
- sudo add-apt-repository --yes ppa:kalakris/cmake
- sudo apt-get update -qq
- sudo apt-get install cmake
script:
- cppcheck -q --enable=all *_c*
- mkdir build && cd build
- cmake ..
- make
......
......@@ -30,6 +30,8 @@ set (EMBB_BASE_VERSION_MAJOR 0)
set (EMBB_BASE_VERSION_MINOR 4)
set (EMBB_BASE_VERSION_PATCH 0)
include(FindCUDA)
# Fix compilation for CMake versions >= 3.1
#
# New Policy 0054:
......@@ -152,6 +154,9 @@ set(EXPECTED_EMBB_TEST_EXECUTABLES "embb_algorithms_cpp_test"
if(BUILD_OPENCL_PLUGIN STREQUAL ON)
list(APPEND EXPECTED_EMBB_TEST_EXECUTABLES "embb_mtapi_opencl_c_test")
endif()
if(CUDA_FOUND)
list(APPEND EXPECTED_EMBB_TEST_EXECUTABLES "embb_mtapi_cuda_c_test")
endif()
## Copy test execution script to local binaries folder
......@@ -188,6 +193,9 @@ add_subdirectory(mtapi_plugins_c/mtapi_network_c)
if(BUILD_OPENCL_PLUGIN STREQUAL ON)
add_subdirectory(mtapi_plugins_c/mtapi_opencl_c)
endif()
if(CUDA_FOUND)
add_subdirectory(mtapi_plugins_c/mtapi_cuda_c)
endif()
add_subdirectory(mtapi_cpp)
add_subdirectory(containers_cpp)
add_subdirectory(algorithms_cpp)
......
......@@ -29,8 +29,9 @@
#include <cassert>
#include <iterator>
#include <algorithm>
#include <functional>
#include <utility> // swap C++ 11
#include <algorithm> // swap C++ 98
#include <embb/base/exceptions.h>
#include <embb/mtapi/mtapi.h>
......
......@@ -57,6 +57,19 @@ typedef opaque_type embb_thread_t;
#endif /* DOXYGEN */
/**
* Thread priority type.
*/
typedef enum {
EMBB_THREAD_PRIORITY_IDLE,
EMBB_THREAD_PRIORITY_LOWEST,
EMBB_THREAD_PRIORITY_BELOW_NORMAL,
EMBB_THREAD_PRIORITY_NORMAL,
EMBB_THREAD_PRIORITY_ABOVE_NORMAL,
EMBB_THREAD_PRIORITY_HIGHEST,
EMBB_THREAD_PRIORITY_TIME_CRITICAL
} embb_thread_priority_t;
/**
* Thread start function pointer type.
*
* The return value can be used to return a user-defined exit code when the
......@@ -123,7 +136,7 @@ void embb_thread_yield();
* Creates and runs a thread.
*
* \pre The given thread is not running and has not yet been successfully
joined.
* joined.
* \post On success, the given thread has started to run.
* \return EMBB_SUCCESS if the thread could be created. \n
* EMBB_NOMEM if there was insufficient amount of memory \n
......@@ -148,6 +161,36 @@ int embb_thread_create(
);
/**
* Creates and runs a thread.
*
* \pre The given thread is not running and has not yet been successfully
* joined.
* \post On success, the given thread has started to run.
* \return EMBB_SUCCESS if the thread could be created. \n
* EMBB_NOMEM if there was insufficient amount of memory \n
* EMBB_ERROR otherwise.
* \memory Dynamically allocates a small constant amount of memory to store the
* function and argument pointers. This memory is freed when the thread
* is joined.
* \notthreadsafe
* \see embb_thread_join()
*/
int embb_thread_create_with_priority(
embb_thread_t* thread,
/**< [OUT] Thread to be run */
const embb_core_set_t* core_set,
/**< [IN] Set of cores on which the thread shall be executed. Can be NULL to
indicate automatic thread scheduling by the OS. */
embb_thread_priority_t priority,
/**< [IN] Priority to run the thread at. */
embb_thread_start_t function,
/**< [IN] Function which is executed by the thread when started. Has to be of
type embb_thread_start_t. */
void* arg
/**< [IN/OUT] Argument to thread start function. Can be NULL. */
);
/**
* Waits until the given thread has finished execution.
*
* \pre The given thread has been successfully created using
......
......@@ -40,6 +40,15 @@ void embb_thread_set_max_count(unsigned int max) {
embb_internal_thread_index_set_max(max);
}
int embb_thread_create(
embb_thread_t* thread,
const embb_core_set_t* core_set,
embb_thread_start_t func,
void *arg) {
return embb_thread_create_with_priority(thread, core_set,
EMBB_THREAD_PRIORITY_NORMAL, func, arg);
}
#ifdef EMBB_PLATFORM_THREADING_WINTHREADS
/**
......@@ -78,8 +87,12 @@ void embb_thread_yield() {
SwitchToThread();
}
int embb_thread_create(embb_thread_t* thread, const embb_core_set_t* core_set,
embb_thread_start_t func, void *arg) {
int embb_thread_create_with_priority(
embb_thread_t* thread,
const embb_core_set_t* core_set,
embb_thread_priority_t priority,
embb_thread_start_t func,
void *arg) {
if (thread == NULL) {
return EMBB_ERROR;
}
......@@ -121,6 +134,37 @@ int embb_thread_create(embb_thread_t* thread, const embb_core_set_t* core_set,
}
}
int internal_priority;
switch (priority) {
case EMBB_THREAD_PRIORITY_IDLE:
internal_priority = THREAD_PRIORITY_IDLE;
break;
case EMBB_THREAD_PRIORITY_LOWEST:
internal_priority = THREAD_PRIORITY_LOWEST;
break;
case EMBB_THREAD_PRIORITY_BELOW_NORMAL:
internal_priority = THREAD_PRIORITY_BELOW_NORMAL;
break;
case EMBB_THREAD_PRIORITY_ABOVE_NORMAL:
internal_priority = THREAD_PRIORITY_ABOVE_NORMAL;
break;
case EMBB_THREAD_PRIORITY_HIGHEST:
internal_priority = THREAD_PRIORITY_HIGHEST;
break;
case EMBB_THREAD_PRIORITY_TIME_CRITICAL:
internal_priority = THREAD_PRIORITY_TIME_CRITICAL;
break;
case EMBB_THREAD_PRIORITY_NORMAL:
default:
internal_priority = THREAD_PRIORITY_NORMAL;
break;
}
BOOL result = SetThreadPriority(
thread->embb_internal_handle, internal_priority);
if (result == 0) {
return EMBB_ERROR;
}
return EMBB_SUCCESS;
}
......@@ -180,12 +224,18 @@ int embb_thread_equal(const embb_thread_t* lhs, const embb_thread_t* rhs) {
#include <sys/sysinfo.h> /* Used to get number of processors */
#endif /* EMBB_PLATFORM_HAS_HEADER_SYSINFO */
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/resource.h>
/**
* Used to wrap client thread start function and argument when calling internal
* thread start function embb_internal_thread_start.
*/
typedef struct embb_internal_thread_arg_t {
embb_thread_start_t func;
int priority;
void* arg;
int result;
} embb_internal_thread_arg_t;
......@@ -197,6 +247,12 @@ typedef struct embb_internal_thread_arg_t {
* argument.
*/
void* embb_internal_thread_start(void* internalArg) {
#ifdef EMBB_PLATFORM_HAS_GLIB_CPU
pid_t tid;
tid = syscall(SYS_gettid);
setpriority(PRIO_PROCESS, tid,
((embb_internal_thread_arg_t*)internalArg)->priority);
#endif
((embb_internal_thread_arg_t*)internalArg)->result =
((embb_internal_thread_arg_t*)internalArg)->func(
((struct embb_internal_thread_arg_t*)internalArg)->arg);
......@@ -214,8 +270,12 @@ void embb_thread_yield() {
pthread_yield();
}
int embb_thread_create(embb_thread_t* thread, const embb_core_set_t* core_set,
embb_thread_start_t func, void* arg) {
int embb_thread_create_with_priority(
embb_thread_t* thread,
const embb_core_set_t* core_set,
embb_thread_priority_t priority,
embb_thread_start_t func,
void* arg) {
if (thread == NULL) {
return EMBB_ERROR;
}
......@@ -261,6 +321,31 @@ int embb_thread_create(embb_thread_t* thread, const embb_core_set_t* core_set,
thread->embb_internal_arg->func = func;
thread->embb_internal_arg->arg = arg;
switch (priority) {
case EMBB_THREAD_PRIORITY_IDLE:
thread->embb_internal_arg->priority = 19;
break;
case EMBB_THREAD_PRIORITY_LOWEST:
thread->embb_internal_arg->priority = 2;
break;
case EMBB_THREAD_PRIORITY_BELOW_NORMAL:
thread->embb_internal_arg->priority = 1;
break;
case EMBB_THREAD_PRIORITY_ABOVE_NORMAL:
thread->embb_internal_arg->priority = -1;
break;
case EMBB_THREAD_PRIORITY_HIGHEST:
thread->embb_internal_arg->priority = -2;
break;
case EMBB_THREAD_PRIORITY_TIME_CRITICAL:
thread->embb_internal_arg->priority = -19;
break;
case EMBB_THREAD_PRIORITY_NORMAL:
default:
thread->embb_internal_arg->priority = 0;
break;
}
status = pthread_create(
&(thread->embb_internal_handle), /* pthread handle */
&attr, /* additional attributes,
......
......@@ -28,7 +28,8 @@
#define EMBB_BASE_INTERNAL_MUTEX_INL_H_
#include <cassert>
#include <algorithm>
#include <utility> // swap C++ 11
#include <algorithm> // swap C++ 98
namespace embb {
namespace base {
......
......@@ -28,7 +28,6 @@
#define EMBB_BASE_INTERNAL_THREAD_INL_H_
#include <embb/base/exceptions.h>
#include <embb/base/c/thread.h>
#include <embb/base/internal/thread_closures.h>
#include <embb/base/memory_allocation.h>
#include <iostream>
......@@ -60,6 +59,22 @@ Thread::Thread(CoreSet& core_set, Function function) : rep_() {
CheckThreadCreationErrors(result, closure);
}
template<typename Function>
Thread::Thread(
CoreSet& core_set,
embb_thread_priority_t priority,
Function function) : rep_() {
typedef internal::ThreadClosure<Function> Closure;
Closure* closure = Allocation::New<Closure>(function);
int result = embb_thread_create_with_priority(
&rep_,
&core_set.rep_,
priority,
internal::ThreadClosure<Function>::ThreadStart,
static_cast<void*>(closure));
CheckThreadCreationErrors(result, closure);
}
template<typename Function, typename Arg1>
Thread::Thread(Function function, Arg1 arg1) : rep_() {
typedef internal::ThreadClosureArg1<Function, Arg1> Closure;
......
......@@ -31,6 +31,7 @@
#include <embb/base/internal/thread_closures.h>
#include <embb/base/mutex.h>
#include <embb/base/core_set.h>
#include <embb/base/c/thread.h>
#include <ostream>
namespace embb {
......@@ -177,9 +178,33 @@ class Thread {
* \tparam Function Function object type
*/
template<typename Function>
explicit Thread(
Thread(
CoreSet& core_set,
/**< [IN] Set of cores on which the thread shall be executed. */
Function function
/**< [IN] Copyable function object, callable without arguments */
);
/**
* Creates and runs a thread with zero-argument start function.
*
* \note If the function is passed as a temporary object when creating a
* thread, this might be interpreted as a function declaration ("most vexing
* parse"). C++11 resolves this by using curly braces for initialization.
*
* \throws NoMemoryException if not enough memory is available
* \throws ErrorException in case of another error
* \memory A small constant amount of memory to store the function. This
* memory is freed the thread is joined.
* \notthreadsafe
* \tparam Function Function object type
*/
template<typename Function>
Thread(
CoreSet& core_set,
/**< [IN] Set of cores on which the thread shall be executed. */
embb_thread_priority_t priority,
/**< [IN] Priority of the new thread. */
Function function
/**< [IN] Copyable function object, callable without arguments */
);
......
......@@ -38,18 +38,17 @@ LogTest::LogTest() {
CreateUnit("Test all").Add(&LogTest::Test, this);
}
static std::string logged_message;
static void test_log_function(void * context, char const * msg) {
EMBB_UNUSED(context);
logged_message = msg;
std::string * logged_message = reinterpret_cast<std::string*>(context);
*logged_message = msg;
}
void LogTest::Test() {
using embb::base::Log;
char const * test_msg = "hello";
std::string logged_message;
Log::SetLogFunction(0, test_log_function);
Log::SetLogFunction(&logged_message, test_log_function);
Log::SetLogLevel(EMBB_LOG_LEVEL_TRACE);
logged_message = "none";
......
......@@ -29,6 +29,7 @@
#include <cstddef>
#include <embb/base/exceptions.h>
#include <embb/mtapi/execution_policy.h>
#include <embb/dataflow/internal/scheduler.h>
#include <embb/dataflow/internal/clock_listener.h>
......@@ -58,10 +59,14 @@ class Node {
SetSlices(0);
}
}
void SetPolicy(embb::mtapi::ExecutionPolicy const & policy) {
policy_ = policy;
}
protected:
Scheduler * sched_;
static int next_process_id_;
embb::mtapi::ExecutionPolicy policy_;
static int GetNextProcessID() { return next_process_id_++; }
virtual void SetSlices(int /*slices*/) {}
......
......@@ -146,7 +146,7 @@ class Process< Serial, Inputs<I1, I2, I3, I4, I5>,
for (int ii = clk; ii < clk_res; ii++) {
const int idx = ii % slices_;
action_[idx] = Action(this, ii);
sched_->Enqueue(queue_id_, action_[idx]);
sched_->Enqueue(queue_id_, action_[idx], policy_);
}
queued_clock_.Store(clk_res);
retry = false;
......@@ -158,7 +158,7 @@ class Process< Serial, Inputs<I1, I2, I3, I4, I5>,
} else {
const int idx = clock % slices_;
action_[idx] = Action(this, clock);
sched_->Start(action_[idx]);
sched_->Start(action_[idx], policy_);
}
}
......
......@@ -27,6 +27,8 @@
#ifndef EMBB_DATAFLOW_INTERNAL_SCHEDULER_H_
#define EMBB_DATAFLOW_INTERNAL_SCHEDULER_H_
#include <embb/mtapi/execution_policy.h>
namespace embb {
namespace dataflow {
namespace internal {
......@@ -37,8 +39,13 @@ class Scheduler {
public:
Scheduler() {}
virtual ~Scheduler() {}
virtual void Start(Action & action) = 0;
virtual void Enqueue(int process_id, Action & action) = 0;
virtual void Start(
Action & action,
embb::mtapi::ExecutionPolicy const & policy) = 0;
virtual void Enqueue(
int process_id,
Action & action,
embb::mtapi::ExecutionPolicy const & policy) = 0;
virtual void WaitForSlice(int slice) = 0;
virtual int GetSlices() = 0;
};
......
......@@ -96,14 +96,25 @@ class SchedulerMTAPI : public Scheduler {
embb::base::Allocation::Free(group_);
embb::base::Allocation::Free(queue_);
}
virtual void Start(Action & action) {
virtual void Start(
Action & action,
embb::mtapi::ExecutionPolicy const & policy) {
const int idx = action.GetClock() % slices_;
group_[idx].Start(job_, &action, static_cast<void*>(NULL));
embb::mtapi::TaskAttributes task_attr;
task_attr.SetPolicy(policy);
group_[idx].Start(job_, &action, static_cast<void*>(NULL),
task_attr);
}
virtual void Enqueue(int process_id, Action & action) {
virtual void Enqueue(
int process_id,
Action & action,
embb::mtapi::ExecutionPolicy const & policy) {
const int idx = action.GetClock() % slices_;
const int queue_id = process_id % queue_count_;
queue_[queue_id].Enqueue(&action, static_cast<void*>(NULL), group_[idx]);
embb::mtapi::TaskAttributes task_attr;
task_attr.SetPolicy(policy);
queue_[queue_id].Enqueue(&action, static_cast<void*>(NULL),
task_attr, group_[idx]);
}
virtual void WaitForSlice(int slice) {
group_[slice].WaitAll(MTAPI_INFINITE);
......
......@@ -38,10 +38,15 @@ class SchedulerSequential : public Scheduler {
public:
SchedulerSequential() {}
virtual ~SchedulerSequential() {}
virtual void Start(Action & action) {
virtual void Start(
Action & action,
embb::mtapi::ExecutionPolicy const &) {
action.RunSequential();
}
virtual void Enqueue(int, Action & action) {
virtual void Enqueue(
int,
Action & action,
embb::mtapi::ExecutionPolicy const &) {
action.RunSequential();
}
virtual void WaitForSlice(int /*slice*/) {}
......
......@@ -113,7 +113,7 @@ class Sink< Inputs<I1, I2, I3, I4, I5> >
for (int ii = clk; ii < clk_res; ii++) {
const int idx = ii % slices_;
action_[idx] = Action(this, ii);
sched_->Enqueue(queue_id_, action_[idx]);
sched_->Enqueue(queue_id_, action_[idx], policy_);
}
queued_clock_.Store(clk_res);
retry = false;
......
......@@ -71,6 +71,19 @@ class Network {
explicit Network(int slices) {}
/**
* Constructs an empty network.
* \param policy Default execution policy of the processes in the network.
*/
explicit Network(embb::mtapi::ExecutionPolicy const & policy) {}
/**
* Constructs an empty network.
* \param slices Number of concurrent tokens allowed in the network.
* \param policy Default execution policy of the processes in the network.
*/
Network(int slices, embb::mtapi::ExecutionPolicy const & policy) {}
/**
* Input port class.
*/
template <typename Type>
......@@ -208,7 +221,16 @@ class Network {
* \param network The network this node is going to be part of.
* \param function The Function to call to process a token.
*/
explicit SerialProcess(Network & network, FunctionType function);
SerialProcess(Network & network, FunctionType function);
/**
* Constructs a SerialProcess with a user specified processing function.
* \param network The network this node is going to be part of.
* \param function The Function to call to process a token.
* \param policy The execution policy of the process.
*/
SerialProcess(Network & network, FunctionType function,
embb::mtapi::ExecutionPolicy const & policy);
/**
* \returns \c true if the SerialProcess has any inputs, \c false
......@@ -290,7 +312,16 @@ class Network {
* \param network The network this node is going to be part of.
* \param function The Function to call to process a token.
*/
explicit ParallelProcess(Network & network, FunctionType function);
ParallelProcess(Network & network, FunctionType function);
/**
* Constructs a ParallelProcess with a user specified processing function.
* \param network The network this node is going to be part of.
* \param function The Function to call to process a token.
* \param policy The execution policy of the process.
*/
ParallelProcess(Network & network, FunctionType function,
embb::mtapi::ExecutionPolicy const & policy);
/**
* \returns \c true if the ParallelProcess has any inputs, \c false
......@@ -373,6 +404,13 @@ class Network {
explicit Select(Network & network);
/**
* Constructs a Switch process.
* \param network The network this node is going to be part of.
* \param policy The execution policy of the process.
*/
Select(Network & network, embb::mtapi::ExecutionPolicy const & policy);
/**
* \returns Always \c true.
*/
virtual bool HasInputs() const;
......@@ -451,6 +489,13 @@ class Network {
explicit Select(Network & network);
/**
* Constructs a Select process.
* \param network The network this node is going to be part of.
* \param policy The execution policy of the process.
*/
Select(Network & network, embb::mtapi::ExecutionPolicy const & policy);
/**
* \returns Always \c true.
*/
virtual bool HasInputs() const;
......@@ -528,7 +573,16 @@ class Network {
* \param network The network this node is going to be part of.
* \param function The Function to call to process a token.
*/
explicit Sink(Network & network, FunctionType function);
Sink(Network & network, FunctionType function);
/**
* Constructs a Sink with a user specified processing function.
* \param network The network this node is going to be part of.
* \param function The Function to call to process a token.
* \param policy The execution policy of the process.
*/
Sink(Network & network, FunctionType function,
embb::mtapi::ExecutionPolicy const & policy);
/**
* \returns Always \c true.
......@@ -588,7 +642,16 @@ class Network {
* \param network The network this node is going to be part of.
* \param function The Function to call to emit a token.
*/
explicit Source(Network & network, FunctionType function);
Source(Network & network, FunctionType function);
/**
* Constructs a Source with a user specified processing function.
* \param network The network this node is going to be part of.
* \param function The Function to call to emit a token.
* \param policy The execution policy of the process.
*/
Source(Network & network, FunctionType function,
embb::mtapi::ExecutionPolicy const & policy);
/**
* \returns Always \c false.
......@@ -641,7 +704,16 @@ class Network {
* \param network The network this node is going to be part of.
* \param value The value to emit.
*/
explicit ConstantSource(Network & network, Type value);
ConstantSource(Network & network, Type value);
/**
* Constructs a ConstantSource with a value to emit on each token.
* \param network The network this node is going to be part of.
* \param value The value to emit.
* \param policy The execution policy of the process.
*/
ConstantSource(Network & network, Type value,
embb::mtapi::ExecutionPolicy const & policy);
/**
* \returns Always \c false.
......@@ -698,12 +770,29 @@ class Network {
class Network : public internal::ClockListener {
public:
Network()
: sink_counter_(NULL), sink_count_(0), slices_(0), sched_(NULL) {
: sink_counter_(NULL), sink_count_(0)
, slices_(0), sched_(NULL)
, policy_() {
// empty
}
explicit Network(int slices)
: sink_counter_(NULL), sink_count_(0), slices_(slices), sched_(NULL) {
: sink_counter_(NULL), sink_count_(0),
slices_(slices), sched_(NULL)
, policy_() {
PrepareSlices();
}
explicit Network(embb::mtapi::ExecutionPolicy const & policy)
: sink_counter_(NULL), sink_count_(0)
, slices_(0), sched_(NULL)
, policy_(policy) {
}
Network(int slices, embb::mtapi::ExecutionPolicy const & policy)
: sink_counter_(NULL), sink_count_(0)
, slices_(slices), sched_(NULL)
, policy_(policy) {
PrepareSlices();
}
......@@ -751,11 +840,23 @@ class Network : public internal::ClockListener {
internal::Inputs<I1, I2, I3, I4, I5>,
internal::Outputs<O1, O2, O3, O4, O5> >::FunctionType
FunctionType;
explicit SerialProcess(Network & network, FunctionType function)
SerialProcess(Network & network, FunctionType function)
: internal::Process< true,
internal::Inputs<I1, I2, I3, I4, I5>,
internal::Outputs<O1, O2, O3, O4, O5> >(
network.sched_, function) {
this->SetPolicy(network.policy_);
network.processes_.push_back(this);
}
SerialProcess(Network & network, FunctionType function,
embb::mtapi::ExecutionPolicy const & policy)
: internal::Process< true,
internal::Inputs<I1, I2, I3, I4, I5>,
internal::Outputs<O1, O2, O3, O4, O5> >(
network.sched_, function) {
this->SetPolicy(policy);
network.processes_.push_back(this);
}
};
......@@ -775,11 +876,23 @@ class Network : public internal::ClockListener {
internal::Inputs<I1, I2, I3, I4, I5>,
internal::Outputs<O1, O2, O3, O4, O5> >::FunctionType
FunctionType;
explicit ParallelProcess(Network & network, FunctionType function)
ParallelProcess(Network & network, FunctionType function)
: internal::Process< false,
internal::Inputs<I1, I2, I3, I4, I5>,
internal::Outputs<O1, O2, O3, O4, O5> >(
network.sched_, function) {
this->SetPolicy(network.policy_);
network.processes_.push_back(this);
}
ParallelProcess(Network & network, FunctionType function,
embb::mtapi::ExecutionPolicy const & policy)
: internal::Process< false,
internal::Inputs<I1, I2, I3, I4, I5>,
internal::Outputs<O1, O2, O3, O4, O5> >(
network.sched_, function) {
this->SetPolicy(policy);
network.processes_.push_back(this);
}
};
......@@ -789,6 +902,13 @@ class Network : public internal::ClockListener {
public:
explicit Switch(Network & network)
: internal::Switch<Type>(network.sched_) {
this->SetPolicy(network.policy_);
network.processes_.push_back(this);
}
Switch(Network & network, embb::mtapi::ExecutionPolicy const & policy)
: internal::Switch<Type>(network.sched_) {
this->SetPolicy(policy);
network.processes_.push_back(this);
}
};
......@@ -798,6 +918,13 @@ class Network : public internal::ClockListener {
public:
explicit Select(Network & network)
: internal::Select<Type>(network.sched_) {
this->SetPolicy(network.policy_);
network.processes_.push_back(this);
}
Select(Network & network, embb::mtapi::ExecutionPolicy const & policy)
: internal::Select<Type>(network.sched_) {
this->SetPolicy(policy);
network.processes_.push_back(this);
}
};
......@@ -812,10 +939,21 @@ class Network : public internal::ClockListener {
typedef typename internal::Sink<
internal::Inputs<I1, I2, I3, I4, I5> >::FunctionType FunctionType;
explicit Sink(Network & network, FunctionType function)
Sink(Network & network, FunctionType function)
: internal::Sink<
internal::Inputs<I1, I2, I3, I4, I5> >(
network.sched_, &network, function) {
this->SetPolicy(network.policy_);
network.sinks_.push_back(this);
network.sink_count_++;
}
Sink(Network & network, FunctionType function,
embb::mtapi::ExecutionPolicy const & policy)
: internal::Sink<
internal::Inputs<I1, I2, I3, I4, I5> >(
network.sched_, &network, function) {
this->SetPolicy(policy);
network.sinks_.push_back(this);
network.sink_count_++;
}
......@@ -832,9 +970,18 @@ class Network : public internal::ClockListener {
internal::Outputs<O1, O2, O3, O4, O5> >::FunctionType
FunctionType;
explicit Source(Network & network, FunctionType function)
Source(Network & network, FunctionType function)
: internal::Source<
internal::Outputs<O1, O2, O3, O4, O5> >(network.sched_, function) {
this->SetPolicy(network.policy_);
network.sources_.push_back(this);
}
Source(Network & network, FunctionType function,
embb::mtapi::ExecutionPolicy const & policy)
: internal::Source<
internal::Outputs<O1, O2, O3, O4, O5> >(network.sched_, function) {
this->SetPolicy(policy);
network.sources_.push_back(this);
}
};
......@@ -842,8 +989,16 @@ class Network : public internal::ClockListener {
template<typename Type>
class ConstantSource : public internal::ConstantSource<Type> {
public:
explicit ConstantSource(Network & network, Type value)
ConstantSource(Network & network, Type value)
: internal::ConstantSource<Type>(network.sched_, value) {
this->SetPolicy(network.policy_);
network.sources_.push_back(this);
}
ConstantSource(Network & network, Type value,
embb::mtapi::ExecutionPolicy const & policy)
: internal::ConstantSource<Type>(network.sched_, value) {
this->SetPolicy(policy);
network.sources_.push_back(this);
}
};
......@@ -928,6 +1083,7 @@ class Network : public internal::ClockListener {
int sink_count_;
int slices_;
internal::Scheduler * sched_;
embb::mtapi::ExecutionPolicy policy_;
#if EMBB_DATAFLOW_TRACE_SIGNAL_HISTORY
std::vector<int> spawn_history_[Slices];
......
......@@ -153,7 +153,8 @@ INPUT = "@CMAKE_SOURCE_DIR@/doc/reference/embb.dox" \
"@CMAKE_SOURCE_DIR@/mtapi_c/include" \
"@CMAKE_SOURCE_DIR@/base_c/include" \
"@CMAKE_SOURCE_DIR@/mtapi_plugins_c/mtapi_opencl_c/include" \
"@CMAKE_SOURCE_DIR@/mtapi_plugins_c/mtapi_network_c/include"
"@CMAKE_SOURCE_DIR@/mtapi_plugins_c/mtapi_network_c/include" \
"@CMAKE_SOURCE_DIR@/mtapi_plugins_c/mtapi_cuda_c/include"
INPUT_ENCODING = UTF-8
FILE_PATTERNS = *.h \
......
......@@ -244,6 +244,7 @@
#include <stdint.h>
#include <embb/base/c/core_set.h>
#include <embb/base/c/thread.h>
#ifdef __cplusplus
extern "C" {
......@@ -529,6 +530,96 @@ enum mtapi_notification_enum {
typedef enum mtapi_notification_enum mtapi_notification_t;
/**< runtime notification */
/**
* Enum to select default or specific worker for priority setter
*/
enum mtapi_worker_priority_type_enum {
MTAPI_WORKER_PRIORITY_END = 0,
MTAPI_WORKER_PRIORITY_DEFAULT = 1,
MTAPI_WORKER_PRIORITY_WORKER = 100,
MTAPI_WORKER_PRIORITY_WORKER_0 = MTAPI_WORKER_PRIORITY_WORKER + 0,
MTAPI_WORKER_PRIORITY_WORKER_1 = MTAPI_WORKER_PRIORITY_WORKER + 1,
MTAPI_WORKER_PRIORITY_WORKER_2 = MTAPI_WORKER_PRIORITY_WORKER + 2,
MTAPI_WORKER_PRIORITY_WORKER_3 = MTAPI_WORKER_PRIORITY_WORKER + 3,
MTAPI_WORKER_PRIORITY_WORKER_4 = MTAPI_WORKER_PRIORITY_WORKER + 4,
MTAPI_WORKER_PRIORITY_WORKER_5 = MTAPI_WORKER_PRIORITY_WORKER + 5,
MTAPI_WORKER_PRIORITY_WORKER_6 = MTAPI_WORKER_PRIORITY_WORKER + 6,
MTAPI_WORKER_PRIORITY_WORKER_7 = MTAPI_WORKER_PRIORITY_WORKER + 7,
MTAPI_WORKER_PRIORITY_WORKER_8 = MTAPI_WORKER_PRIORITY_WORKER + 8,
MTAPI_WORKER_PRIORITY_WORKER_9 = MTAPI_WORKER_PRIORITY_WORKER + 9,
MTAPI_WORKER_PRIORITY_WORKER_10 = MTAPI_WORKER_PRIORITY_WORKER + 10,
MTAPI_WORKER_PRIORITY_WORKER_11 = MTAPI_WORKER_PRIORITY_WORKER + 11,
MTAPI_WORKER_PRIORITY_WORKER_12 = MTAPI_WORKER_PRIORITY_WORKER + 12,
MTAPI_WORKER_PRIORITY_WORKER_13 = MTAPI_WORKER_PRIORITY_WORKER + 13,
MTAPI_WORKER_PRIORITY_WORKER_14 = MTAPI_WORKER_PRIORITY_WORKER + 14,
MTAPI_WORKER_PRIORITY_WORKER_15 = MTAPI_WORKER_PRIORITY_WORKER + 15,
MTAPI_WORKER_PRIORITY_WORKER_16 = MTAPI_WORKER_PRIORITY_WORKER + 16,
MTAPI_WORKER_PRIORITY_WORKER_17 = MTAPI_WORKER_PRIORITY_WORKER + 17,
MTAPI_WORKER_PRIORITY_WORKER_18 = MTAPI_WORKER_PRIORITY_WORKER + 18,
MTAPI_WORKER_PRIORITY_WORKER_19 = MTAPI_WORKER_PRIORITY_WORKER + 19,
MTAPI_WORKER_PRIORITY_WORKER_20 = MTAPI_WORKER_PRIORITY_WORKER + 20,
MTAPI_WORKER_PRIORITY_WORKER_21 = MTAPI_WORKER_PRIORITY_WORKER + 21,
MTAPI_WORKER_PRIORITY_WORKER_22 = MTAPI_WORKER_PRIORITY_WORKER + 22,
MTAPI_WORKER_PRIORITY_WORKER_23 = MTAPI_WORKER_PRIORITY_WORKER + 23,
MTAPI_WORKER_PRIORITY_WORKER_24 = MTAPI_WORKER_PRIORITY_WORKER + 24,
MTAPI_WORKER_PRIORITY_WORKER_25 = MTAPI_WORKER_PRIORITY_WORKER + 25,
MTAPI_WORKER_PRIORITY_WORKER_26 = MTAPI_WORKER_PRIORITY_WORKER + 26,
MTAPI_WORKER_PRIORITY_WORKER_27 = MTAPI_WORKER_PRIORITY_WORKER + 27,
MTAPI_WORKER_PRIORITY_WORKER_28 = MTAPI_WORKER_PRIORITY_WORKER + 28,
MTAPI_WORKER_PRIORITY_WORKER_29 = MTAPI_WORKER_PRIORITY_WORKER + 29,
MTAPI_WORKER_PRIORITY_WORKER_30 = MTAPI_WORKER_PRIORITY_WORKER + 30,
MTAPI_WORKER_PRIORITY_WORKER_31 = MTAPI_WORKER_PRIORITY_WORKER + 31,
MTAPI_WORKER_PRIORITY_WORKER_32 = MTAPI_WORKER_PRIORITY_WORKER + 32,
MTAPI_WORKER_PRIORITY_WORKER_33 = MTAPI_WORKER_PRIORITY_WORKER + 33,
MTAPI_WORKER_PRIORITY_WORKER_34 = MTAPI_WORKER_PRIORITY_WORKER + 34,
MTAPI_WORKER_PRIORITY_WORKER_35 = MTAPI_WORKER_PRIORITY_WORKER + 35,
MTAPI_WORKER_PRIORITY_WORKER_36 = MTAPI_WORKER_PRIORITY_WORKER + 36,
MTAPI_WORKER_PRIORITY_WORKER_37 = MTAPI_WORKER_PRIORITY_WORKER + 37,
MTAPI_WORKER_PRIORITY_WORKER_38 = MTAPI_WORKER_PRIORITY_WORKER + 38,
MTAPI_WORKER_PRIORITY_WORKER_39 = MTAPI_WORKER_PRIORITY_WORKER + 39,
MTAPI_WORKER_PRIORITY_WORKER_40 = MTAPI_WORKER_PRIORITY_WORKER + 40,
MTAPI_WORKER_PRIORITY_WORKER_41 = MTAPI_WORKER_PRIORITY_WORKER + 41,
MTAPI_WORKER_PRIORITY_WORKER_42 = MTAPI_WORKER_PRIORITY_WORKER + 42,
MTAPI_WORKER_PRIORITY_WORKER_43 = MTAPI_WORKER_PRIORITY_WORKER + 43,
MTAPI_WORKER_PRIORITY_WORKER_44 = MTAPI_WORKER_PRIORITY_WORKER + 44,
MTAPI_WORKER_PRIORITY_WORKER_45 = MTAPI_WORKER_PRIORITY_WORKER + 45,
MTAPI_WORKER_PRIORITY_WORKER_46 = MTAPI_WORKER_PRIORITY_WORKER + 46,
MTAPI_WORKER_PRIORITY_WORKER_47 = MTAPI_WORKER_PRIORITY_WORKER + 47,
MTAPI_WORKER_PRIORITY_WORKER_48 = MTAPI_WORKER_PRIORITY_WORKER + 48,
MTAPI_WORKER_PRIORITY_WORKER_49 = MTAPI_WORKER_PRIORITY_WORKER + 49,
MTAPI_WORKER_PRIORITY_WORKER_50 = MTAPI_WORKER_PRIORITY_WORKER + 50,
MTAPI_WORKER_PRIORITY_WORKER_51 = MTAPI_WORKER_PRIORITY_WORKER + 51,
MTAPI_WORKER_PRIORITY_WORKER_52 = MTAPI_WORKER_PRIORITY_WORKER + 52,
MTAPI_WORKER_PRIORITY_WORKER_53 = MTAPI_WORKER_PRIORITY_WORKER + 53,
MTAPI_WORKER_PRIORITY_WORKER_54 = MTAPI_WORKER_PRIORITY_WORKER + 54,
MTAPI_WORKER_PRIORITY_WORKER_55 = MTAPI_WORKER_PRIORITY_WORKER + 55,
MTAPI_WORKER_PRIORITY_WORKER_56 = MTAPI_WORKER_PRIORITY_WORKER + 56,
MTAPI_WORKER_PRIORITY_WORKER_57 = MTAPI_WORKER_PRIORITY_WORKER + 57,
MTAPI_WORKER_PRIORITY_WORKER_58 = MTAPI_WORKER_PRIORITY_WORKER + 58,
MTAPI_WORKER_PRIORITY_WORKER_59 = MTAPI_WORKER_PRIORITY_WORKER + 59,
MTAPI_WORKER_PRIORITY_WORKER_60 = MTAPI_WORKER_PRIORITY_WORKER + 60,
MTAPI_WORKER_PRIORITY_WORKER_61 = MTAPI_WORKER_PRIORITY_WORKER + 61,
MTAPI_WORKER_PRIORITY_WORKER_62 = MTAPI_WORKER_PRIORITY_WORKER + 62,
MTAPI_WORKER_PRIORITY_WORKER_63 = MTAPI_WORKER_PRIORITY_WORKER + 63
};
/**
* Enum to select default or specific worker for priority setter
*/
typedef enum mtapi_worker_priority_type_enum mtapi_worker_priority_type_t;
/**
* Describes the default priority of all workers or the priority of a
* specific worker.
*/
struct mtapi_worker_priority_entry_struct {
mtapi_worker_priority_type_t type; /**< default or specific worker */
embb_thread_priority_t priority; /**< priority to set */
};
/**
* Describes the default priority of all workers or the priority of a
* specific worker.
*/
typedef struct mtapi_worker_priority_entry_struct mtapi_worker_priority_entry_t;
/**
* Node attributes, to be extended for implementation specific attributes
......@@ -554,7 +645,8 @@ enum mtapi_node_attributes_enum {
allowed by the node */
MTAPI_NODE_MAX_PRIORITIES, /**< maximum number of priorities
allowed by the node */
MTAPI_NODE_REUSE_MAIN_THREAD /**< reuse main thread as worker */
MTAPI_NODE_REUSE_MAIN_THREAD, /**< reuse main thread as worker */
MTAPI_NODE_WORKER_PRIORITIES /**< set worker priorites */
};
/** size of the \a MTAPI_NODE_CORE_AFFINITY attribute */
#define MTAPI_NODE_CORE_AFFINITY_SIZE sizeof(embb_core_set_t)
......@@ -580,6 +672,8 @@ enum mtapi_node_attributes_enum {
#define MTAPI_NODE_MAX_PRIORITIES_SIZE sizeof(mtapi_uint_t)
/** size of the \a MTAPI_NODE_REUSE_MAIN_THREAD attribute */
#define MTAPI_NODE_REUSE_MAIN_THREAD_SIZE sizeof(mtapi_boolean_t)
/** size of the \a MTAPI_NODE_WORKER_PRIORITIES attribute */
#define MTAPI_NODE_WORKER_PRIORITIES_SIZE 0
/* example attribute value */
#define MTAPI_NODE_TYPE_SMP 1
......@@ -693,6 +787,9 @@ struct mtapi_node_attributes_struct {
mtapi_uint_t max_priorities; /**< stores MTAPI_NODE_MAX_PRIORITIES */
mtapi_boolean_t reuse_main_thread; /**< stores
MTAPI_NODE_REUSE_MAIN_THREAD */
mtapi_worker_priority_entry_t * worker_priorities;
/**< stores
MTAPI_NODE_WORKER_PRIORITIES */
};
/**
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
......@@ -503,8 +503,25 @@ mtapi_boolean_t embb_mtapi_scheduler_initialize_with_mode(
}
core_num++;
}
isinit &= embb_mtapi_thread_context_initialize_with_node_worker_and_core(
&that->worker_contexts[ii], node, ii, core_num);
embb_thread_priority_t priority = EMBB_THREAD_PRIORITY_NORMAL;
if (NULL != node->attributes.worker_priorities) {
mtapi_worker_priority_entry_t * entry =
node->attributes.worker_priorities;
mtapi_worker_priority_type_t type = entry->type;
while (type != MTAPI_WORKER_PRIORITY_END) {
if (type == MTAPI_WORKER_PRIORITY_DEFAULT) {
priority = entry->priority;
} else if (type ==
(mtapi_worker_priority_type_t)(MTAPI_WORKER_PRIORITY_WORKER + ii)) {
priority = entry->priority;
break;
}
entry++;
type = entry->type;
}
}
isinit &= embb_mtapi_thread_context_initialize(
&that->worker_contexts[ii], node, ii, core_num, priority);
}
if (!isinit) {
return MTAPI_FALSE;
......
......@@ -38,11 +38,12 @@
/* ---- CLASS MEMBERS ------------------------------------------------------ */
mtapi_boolean_t embb_mtapi_thread_context_initialize_with_node_worker_and_core(
mtapi_boolean_t embb_mtapi_thread_context_initialize(
embb_mtapi_thread_context_t* that,
embb_mtapi_node_t* node,
mtapi_uint_t worker_index,
mtapi_uint_t core_num) {
mtapi_uint_t core_num,
embb_thread_priority_t priority) {
mtapi_uint_t ii;
mtapi_boolean_t result = MTAPI_TRUE;
......@@ -54,6 +55,7 @@ mtapi_boolean_t embb_mtapi_thread_context_initialize_with_node_worker_and_core(
that->core_num = core_num;
that->priorities = node->attributes.max_priorities;
that->is_initialized = MTAPI_FALSE;
that->thread_priority = priority;
that->is_main_thread = (worker_index == 0) ?
node->attributes.reuse_main_thread : MTAPI_FALSE;
embb_atomic_store_int(&that->run, 0);
......@@ -135,7 +137,8 @@ mtapi_boolean_t embb_mtapi_thread_context_start(
embb_tss_set(&(that->tss_id), that);
embb_atomic_store_int(&that->run, 1);
} else {
err = embb_thread_create(&that->thread, &core_set, worker_func, that);
err = embb_thread_create_with_priority(
&that->thread, &core_set, that->thread_priority, worker_func, that);
if (EMBB_SUCCESS != err) {
embb_mtapi_log_error(
"embb_mtapi_ThreadContext_initializeWithNodeAndCoreNumber() could not "
......
......@@ -69,6 +69,8 @@ struct embb_mtapi_thread_context_struct {
mtapi_status_t status;
mtapi_boolean_t is_initialized;
mtapi_boolean_t is_main_thread;
embb_thread_priority_t thread_priority;
};
#include <embb_mtapi_thread_context_t_fwd.h>
......@@ -78,11 +80,12 @@ struct embb_mtapi_thread_context_struct {
* \memberof embb_mtapi_thread_context_struct
* \returns MTAPI_TRUE if successful, MTAPI_FALSE on error
*/
mtapi_boolean_t embb_mtapi_thread_context_initialize_with_node_worker_and_core(
mtapi_boolean_t embb_mtapi_thread_context_initialize(
embb_mtapi_thread_context_t* that,
embb_mtapi_node_t* node,
mtapi_uint_t worker_index,
mtapi_uint_t core_num);
mtapi_uint_t core_num,
embb_thread_priority_t priority);
/**
* Destructor.
......
......@@ -53,6 +53,7 @@ void mtapi_nodeattr_init(
attributes->max_actions_per_job = MTAPI_NODE_MAX_ACTIONS_PER_JOB_DEFAULT;
attributes->max_priorities = MTAPI_NODE_MAX_PRIORITIES_DEFAULT;
attributes->reuse_main_thread = MTAPI_FALSE;
attributes->worker_priorities = NULL;
embb_core_set_init(&attributes->core_affinity, 1);
attributes->num_cores = embb_core_set_count(&attributes->core_affinity);
......@@ -149,6 +150,12 @@ void mtapi_nodeattr_set(
&attributes->reuse_main_thread, attribute, attribute_size);
break;
case MTAPI_NODE_WORKER_PRIORITIES:
local_status = MTAPI_SUCCESS;
attributes->worker_priorities =
(mtapi_worker_priority_entry_t*)attribute;
break;
default:
/* attribute unknown */
local_status = MTAPI_ERR_ATTR_NUM;
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
......@@ -41,6 +41,8 @@ endif()
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
DESTINATION include FILES_MATCHING PATTERN "*.h")
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/embb
DESTINATION include FILES_MATCHING PATTERN "*.h")
install(TARGETS embb_mtapi_cpp DESTINATION lib)
if (MSVC)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/Debug/embb_mtapi_cpp.pdb
......
......@@ -87,6 +87,23 @@ class NodeAttributes {
}
/**
* Sets the priority of the specified worker threads.
*
* \returns Reference to this object.
* \notthreadsafe
*/
NodeAttributes & SetWorkerPriority(
mtapi_worker_priority_entry_t * worker_priorities
/**< Array of priorities */
) {
mtapi_status_t status;
mtapi_nodeattr_set(&attributes_, MTAPI_NODE_WORKER_PRIORITIES,
worker_priorities, MTAPI_NODE_WORKER_PRIORITIES_SIZE, &status);
internal::CheckStatus(status);
return *this;
}
/**
* Sets the maximum number of concurrently active tasks.
*
* \returns Reference to this object.
......
project (project_embb_mtapi_cuda_c)
file(GLOB_RECURSE EMBB_MTAPI_CUDA_C_SOURCES "src/*.c" "src/*.h")
file(GLOB_RECURSE EMBB_MTAPI_CUDA_C_HEADERS "include/*.h")
file(GLOB_RECURSE EMBB_MTAPI_CUDA_TEST_SOURCES "test/*.cc" "test/*.cu" "test/*.h")
IF(MSVC8 OR MSVC9 OR MSVC10 OR MSVC11)
FOREACH(src_tmp ${EMBB_MTAPI_CUDA_TEST_SOURCES})
SET_PROPERTY(SOURCE ${src_tmp} PROPERTY LANGUAGE CXX)
ENDFOREACH(src_tmp)
FOREACH(src_tmp ${EMBB_MTAPI_CUDA_C_SOURCES})
SET_PROPERTY(SOURCE ${src_tmp} PROPERTY LANGUAGE CXX)
ENDFOREACH(src_tmp)
ENDIF()
IF(CMAKE_COMPILER_IS_GNUCC)
set (EMBB_MTAPI_CUDA_C_LIBS dl)
ENDIF()
# Execute the GroupSources macro
include(${CMAKE_SOURCE_DIR}/CMakeCommon/GroupSourcesMSVC.cmake)
GroupSourcesMSVC(include)
GroupSourcesMSVC(src)
GroupSourcesMSVC(test)
set (EMBB_MTAPI_CUDA_INCLUDE_DIRS "include" "src" "test")
include_directories(${EMBB_MTAPI_CUDA_INCLUDE_DIRS}
${CUDA_TOOLKIT_INCLUDE}
${CMAKE_CURRENT_SOURCE_DIR}/../../base_c/include
${CMAKE_CURRENT_BINARY_DIR}/../../base_c/include
${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_c/include
${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_c/src
)
add_library(embb_mtapi_cuda_c ${EMBB_MTAPI_CUDA_C_SOURCES} ${EMBB_MTAPI_CUDA_C_HEADERS})
target_link_libraries(embb_mtapi_cuda_c embb_mtapi_c embb_base_c)
if (BUILD_TESTS STREQUAL ON)
add_custom_command(
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/test/embb_mtapi_cuda_test_kernel.cu"
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx"
COMMAND ${CUDA_NVCC_EXECUTABLE} -ptx -m 32
"${CMAKE_CURRENT_SOURCE_DIR}/test/embb_mtapi_cuda_test_kernel.cu"
-o "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx"
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
add_custom_command(
DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx"
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.h"
COMMAND ${CUDA_TOOLKIT_ROOT_DIR}/bin/bin2c -p 0
"${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx" >
"${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.h"
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
include_directories(
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_BINARY_DIR}/../../partest/include)
add_executable (embb_mtapi_cuda_c_test ${EMBB_MTAPI_CUDA_TEST_SOURCES} "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.h")
target_link_libraries(embb_mtapi_cuda_c_test embb_mtapi_cuda_c embb_mtapi_c partest embb_base_c ${compiler_libs} ${EMBB_MTAPI_CUDA_C_LIBS} ${CUDA_CUDA_LIBRARY})
CopyBin(BIN embb_mtapi_cuda_c_test DEST ${local_install_dir})
endif()
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
DESTINATION include FILES_MATCHING PATTERN "*.h")
install(TARGETS embb_mtapi_cuda_c DESTINATION lib)
if (MSVC)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/Debug/embb_mtapi_cuda_c.pdb
DESTINATION lib
CONFIGURATIONS Debug)
endif()
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_MTAPI_C_MTAPI_CUDA_H_
#define EMBB_MTAPI_C_MTAPI_CUDA_H_
#include <embb/mtapi/c/mtapi_ext.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* \defgroup C_MTAPI_CUDA MTAPI CUDA Plugin
*
* \ingroup C_MTAPI_EXT
*
* Provides functionality to execute tasks on CUDA devices.
*/
/**
* Initializes the MTAPI CUDA environment on a previously initialized MTAPI
* node.
*
* It must be called on all nodes using the MTAPI CUDA plugin.
*
* Application software using MTAPI CUDA must call
* mtapi_cuda_plugin_initialize() once per node. It is an error to call
* mtapi_cuda_plugin_initialize() multiple times
* from a given node, unless mtapi_cuda_plugin_finalize() is called in
* between.
*
* On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is
* set to the appropriate error defined below.
* Error code | Description
* --------------------------- | ----------------------------------------------
* \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be initialized.
*
* \see mtapi_cuda_plugin_finalize()
*
* \notthreadsafe
* \ingroup C_MTAPI_CUDA
*/
void mtapi_cuda_plugin_initialize(
MTAPI_OUT mtapi_status_t* status /**< [out] Pointer to error code,
may be \c MTAPI_NULL */
);
/**
* Finalizes the MTAPI CUDA environment on the local MTAPI node.
*
* It has to be called by each node using MTAPI CUDA. It is an error to call
* mtapi_cuda_plugin_finalize() without first calling
* mtapi_cuda_plugin_initialize(). An MTAPI node can call
* mtapi_cuda_plugin_finalize() once for each call to
* mtapi_cuda_plugin_initialize(), but it is an error to call
* mtapi_cuda_plugin_finalize() multiple times from a given node
* unless mtapi_cuda_plugin_initialize() has been called prior to each
* mtapi_cuda_plugin_finalize() call.
*
* All CUDA tasks that have not completed and that have been started on the
* node where mtapi_cuda_plugin_finalize() is called will be canceled
* (see mtapi_task_cancel()). mtapi_cuda_plugin_finalize() blocks until all
* tasks that have been started on the same node return. Tasks that execute
* actions on the node where mtapi_cuda_plugin_finalize() is called, also
* block finalization of the MTAPI CUDA system on that node.
*
* On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is
* set to the appropriate error defined below.
* Error code | Description
* ----------------------------- | --------------------------------------------
* \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be finalized.
*
* \see mtapi_cuda_plugin_initialize(), mtapi_task_cancel()
*
* \notthreadsafe
* \ingroup C_MTAPI_CUDA
*/
void mtapi_cuda_plugin_finalize(
MTAPI_OUT mtapi_status_t* status /**< [out] Pointer to error code,
may be \c MTAPI_NULL */
);
/**
* This function creates a CUDA action.
*
* It is called on the node where the user wants to execute an action on an
* CUDA device. A CUDA action contains a reference to a local job, the
* kernel source to compile and execute on the CUDA device, the name of the
* kernel function, a local work size (see CUDA specification for details)
* and the size of one element in the result buffer.
* After a CUDA action is created, it is referenced by the application using
* a node-local handle of type \c mtapi_action_hndl_t, or indirectly through a
* node-local job handle of type \c mtapi_job_hndl_t. A CUDA action's
* life-cycle begins with mtapi_cuda_action_create(), and ends when
* mtapi_action_delete() or mtapi_finalize() is called.
*
* To create an action, the application must supply the domain-wide job ID of
* the job associated with the action. Job IDs must be predefined in the
* application and runtime, of type \c mtapi_job_id_t, which is an
* implementation-defined type. The job ID is unique in the sense that it is
* unique for the job implemented by the action. However several actions may
* implement the same job for load balancing purposes.
*
* If \c node_local_data_size is not zero, \c node_local_data specifies the
* start of node local data shared by kernel functions executed on the same
* node. \c node_local_data_size can be used by the runtime for cache coherency
* operations.
*
* On success, an action handle is returned and \c *status is set to
* \c MTAPI_SUCCESS. On error, \c *status is set to the appropriate error
* defined below. In the case where the action already exists, \c status will
* be set to \c MTAPI_ERR_ACTION_EXISTS and the handle returned will not be a
* valid handle.
* <table>
* <tr>
* <th>Error code</th>
* <th>Description</th>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_JOB_INVALID</td>
* <td>The \c job_id is not a valid job ID, i.e., no action was created for
* that ID or the action has been deleted.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_ACTION_EXISTS</td>
* <td>This action is already created.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_ACTION_LIMIT</td>
* <td>Exceeded maximum number of actions allowed.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_NODE_NOTINIT</td>
* <td>The calling node is not initialized.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_UNKNOWN</td>
* <td>The kernel could not be compiled or no CUDA device was
* available.</td>
* </tr>
* </table>
*
* \see mtapi_action_delete(), mtapi_finalize()
*
* \returns Handle to newly created CUDA action, invalid handle on error
* \threadsafe
* \ingroup C_MTAPI_CUDA
*/
mtapi_action_hndl_t mtapi_cuda_action_create(
MTAPI_IN mtapi_job_id_t job_id, /**< [in] Job id */
MTAPI_IN char* kernel_source, /**< [in] Pointer to kernel source */
MTAPI_IN char* kernel_name, /**< [in] Name of the kernel function */
MTAPI_IN mtapi_size_t local_work_size,
/**< [in] Size of local work group */
MTAPI_IN mtapi_size_t element_size, /**< [in] Size of one element in the
result buffer */
MTAPI_IN void* node_local_data, /**< [in] Data shared across tasks */
MTAPI_IN mtapi_size_t node_local_data_size,
/**< [in] Size of shared data */
MTAPI_OUT mtapi_status_t* status /**< [out] Pointer to error code,
may be \c MTAPI_NULL */
);
#ifdef __cplusplus
}
#endif
#endif // EMBB_MTAPI_C_MTAPI_CUDA_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <cuda.h>
#include <string.h>
#include <assert.h>
#include <embb/base/c/memory_allocation.h>
#include <embb/mtapi/c/mtapi_ext.h>
#include <embb/base/c/internal/unused.h>
#include <embb/mtapi/c/mtapi_cuda.h>
#include <embb_mtapi_task_t.h>
#include <embb_mtapi_action_t.h>
#include <embb_mtapi_node_t.h>
#include <mtapi_status_t.h>
struct embb_mtapi_cuda_plugin_struct {
CUdevice device;
CUcontext context;
CUstream stream;
int work_group_size;
};
typedef struct embb_mtapi_cuda_plugin_struct embb_mtapi_cuda_plugin_t;
static embb_mtapi_cuda_plugin_t embb_mtapi_cuda_plugin;
struct embb_mtapi_cuda_action_struct {
CUmodule module;
CUfunction function;
CUdeviceptr node_local_data;
int node_local_data_size;
size_t local_work_size;
size_t element_size;
};
typedef struct embb_mtapi_cuda_action_struct embb_mtapi_cuda_action_t;
struct embb_mtapi_cuda_task_struct {
CUdeviceptr arguments;
int arguments_size;
CUdeviceptr result_buffer;
int result_buffer_size;
mtapi_task_hndl_t task;
};
typedef struct embb_mtapi_cuda_task_struct embb_mtapi_cuda_task_t;
static size_t round_up(size_t group_size, size_t global_size) {
size_t r = global_size % group_size;
if (r == 0) {
return global_size;
} else {
return global_size + group_size - r;
}
}
static void CUDA_CB cuda_task_complete(
CUstream stream, CUresult status, void *data) {
EMBB_UNUSED(stream);
EMBB_UNUSED(status);
CUresult err;
EMBB_UNUSED_IN_RELEASE(err);
embb_mtapi_cuda_task_t * cuda_task = (embb_mtapi_cuda_task_t*)data;
if (embb_mtapi_node_is_initialized()) {
embb_mtapi_node_t * node = embb_mtapi_node_get_instance();
if (embb_mtapi_task_pool_is_handle_valid(
node->task_pool, cuda_task->task)) {
embb_mtapi_task_t * local_task =
embb_mtapi_task_pool_get_storage_for_handle(
node->task_pool, cuda_task->task);
if (0 != cuda_task->result_buffer) {
err = cuMemFree_v2(cuda_task->result_buffer);
assert(CUDA_SUCCESS == err);
}
if (0 != cuda_task->arguments) {
err = cuMemFree_v2(cuda_task->arguments);
assert(CUDA_SUCCESS == err);
}
embb_free(cuda_task);
embb_mtapi_task_set_state(local_task, MTAPI_TASK_COMPLETED);
}
}
}
static void cuda_task_start(
MTAPI_IN mtapi_task_hndl_t task,
MTAPI_OUT mtapi_status_t* status) {
mtapi_status_t local_status = MTAPI_ERR_UNKNOWN;
CUresult err = CUDA_SUCCESS;
embb_mtapi_cuda_plugin_t * plugin = &embb_mtapi_cuda_plugin;
if (embb_mtapi_node_is_initialized()) {
embb_mtapi_node_t * node = embb_mtapi_node_get_instance();
if (embb_mtapi_task_pool_is_handle_valid(node->task_pool, task)) {
embb_mtapi_task_t * local_task =
embb_mtapi_task_pool_get_storage_for_handle(node->task_pool, task);
if (embb_mtapi_action_pool_is_handle_valid(
node->action_pool, local_task->action)) {
embb_mtapi_action_t * local_action =
embb_mtapi_action_pool_get_storage_for_handle(
node->action_pool, local_task->action);
embb_mtapi_cuda_action_t * cuda_action =
(embb_mtapi_cuda_action_t*)local_action->plugin_data;
embb_mtapi_cuda_task_t * cuda_task =
(embb_mtapi_cuda_task_t*)embb_alloc(
sizeof(embb_mtapi_cuda_task_t));
size_t elements = local_task->result_size /
cuda_action->element_size;
size_t global_work_size;
if (0 == elements)
elements = 1;
global_work_size =
round_up(cuda_action->local_work_size, elements);
cuda_task->task = task;
cuda_task->arguments_size = (int)local_task->arguments_size;
if (0 < local_task->arguments_size) {
err = cuMemAlloc_v2(&cuda_task->arguments,
local_task->arguments_size);
} else {
cuda_task->arguments = 0;
}
cuda_task->result_buffer_size = (int)local_task->result_size;
if (0 < local_task->result_size) {
err = cuMemAlloc_v2(&cuda_task->result_buffer,
local_task->result_size);
} else {
cuda_task->result_buffer = 0;
}
if (0 != cuda_task->arguments) {
err = cuMemcpyHtoDAsync_v2(cuda_task->arguments,
local_task->arguments, (size_t)cuda_task->arguments_size,
plugin->stream);
}
if (CUDA_SUCCESS == err) {
embb_mtapi_task_set_state(local_task, MTAPI_TASK_RUNNING);
void * args[6];
args[0] = &cuda_task->arguments;
args[1] = &cuda_task->arguments_size;
args[2] = &cuda_task->result_buffer;
args[3] = &cuda_task->result_buffer_size;
args[4] = &cuda_action->node_local_data;
args[5] = &cuda_action->node_local_data_size;
err = cuLaunchKernel(cuda_action->function,
global_work_size, 1, 1,
cuda_action->local_work_size, 1, 1,
1024, plugin->stream, args, NULL);
if (CUDA_SUCCESS == err) {
if (0 != cuda_task->result_buffer) {
err = cuMemcpyDtoHAsync_v2(local_task->result_buffer,
cuda_task->result_buffer, cuda_task->result_buffer_size,
plugin->stream);
}
err = cuStreamAddCallback(plugin->stream, cuda_task_complete,
cuda_task, 0);
}
}
if (CUDA_SUCCESS != err) {
embb_mtapi_task_set_state(local_task, MTAPI_TASK_ERROR);
local_status = MTAPI_ERR_ACTION_FAILED;
} else {
local_status = MTAPI_SUCCESS;
}
}
}
}
mtapi_status_set(status, local_status);
}
static void cuda_task_cancel(
MTAPI_IN mtapi_task_hndl_t task,
MTAPI_OUT mtapi_status_t* status
) {
mtapi_status_t local_status = MTAPI_ERR_UNKNOWN;
EMBB_UNUSED(task);
mtapi_status_set(status, local_status);
}
static void cuda_action_finalize(
MTAPI_IN mtapi_action_hndl_t action,
MTAPI_OUT mtapi_status_t* status
) {
mtapi_status_t local_status = MTAPI_ERR_UNKNOWN;
CUresult err;
EMBB_UNUSED_IN_RELEASE(err);
if (embb_mtapi_node_is_initialized()) {
embb_mtapi_node_t * node = embb_mtapi_node_get_instance();
if (embb_mtapi_action_pool_is_handle_valid(node->action_pool, action)) {
embb_mtapi_action_t * local_action =
embb_mtapi_action_pool_get_storage_for_handle(
node->action_pool, action);
embb_mtapi_cuda_action_t * cuda_action =
(embb_mtapi_cuda_action_t *)local_action->plugin_data;
if (0 != cuda_action->node_local_data) {
err = cuMemFree_v2(cuda_action->node_local_data);
assert(CUDA_SUCCESS == err);
}
err = cuModuleUnload(cuda_action->module);
assert(CUDA_SUCCESS == err);
embb_free(cuda_action);
local_status = MTAPI_SUCCESS;
}
}
mtapi_status_set(status, local_status);
}
char buffer[1024];
void mtapi_cuda_plugin_initialize(
MTAPI_OUT mtapi_status_t* status) {
mtapi_status_t local_status = MTAPI_ERR_UNKNOWN;
CUresult err;
embb_mtapi_cuda_plugin_t * plugin = &embb_mtapi_cuda_plugin;
mtapi_status_set(status, MTAPI_ERR_UNKNOWN);
err = cuInit(0);
if (CUDA_SUCCESS != err) return;
err = cuDeviceGet(&plugin->device, 0);
if (CUDA_SUCCESS != err) return;
err = cuCtxCreate_v2(&plugin->context, 0, plugin->device);
if (CUDA_SUCCESS != err) return;
cuDeviceGetAttribute(&plugin->work_group_size,
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, plugin->device);
err = cuStreamCreate(&plugin->stream, CU_STREAM_NON_BLOCKING);
if (CUDA_SUCCESS != err) return;
local_status = MTAPI_SUCCESS;
mtapi_status_set(status, local_status);
}
void mtapi_cuda_plugin_finalize(
MTAPI_OUT mtapi_status_t* status) {
mtapi_status_t local_status = MTAPI_ERR_UNKNOWN;
CUresult err;
EMBB_UNUSED_IN_RELEASE(err);
embb_mtapi_cuda_plugin_t * plugin = &embb_mtapi_cuda_plugin;
/* finalization */
err = cuStreamDestroy_v2(plugin->stream);
assert(CUDA_SUCCESS == err);
err = cuCtxDestroy_v2(plugin->context);
assert(CUDA_SUCCESS == err);
local_status = MTAPI_SUCCESS;
mtapi_status_set(status, local_status);
}
mtapi_action_hndl_t mtapi_cuda_action_create(
MTAPI_IN mtapi_job_id_t job_id,
MTAPI_IN char* kernel_source,
MTAPI_IN char* kernel_name,
MTAPI_IN mtapi_size_t local_work_size,
MTAPI_IN mtapi_size_t element_size,
MTAPI_IN void* node_local_data,
MTAPI_IN mtapi_size_t node_local_data_size,
MTAPI_OUT mtapi_status_t* status) {
mtapi_status_t local_status = MTAPI_ERR_UNKNOWN;
mtapi_status_set(status, MTAPI_ERR_UNKNOWN);
CUresult err;
embb_mtapi_cuda_action_t * action =
(embb_mtapi_cuda_action_t*)embb_alloc(
sizeof(embb_mtapi_cuda_action_t));
mtapi_action_hndl_t action_hndl = { 0, 0 }; // invalid handle
mtapi_boolean_t free_module_on_error = MTAPI_FALSE;
mtapi_boolean_t free_node_local_data_on_error = MTAPI_FALSE;
action->local_work_size = local_work_size;
action->element_size = element_size;
/* initialization */
err = cuModuleLoadData(&action->module, kernel_source);
if (CUDA_SUCCESS == err) {
free_module_on_error = MTAPI_TRUE;
err = cuModuleGetFunction(&action->function, action->module, kernel_name);
}
if (CUDA_SUCCESS == err) {
if (0 < node_local_data_size) {
err = cuMemAlloc_v2(&action->node_local_data, node_local_data_size);
if (CUDA_SUCCESS == err) {
free_node_local_data_on_error = MTAPI_TRUE;
}
action->node_local_data_size = (int)node_local_data_size;
if (CUDA_SUCCESS == err) {
err = cuMemcpyHtoD_v2(
action->node_local_data, node_local_data, node_local_data_size);
}
} else {
action->node_local_data = 0;
action->node_local_data_size = 0;
}
}
if (CUDA_SUCCESS == err) {
action_hndl = mtapi_ext_plugin_action_create(
job_id,
cuda_task_start,
cuda_task_cancel,
cuda_action_finalize,
action,
node_local_data,
node_local_data_size,
MTAPI_NULL,
&local_status);
} else {
if (free_node_local_data_on_error) {
cuMemFree_v2(action->node_local_data);
}
if (free_module_on_error) {
cuModuleUnload(action->module);
}
embb_free(action);
}
mtapi_status_set(status, local_status);
return action_hndl;
}
extern "C" __global__ void test(
void* arguments,
int arguments_size,
void* result_buffer,
int result_buffer_size,
void* node_local_data,
int node_local_data_size) {
int ii = blockDim.x * blockIdx.x + threadIdx.x;
int elements = arguments_size / sizeof(float) / 2;
if (ii >= elements)
return;
float* a = (float*)arguments;
float* b = ((float*)arguments) + elements;
float* c = (float*)result_buffer;
float* d = (float*)node_local_data;
c[ii] = a[ii] + b[ii] + d[0];
}
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <embb_mtapi_cuda_test_task.h>
#include <embb/mtapi/c/mtapi_cuda.h>
#define MTAPI_CHECK_STATUS(status) \
PT_ASSERT(MTAPI_SUCCESS == status)
#define CUDA_DOMAIN 1
#define CUDA_NODE 2
#define CUDA_JOB 2
// CUDA Kernel Function for element by element vector addition
#include <embb_mtapi_cuda_test_kernel.h>
TaskTest::TaskTest() {
CreateUnit("mtapi cuda task test").Add(&TaskTest::TestBasic, this);
}
void TaskTest::TestBasic() {
mtapi_status_t status;
mtapi_job_hndl_t job;
mtapi_task_hndl_t task;
mtapi_action_hndl_t action;
const int kElements = 64;
float arguments[kElements * 2];
float results[kElements];
for (int ii = 0; ii < kElements; ii++) {
arguments[ii] = static_cast<float>(ii);
arguments[ii + kElements] = static_cast<float>(ii);
}
mtapi_cuda_plugin_initialize(&status);
if (status == MTAPI_ERR_FUNC_NOT_IMPLEMENTED) {
// CUDA unavailable
return;
}
MTAPI_CHECK_STATUS(status);
mtapi_initialize(
CUDA_DOMAIN,
CUDA_NODE,
MTAPI_NULL,
MTAPI_NULL,
&status);
MTAPI_CHECK_STATUS(status);
float node_local = 1.0f;
action = mtapi_cuda_action_create(
CUDA_JOB,
reinterpret_cast<char const *>(imageBytes), "test", 32, 4,
&node_local, sizeof(float),
&status);
MTAPI_CHECK_STATUS(status);
status = MTAPI_ERR_UNKNOWN;
job = mtapi_job_get(CUDA_JOB, CUDA_DOMAIN, &status);
MTAPI_CHECK_STATUS(status);
task = mtapi_task_start(
MTAPI_TASK_ID_NONE,
job,
arguments, kElements * 2 * sizeof(float),
results, kElements*sizeof(float),
MTAPI_DEFAULT_TASK_ATTRIBUTES,
MTAPI_GROUP_NONE,
&status);
MTAPI_CHECK_STATUS(status);
mtapi_task_wait(task, MTAPI_INFINITE, &status);
MTAPI_CHECK_STATUS(status);
for (int ii = 0; ii < kElements; ii++) {
PT_EXPECT_EQ(results[ii], ii * 2 + 1);
}
mtapi_action_delete(action, MTAPI_INFINITE, &status);
MTAPI_CHECK_STATUS(status);
mtapi_finalize(&status);
MTAPI_CHECK_STATUS(status);
mtapi_cuda_plugin_finalize(&status);
MTAPI_CHECK_STATUS(status);
}
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_
#define MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_
#include <partest/partest.h>
class TaskTest : public partest::TestCase {
public:
TaskTest();
private:
void TestBasic();
};
#endif // MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <partest/partest.h>
#include <embb_mtapi_cuda_test_task.h>
PT_MAIN("MTAPI CUDA") {
PT_RUN(TaskTest);
}
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......@@ -51,7 +51,7 @@ extern "C" {
*
* It must be called on all nodes using the MTAPI OpenCL plugin.
*
* Application software using MTAPI network must call
* Application software using MTAPI OpenCL must call
* mtapi_opencl_plugin_initialize() once per node. It is an error to call
* mtapi_opencl_plugin_initialize() multiple times
* from a given node, unless mtapi_opencl_plugin_finalize() is called in
......@@ -85,7 +85,7 @@ void mtapi_opencl_plugin_initialize(
* unless mtapi_opencl_plugin_initialize() has been called prior to each
* mtapi_opencl_plugin_finalize() call.
*
* All network tasks that have not completed and that have been started on the
* All OpenCL tasks that have not completed and that have been started on the
* node where mtapi_opencl_plugin_finalize() is called will be canceled
* (see mtapi_task_cancel()). mtapi_opencl_plugin_finalize() blocks until all
* tasks that have been started on the same node return. Tasks that execute
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......@@ -26,6 +26,7 @@
#include <CL/opencl.h>
#include <embb/base/c/internal/config.h>
#include <embb_mtapi_opencl_runtimelinker.h>
//////////////////////////////////////////////////////////////////////////
// function pointer wrappers to hide runtime linking
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
* Copyright (c) 2014-2016, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......
......@@ -79,7 +79,7 @@ retval=0
##Excluded files
RAND_FILES=( embb_mtapi_test_group.cc embb_mtapi_test_queue.cc embb_mtapi_test_task.cc queue_test-inl.h )
for project in base_c mtapi_c mtapi_plugins_c/mtapi_network_c mtapi_plugins_c/mtapi_opencl_c base_cpp mtapi_cpp algorithms_cpp containers_cpp dataflow_cpp
for project in base_c mtapi_c mtapi_plugins_c/mtapi_network_c mtapi_plugins_c/mtapi_opencl_c mtapi_plugins_c/mtapi_cuda_c base_cpp mtapi_cpp algorithms_cpp containers_cpp dataflow_cpp
do
echo "-> Doing project: $project"
dir=$d/$project
......@@ -120,7 +120,7 @@ do
current_rules+=",-runtime/threadsafe_fn" # These tests are allowed to use the thread unsafe rand()
fi
done
python ${c} --filter=$current_rules --root="$project/include" --output=vs7 $file
python ${c} --extensions=c,cc,h --filter=$current_rules --root="$project/include" --output=vs7 $file
if [[ ! $? -eq 0 ]]; then
retval=$((retval+1))
fi
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment