Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
FORMUS3IC_LAS3
/
embb
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
d6dcb944
authored
May 03, 2016
by
Marcus Winter
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mtapi_cuda_c: first implementation
parent
7b0d2c67
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
845 additions
and
0 deletions
+845
-0
CMakeLists.txt
+8
-0
mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt
+66
-0
mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h
+198
-0
mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c
+370
-0
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu
+17
-0
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc
+112
-0
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h
+40
-0
mtapi_plugins_c/mtapi_cuda_c/test/main.cc
+34
-0
No files found.
CMakeLists.txt
View file @
d6dcb944
...
...
@@ -30,6 +30,8 @@ set (EMBB_BASE_VERSION_MAJOR 0)
set
(
EMBB_BASE_VERSION_MINOR 3
)
set
(
EMBB_BASE_VERSION_PATCH 2
)
include
(
FindCUDA
)
# Fix compilation for CMake versions >= 3.1
#
# New Policy 0054:
...
...
@@ -144,6 +146,9 @@ set(EXPECTED_EMBB_TEST_EXECUTABLES "embb_algorithms_cpp_test"
if
(
BUILD_OPENCL_PLUGIN STREQUAL ON
)
list
(
APPEND EXPECTED_EMBB_TEST_EXECUTABLES
"embb_mtapi_opencl_c_test"
)
endif
()
if
(
CUDA_FOUND
)
list
(
APPEND EXPECTED_EMBB_TEST_EXECUTABLES
"embb_mtapi_cuda_c_test"
)
endif
()
## Copy test execution script to local binaries folder
...
...
@@ -180,6 +185,9 @@ add_subdirectory(mtapi_plugins_c/mtapi_network_c)
if
(
BUILD_OPENCL_PLUGIN STREQUAL ON
)
add_subdirectory
(
mtapi_plugins_c/mtapi_opencl_c
)
endif
()
if
(
CUDA_FOUND
)
add_subdirectory
(
mtapi_plugins_c/mtapi_cuda_c
)
endif
()
add_subdirectory
(
tasks_cpp
)
add_subdirectory
(
mtapi_cpp
)
add_subdirectory
(
containers_cpp
)
...
...
mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt
0 → 100644
View file @
d6dcb944
project
(
project_embb_mtapi_cuda_c
)
file
(
GLOB_RECURSE EMBB_MTAPI_CUDA_C_SOURCES
"src/*.c"
"src/*.h"
)
file
(
GLOB_RECURSE EMBB_MTAPI_CUDA_C_HEADERS
"include/*.h"
)
file
(
GLOB_RECURSE EMBB_MTAPI_CUDA_TEST_SOURCES
"test/*.cc"
"test/*.cu"
"test/*.h"
)
IF
(
MSVC8 OR MSVC9 OR MSVC10 OR MSVC11
)
FOREACH
(
src_tmp
${
EMBB_MTAPI_CUDA_TEST_SOURCES
}
)
SET_PROPERTY
(
SOURCE
${
src_tmp
}
PROPERTY LANGUAGE CXX
)
ENDFOREACH
(
src_tmp
)
FOREACH
(
src_tmp
${
EMBB_MTAPI_CUDA_C_SOURCES
}
)
SET_PROPERTY
(
SOURCE
${
src_tmp
}
PROPERTY LANGUAGE CXX
)
ENDFOREACH
(
src_tmp
)
ENDIF
()
IF
(
CMAKE_COMPILER_IS_GNUCC
)
set
(
EMBB_MTAPI_CUDA_C_LIBS dl
)
ENDIF
()
# Execute the GroupSources macro
include
(
${
CMAKE_SOURCE_DIR
}
/CMakeCommon/GroupSourcesMSVC.cmake
)
GroupSourcesMSVC
(
include
)
GroupSourcesMSVC
(
src
)
GroupSourcesMSVC
(
test
)
set
(
EMBB_MTAPI_CUDA_INCLUDE_DIRS
"include"
"src"
"test"
)
include_directories
(
${
EMBB_MTAPI_CUDA_INCLUDE_DIRS
}
${
CUDA_TOOLKIT_INCLUDE
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/../../base_c/include
${
CMAKE_CURRENT_BINARY_DIR
}
/../../base_c/include
${
CMAKE_CURRENT_SOURCE_DIR
}
/../../mtapi_c/include
${
CMAKE_CURRENT_SOURCE_DIR
}
/../../mtapi_c/src
)
add_library
(
embb_mtapi_cuda_c
${
EMBB_MTAPI_CUDA_C_SOURCES
}
${
EMBB_MTAPI_CUDA_C_HEADERS
}
)
target_link_libraries
(
embb_mtapi_cuda_c embb_mtapi_c embb_base_c
)
if
(
BUILD_TESTS STREQUAL ON
)
add_custom_command
(
DEPENDS
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/test/embb_mtapi_cuda_test_kernel.cu"
OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.ptx"
COMMAND
${
CUDA_NVCC_EXECUTABLE
}
-ptx -m 32
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/test/embb_mtapi_cuda_test_kernel.cu"
-o
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.ptx"
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
add_custom_command
(
DEPENDS
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.ptx"
OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.h"
COMMAND
${
CUDA_TOOLKIT_ROOT_DIR
}
/bin/bin2c -p 0
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.ptx"
>
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.h"
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
include_directories
(
${
CMAKE_CURRENT_BINARY_DIR
}
${
CMAKE_CURRENT_BINARY_DIR
}
/../../partest/include
)
add_executable
(
embb_mtapi_cuda_c_test
${
EMBB_MTAPI_CUDA_TEST_SOURCES
}
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.h"
)
target_link_libraries
(
embb_mtapi_cuda_c_test embb_mtapi_cuda_c embb_mtapi_c partest embb_base_c
${
compiler_libs
}
${
EMBB_MTAPI_CUDA_C_LIBS
}
${
CUDA_CUDA_LIBRARY
}
)
CopyBin
(
BIN embb_mtapi_cuda_c_test DEST
${
local_install_dir
}
)
endif
()
install
(
DIRECTORY
${
CMAKE_CURRENT_SOURCE_DIR
}
/include/
DESTINATION include FILES_MATCHING PATTERN
"*.h"
)
install
(
TARGETS embb_mtapi_cuda_c DESTINATION lib
)
mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_MTAPI_C_MTAPI_CUDA_H_
#define EMBB_MTAPI_C_MTAPI_CUDA_H_
#include <embb/mtapi/c/mtapi_ext.h>
#ifdef __cplusplus
extern
"C"
{
#endif
/**
* \defgroup C_MTAPI_CUDA MTAPI CUDA Plugin
*
* \ingroup C_MTAPI_EXT
*
* Provides functionality to execute tasks on CUDA devices.
*/
/**
* Initializes the MTAPI CUDA environment on a previously initialized MTAPI
* node.
*
* It must be called on all nodes using the MTAPI CUDA plugin.
*
* Application software using MTAPI network must call
* mtapi_cuda_plugin_initialize() once per node. It is an error to call
* mtapi_cuda_plugin_initialize() multiple times
* from a given node, unless mtapi_cuda_plugin_finalize() is called in
* between.
*
* On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is
* set to the appropriate error defined below.
* Error code | Description
* --------------------------- | ----------------------------------------------
* \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be initialized.
*
* \see mtapi_cuda_plugin_finalize()
*
* \notthreadsafe
* \ingroup C_MTAPI_CUDA
*/
void
mtapi_cuda_plugin_initialize
(
MTAPI_OUT
mtapi_status_t
*
status
/**< [out] Pointer to error code,
may be \c MTAPI_NULL */
);
/**
* Finalizes the MTAPI CUDA environment on the local MTAPI node.
*
* It has to be called by each node using MTAPI CUDA. It is an error to call
* mtapi_cuda_plugin_finalize() without first calling
* mtapi_cuda_plugin_initialize(). An MTAPI node can call
* mtapi_cuda_plugin_finalize() once for each call to
* mtapi_cuda_plugin_initialize(), but it is an error to call
* mtapi_cuda_plugin_finalize() multiple times from a given node
* unless mtapi_cuda_plugin_initialize() has been called prior to each
* mtapi_cuda_plugin_finalize() call.
*
* All network tasks that have not completed and that have been started on the
* node where mtapi_cuda_plugin_finalize() is called will be canceled
* (see mtapi_task_cancel()). mtapi_opencl_plugin_finalize() blocks until all
* tasks that have been started on the same node return. Tasks that execute
* actions on the node where mtapi_opencl_plugin_finalize() is called, also
* block finalization of the MTAPI CUDA system on that node.
*
* On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is
* set to the appropriate error defined below.
* Error code | Description
* ----------------------------- | --------------------------------------------
* \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be finalized.
*
* \see mtapi_opencl_plugin_initialize(), mtapi_task_cancel()
*
* \notthreadsafe
* \ingroup C_MTAPI_CUDA
*/
void
mtapi_cuda_plugin_finalize
(
MTAPI_OUT
mtapi_status_t
*
status
/**< [out] Pointer to error code,
may be \c MTAPI_NULL */
);
/**
* This function creates an CUDA action.
*
* It is called on the node where the user wants to execute an action on an
* CUDA device. A CUDA action contains a reference to a local job, the
* kernel source to compile and execute on the CUDA device, the name of the
* kernel function, a local work size (see CUDA specification for details)
* and the size of one element in the result buffer.
* After an CUDA action is created, it is referenced by the application using
* a node-local handle of type \c mtapi_action_hndl_t, or indirectly through a
* node-local job handle of type \c mtapi_job_hndl_t. An CUDA action's
* life-cycle begins with mtapi_cuda_action_create(), and ends when
* mtapi_action_delete() or mtapi_finalize() is called.
*
* To create an action, the application must supply the domain-wide job ID of
* the job associated with the action. Job IDs must be predefined in the
* application and runtime, of type \c mtapi_job_id_t, which is an
* implementation-defined type. The job ID is unique in the sense that it is
* unique for the job implemented by the action. However several actions may
* implement the same job for load balancing purposes.
*
* If \c node_local_data_size is not zero, \c node_local_data specifies the
* start of node local data shared by kernel functions executed on the same
* node. \c node_local_data_size can be used by the runtime for cache coherency
* operations.
*
* On success, an action handle is returned and \c *status is set to
* \c MTAPI_SUCCESS. On error, \c *status is set to the appropriate error
* defined below. In the case where the action already exists, \c status will
* be set to \c MTAPI_ERR_ACTION_EXISTS and the handle returned will not be a
* valid handle.
* <table>
* <tr>
* <th>Error code</th>
* <th>Description</th>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_JOB_INVALID</td>
* <td>The \c job_id is not a valid job ID, i.e., no action was created for
* that ID or the action has been deleted.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_ACTION_EXISTS</td>
* <td>This action is already created.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_ACTION_LIMIT</td>
* <td>Exceeded maximum number of actions allowed.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_NODE_NOTINIT</td>
* <td>The calling node is not initialized.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_UNKNOWN</td>
* <td>The kernel could not be compiled or no CUDA device was
* available.</td>
* </tr>
* </table>
*
* \see mtapi_action_delete(), mtapi_finalize()
*
* \returns Handle to newly created CUDA action, invalid handle on error
* \threadsafe
* \ingroup C_MTAPI_CUDA
*/
mtapi_action_hndl_t
mtapi_cuda_action_create
(
MTAPI_IN
mtapi_job_id_t
job_id
,
/**< [in] Job id */
MTAPI_IN
char
*
kernel_source
,
/**< [in] Pointer to kernel source */
MTAPI_IN
char
*
kernel_name
,
/**< [in] Name of the kernel function */
MTAPI_IN
mtapi_size_t
local_work_size
,
/**< [in] Size of local work group */
MTAPI_IN
mtapi_size_t
element_size
,
/**< [in] Size of one element in the
result buffer */
MTAPI_IN
void
*
node_local_data
,
/**< [in] Data shared across tasks */
MTAPI_IN
mtapi_size_t
node_local_data_size
,
/**< [in] Size of shared data */
MTAPI_OUT
mtapi_status_t
*
status
/**< [out] Pointer to error code,
may be \c MTAPI_NULL */
);
#ifdef __cplusplus
}
#endif
#endif // EMBB_MTAPI_C_MTAPI_CUDA_H_
mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <cuda.h>
#include <string.h>
#include <assert.h>
#include <embb/base/c/memory_allocation.h>
#include <embb/mtapi/c/mtapi_ext.h>
#include <embb/base/c/internal/unused.h>
#include <embb/mtapi/c/mtapi_cuda.h>
#include <embb_mtapi_task_t.h>
#include <embb_mtapi_action_t.h>
#include <embb_mtapi_node_t.h>
#include <mtapi_status_t.h>
struct
embb_mtapi_cuda_plugin_struct
{
CUdevice
device
;
CUcontext
context
;
CUstream
stream
;
int
work_group_size
;
};
typedef
struct
embb_mtapi_cuda_plugin_struct
embb_mtapi_cuda_plugin_t
;
static
embb_mtapi_cuda_plugin_t
embb_mtapi_cuda_plugin
;
struct
embb_mtapi_cuda_action_struct
{
CUmodule
module
;
CUfunction
function
;
CUdeviceptr
node_local_data
;
int
node_local_data_size
;
size_t
local_work_size
;
size_t
element_size
;
};
typedef
struct
embb_mtapi_cuda_action_struct
embb_mtapi_cuda_action_t
;
struct
embb_mtapi_cuda_task_struct
{
CUdeviceptr
arguments
;
int
arguments_size
;
CUdeviceptr
result_buffer
;
int
result_buffer_size
;
mtapi_task_hndl_t
task
;
};
typedef
struct
embb_mtapi_cuda_task_struct
embb_mtapi_cuda_task_t
;
static
size_t
round_up
(
size_t
group_size
,
size_t
global_size
)
{
size_t
r
=
global_size
%
group_size
;
if
(
r
==
0
)
{
return
global_size
;
}
else
{
return
global_size
+
group_size
-
r
;
}
}
static
void
CUDA_CB
cuda_task_complete
(
CUstream
stream
,
CUresult
status
,
void
*
data
)
{
EMBB_UNUSED
(
stream
);
EMBB_UNUSED
(
status
);
CUresult
err
;
EMBB_UNUSED_IN_RELEASE
(
err
);
embb_mtapi_cuda_task_t
*
cuda_task
=
(
embb_mtapi_cuda_task_t
*
)
data
;
if
(
embb_mtapi_node_is_initialized
())
{
embb_mtapi_node_t
*
node
=
embb_mtapi_node_get_instance
();
if
(
embb_mtapi_task_pool_is_handle_valid
(
node
->
task_pool
,
cuda_task
->
task
))
{
embb_mtapi_task_t
*
local_task
=
embb_mtapi_task_pool_get_storage_for_handle
(
node
->
task_pool
,
cuda_task
->
task
);
if
(
0
!=
cuda_task
->
result_buffer
)
{
err
=
cuMemFree_v2
(
cuda_task
->
result_buffer
);
assert
(
CUDA_SUCCESS
==
err
);
}
if
(
0
!=
cuda_task
->
arguments
)
{
err
=
cuMemFree_v2
(
cuda_task
->
arguments
);
assert
(
CUDA_SUCCESS
==
err
);
}
embb_free
(
cuda_task
);
embb_mtapi_task_set_state
(
local_task
,
MTAPI_TASK_COMPLETED
);
}
}
}
static
void
cuda_task_start
(
MTAPI_IN
mtapi_task_hndl_t
task
,
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
CUresult
err
=
CUDA_SUCCESS
;
embb_mtapi_cuda_plugin_t
*
plugin
=
&
embb_mtapi_cuda_plugin
;
if
(
embb_mtapi_node_is_initialized
())
{
embb_mtapi_node_t
*
node
=
embb_mtapi_node_get_instance
();
if
(
embb_mtapi_task_pool_is_handle_valid
(
node
->
task_pool
,
task
))
{
embb_mtapi_task_t
*
local_task
=
embb_mtapi_task_pool_get_storage_for_handle
(
node
->
task_pool
,
task
);
if
(
embb_mtapi_action_pool_is_handle_valid
(
node
->
action_pool
,
local_task
->
action
))
{
embb_mtapi_action_t
*
local_action
=
embb_mtapi_action_pool_get_storage_for_handle
(
node
->
action_pool
,
local_task
->
action
);
embb_mtapi_cuda_action_t
*
cuda_action
=
(
embb_mtapi_cuda_action_t
*
)
local_action
->
plugin_data
;
embb_mtapi_cuda_task_t
*
cuda_task
=
(
embb_mtapi_cuda_task_t
*
)
embb_alloc
(
sizeof
(
embb_mtapi_cuda_task_t
));
size_t
elements
=
local_task
->
result_size
/
cuda_action
->
element_size
;
size_t
global_work_size
;
if
(
0
==
elements
)
elements
=
1
;
global_work_size
=
round_up
(
cuda_action
->
local_work_size
,
elements
);
cuda_task
->
task
=
task
;
cuda_task
->
arguments_size
=
(
int
)
local_task
->
arguments_size
;
if
(
0
<
local_task
->
arguments_size
)
{
err
=
cuMemAlloc_v2
(
&
cuda_task
->
arguments
,
local_task
->
arguments_size
);
}
else
{
cuda_task
->
arguments
=
0
;
}
cuda_task
->
result_buffer_size
=
(
int
)
local_task
->
result_size
;
if
(
0
<
local_task
->
result_size
)
{
err
=
cuMemAlloc_v2
(
&
cuda_task
->
result_buffer
,
local_task
->
result_size
);
}
else
{
cuda_task
->
result_buffer
=
0
;
}
if
(
0
!=
cuda_task
->
arguments
)
{
err
=
cuMemcpyHtoDAsync_v2
(
cuda_task
->
arguments
,
local_task
->
arguments
,
(
size_t
)
cuda_task
->
arguments_size
,
plugin
->
stream
);
}
if
(
CUDA_SUCCESS
==
err
)
{
embb_mtapi_task_set_state
(
local_task
,
MTAPI_TASK_RUNNING
);
void
*
args
[
6
];
args
[
0
]
=
&
cuda_task
->
arguments
;
args
[
1
]
=
&
cuda_task
->
arguments_size
;
args
[
2
]
=
&
cuda_task
->
result_buffer
;
args
[
3
]
=
&
cuda_task
->
result_buffer_size
;
args
[
4
]
=
&
cuda_action
->
node_local_data
;
args
[
5
]
=
&
cuda_action
->
node_local_data_size
;
err
=
cuLaunchKernel
(
cuda_action
->
function
,
global_work_size
,
1
,
1
,
cuda_action
->
local_work_size
,
1
,
1
,
1024
,
plugin
->
stream
,
args
,
NULL
);
if
(
CUDA_SUCCESS
==
err
)
{
if
(
0
!=
cuda_task
->
result_buffer
)
{
err
=
cuMemcpyDtoHAsync_v2
(
local_task
->
result_buffer
,
cuda_task
->
result_buffer
,
cuda_task
->
result_buffer_size
,
plugin
->
stream
);
}
err
=
cuStreamAddCallback
(
plugin
->
stream
,
cuda_task_complete
,
cuda_task
,
0
);
}
}
if
(
CUDA_SUCCESS
!=
err
)
{
embb_mtapi_task_set_state
(
local_task
,
MTAPI_TASK_ERROR
);
local_status
=
MTAPI_ERR_ACTION_FAILED
;
}
else
{
local_status
=
MTAPI_SUCCESS
;
}
}
}
}
mtapi_status_set
(
status
,
local_status
);
}
static
void
cuda_task_cancel
(
MTAPI_IN
mtapi_task_hndl_t
task
,
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
EMBB_UNUSED
(
task
);
mtapi_status_set
(
status
,
local_status
);
}
static
void
cuda_action_finalize
(
MTAPI_IN
mtapi_action_hndl_t
action
,
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
CUresult
err
;
EMBB_UNUSED_IN_RELEASE
(
err
);
if
(
embb_mtapi_node_is_initialized
())
{
embb_mtapi_node_t
*
node
=
embb_mtapi_node_get_instance
();
if
(
embb_mtapi_action_pool_is_handle_valid
(
node
->
action_pool
,
action
))
{
embb_mtapi_action_t
*
local_action
=
embb_mtapi_action_pool_get_storage_for_handle
(
node
->
action_pool
,
action
);
embb_mtapi_cuda_action_t
*
cuda_action
=
(
embb_mtapi_cuda_action_t
*
)
local_action
->
plugin_data
;
if
(
0
!=
cuda_action
->
node_local_data
)
{
err
=
cuMemFree_v2
(
cuda_action
->
node_local_data
);
assert
(
CUDA_SUCCESS
==
err
);
}
err
=
cuModuleUnload
(
cuda_action
->
module
);
assert
(
CUDA_SUCCESS
==
err
);
embb_free
(
cuda_action
);
local_status
=
MTAPI_SUCCESS
;
}
}
mtapi_status_set
(
status
,
local_status
);
}
char
buffer
[
1024
];
void
mtapi_cuda_plugin_initialize
(
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
CUresult
err
;
embb_mtapi_cuda_plugin_t
*
plugin
=
&
embb_mtapi_cuda_plugin
;
mtapi_status_set
(
status
,
MTAPI_ERR_UNKNOWN
);
err
=
cuInit
(
0
);
if
(
CUDA_SUCCESS
!=
err
)
return
;
err
=
cuDeviceGet
(
&
plugin
->
device
,
0
);
if
(
CUDA_SUCCESS
!=
err
)
return
;
err
=
cuCtxCreate_v2
(
&
plugin
->
context
,
0
,
plugin
->
device
);
if
(
CUDA_SUCCESS
!=
err
)
return
;
cuDeviceGetAttribute
(
&
plugin
->
work_group_size
,
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X
,
plugin
->
device
);
err
=
cuStreamCreate
(
&
plugin
->
stream
,
CU_STREAM_NON_BLOCKING
);
if
(
CUDA_SUCCESS
!=
err
)
return
;
local_status
=
MTAPI_SUCCESS
;
mtapi_status_set
(
status
,
local_status
);
}
void
mtapi_cuda_plugin_finalize
(
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
CUresult
err
;
EMBB_UNUSED_IN_RELEASE
(
err
);
embb_mtapi_cuda_plugin_t
*
plugin
=
&
embb_mtapi_cuda_plugin
;
/* finalization */
err
=
cuStreamDestroy_v2
(
plugin
->
stream
);
assert
(
CUDA_SUCCESS
==
err
);
err
=
cuCtxDestroy_v2
(
plugin
->
context
);
assert
(
CUDA_SUCCESS
==
err
);
local_status
=
MTAPI_SUCCESS
;
mtapi_status_set
(
status
,
local_status
);
}
mtapi_action_hndl_t
mtapi_cuda_action_create
(
MTAPI_IN
mtapi_job_id_t
job_id
,
MTAPI_IN
char
*
kernel_source
,
MTAPI_IN
char
*
kernel_name
,
MTAPI_IN
mtapi_size_t
local_work_size
,
MTAPI_IN
mtapi_size_t
element_size
,
MTAPI_IN
void
*
node_local_data
,
MTAPI_IN
mtapi_size_t
node_local_data_size
,
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
mtapi_status_set
(
status
,
MTAPI_ERR_UNKNOWN
);
CUresult
err
;
embb_mtapi_cuda_action_t
*
action
=
(
embb_mtapi_cuda_action_t
*
)
embb_alloc
(
sizeof
(
embb_mtapi_cuda_action_t
));
mtapi_action_hndl_t
action_hndl
=
{
0
,
0
};
// invalid handle
mtapi_boolean_t
free_module_on_error
=
MTAPI_FALSE
;
mtapi_boolean_t
free_node_local_data_on_error
=
MTAPI_FALSE
;
action
->
local_work_size
=
local_work_size
;
action
->
element_size
=
element_size
;
/* initialization */
err
=
cuModuleLoadData
(
&
action
->
module
,
kernel_source
);
if
(
CUDA_SUCCESS
==
err
)
{
free_module_on_error
=
MTAPI_TRUE
;
err
=
cuModuleGetFunction
(
&
action
->
function
,
action
->
module
,
kernel_name
);
}
if
(
CUDA_SUCCESS
==
err
)
{
if
(
0
<
node_local_data_size
)
{
err
=
cuMemAlloc_v2
(
&
action
->
node_local_data
,
node_local_data_size
);
if
(
CUDA_SUCCESS
==
err
)
{
free_node_local_data_on_error
=
MTAPI_TRUE
;
}
action
->
node_local_data_size
=
(
int
)
node_local_data_size
;
if
(
CUDA_SUCCESS
==
err
)
{
err
=
cuMemcpyHtoD_v2
(
action
->
node_local_data
,
node_local_data
,
node_local_data_size
);
}
}
else
{
action
->
node_local_data
=
0
;
action
->
node_local_data_size
=
0
;
}
}
if
(
CUDA_SUCCESS
==
err
)
{
action_hndl
=
mtapi_ext_plugin_action_create
(
job_id
,
cuda_task_start
,
cuda_task_cancel
,
cuda_action_finalize
,
action
,
node_local_data
,
node_local_data_size
,
MTAPI_NULL
,
&
local_status
);
}
else
{
if
(
free_node_local_data_on_error
)
{
cuMemFree_v2
(
action
->
node_local_data
);
}
if
(
free_module_on_error
)
{
cuModuleUnload
(
action
->
module
);
}
embb_free
(
action
);
}
mtapi_status_set
(
status
,
local_status
);
return
action_hndl
;
}
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu
0 → 100644
View file @
d6dcb944
extern "C" __global__ void test(
void* arguments,
int arguments_size,
void* result_buffer,
int result_buffer_size,
void* node_local_data,
int node_local_data_size) {
int ii = blockDim.x * blockIdx.x + threadIdx.x;
int elements = arguments_size / sizeof(float) / 2;
if (ii >= elements)
return;
float* a = (float*)arguments;
float* b = ((float*)arguments) + elements;
float* c = (float*)result_buffer;
float* d = (float*)node_local_data;
c[ii] = a[ii] + b[ii] + d[0];
}
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <embb_mtapi_cuda_test_task.h>
#include <embb/mtapi/c/mtapi_cuda.h>
#define MTAPI_CHECK_STATUS(status) \
PT_ASSERT(MTAPI_SUCCESS == status)
#define CUDA_DOMAIN 1
#define CUDA_NODE 2
#define CUDA_JOB 2
// CUDA Kernel Function for element by element vector addition
#include "embb_mtapi_cuda_test_kernel.h"
TaskTest
::
TaskTest
()
{
CreateUnit
(
"mtapi cuda task test"
).
Add
(
&
TaskTest
::
TestBasic
,
this
);
}
void
TaskTest
::
TestBasic
()
{
mtapi_status_t
status
;
mtapi_job_hndl_t
job
;
mtapi_task_hndl_t
task
;
mtapi_action_hndl_t
action
;
const
int
kElements
=
64
;
float
arguments
[
kElements
*
2
];
float
results
[
kElements
];
for
(
int
ii
=
0
;
ii
<
kElements
;
ii
++
)
{
arguments
[
ii
]
=
static_cast
<
float
>
(
ii
);
arguments
[
ii
+
kElements
]
=
static_cast
<
float
>
(
ii
);
}
mtapi_cuda_plugin_initialize
(
&
status
);
if
(
status
==
MTAPI_ERR_FUNC_NOT_IMPLEMENTED
)
{
// CUDA unavailable
return
;
}
MTAPI_CHECK_STATUS
(
status
);
mtapi_initialize
(
CUDA_DOMAIN
,
CUDA_NODE
,
MTAPI_NULL
,
MTAPI_NULL
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
float
node_local
=
1.0
f
;
action
=
mtapi_cuda_action_create
(
CUDA_JOB
,
reinterpret_cast
<
char
const
*>
(
imageBytes
),
"test"
,
32
,
4
,
&
node_local
,
sizeof
(
float
),
&
status
);
MTAPI_CHECK_STATUS
(
status
);
status
=
MTAPI_ERR_UNKNOWN
;
job
=
mtapi_job_get
(
CUDA_JOB
,
CUDA_DOMAIN
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
task
=
mtapi_task_start
(
MTAPI_TASK_ID_NONE
,
job
,
arguments
,
kElements
*
2
*
sizeof
(
float
),
results
,
kElements
*
sizeof
(
float
),
MTAPI_DEFAULT_TASK_ATTRIBUTES
,
MTAPI_GROUP_NONE
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
mtapi_task_wait
(
task
,
MTAPI_INFINITE
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
for
(
int
ii
=
0
;
ii
<
kElements
;
ii
++
)
{
PT_EXPECT_EQ
(
results
[
ii
],
ii
*
2
+
1
);
}
mtapi_action_delete
(
action
,
MTAPI_INFINITE
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
mtapi_finalize
(
&
status
);
MTAPI_CHECK_STATUS
(
status
);
mtapi_cuda_plugin_finalize
(
&
status
);
MTAPI_CHECK_STATUS
(
status
);
}
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_
#define MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_
#include <partest/partest.h>
class
TaskTest
:
public
partest
::
TestCase
{
public
:
TaskTest
();
private
:
void
TestBasic
();
};
#endif // MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_
mtapi_plugins_c/mtapi_cuda_c/test/main.cc
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <partest/partest.h>
#include <embb_mtapi_cuda_test_task.h>
PT_MAIN
(
"MTAPI CUDA"
)
{
PT_RUN
(
TaskTest
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment