Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
FORMUS3IC_LAS3
/
embb
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
d6dcb944
authored
9 years ago
by
Marcus Winter
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mtapi_cuda_c: first implementation
parent
7b0d2c67
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
845 additions
and
0 deletions
+845
-0
CMakeLists.txt
+8
-0
mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt
+66
-0
mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h
+198
-0
mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c
+370
-0
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu
+17
-0
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc
+112
-0
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h
+40
-0
mtapi_plugins_c/mtapi_cuda_c/test/main.cc
+34
-0
No files found.
CMakeLists.txt
View file @
d6dcb944
...
...
@@ -30,6 +30,8 @@ set (EMBB_BASE_VERSION_MAJOR 0)
set
(
EMBB_BASE_VERSION_MINOR 3
)
set
(
EMBB_BASE_VERSION_PATCH 2
)
include
(
FindCUDA
)
# Fix compilation for CMake versions >= 3.1
#
# New Policy 0054:
...
...
@@ -144,6 +146,9 @@ set(EXPECTED_EMBB_TEST_EXECUTABLES "embb_algorithms_cpp_test"
if
(
BUILD_OPENCL_PLUGIN STREQUAL ON
)
list
(
APPEND EXPECTED_EMBB_TEST_EXECUTABLES
"embb_mtapi_opencl_c_test"
)
endif
()
if
(
CUDA_FOUND
)
list
(
APPEND EXPECTED_EMBB_TEST_EXECUTABLES
"embb_mtapi_cuda_c_test"
)
endif
()
## Copy test execution script to local binaries folder
...
...
@@ -180,6 +185,9 @@ add_subdirectory(mtapi_plugins_c/mtapi_network_c)
if
(
BUILD_OPENCL_PLUGIN STREQUAL ON
)
add_subdirectory
(
mtapi_plugins_c/mtapi_opencl_c
)
endif
()
if
(
CUDA_FOUND
)
add_subdirectory
(
mtapi_plugins_c/mtapi_cuda_c
)
endif
()
add_subdirectory
(
tasks_cpp
)
add_subdirectory
(
mtapi_cpp
)
add_subdirectory
(
containers_cpp
)
...
...
This diff is collapsed.
Click to expand it.
mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt
0 → 100644
View file @
d6dcb944
project
(
project_embb_mtapi_cuda_c
)
file
(
GLOB_RECURSE EMBB_MTAPI_CUDA_C_SOURCES
"src/*.c"
"src/*.h"
)
file
(
GLOB_RECURSE EMBB_MTAPI_CUDA_C_HEADERS
"include/*.h"
)
file
(
GLOB_RECURSE EMBB_MTAPI_CUDA_TEST_SOURCES
"test/*.cc"
"test/*.cu"
"test/*.h"
)
IF
(
MSVC8 OR MSVC9 OR MSVC10 OR MSVC11
)
FOREACH
(
src_tmp
${
EMBB_MTAPI_CUDA_TEST_SOURCES
}
)
SET_PROPERTY
(
SOURCE
${
src_tmp
}
PROPERTY LANGUAGE CXX
)
ENDFOREACH
(
src_tmp
)
FOREACH
(
src_tmp
${
EMBB_MTAPI_CUDA_C_SOURCES
}
)
SET_PROPERTY
(
SOURCE
${
src_tmp
}
PROPERTY LANGUAGE CXX
)
ENDFOREACH
(
src_tmp
)
ENDIF
()
IF
(
CMAKE_COMPILER_IS_GNUCC
)
set
(
EMBB_MTAPI_CUDA_C_LIBS dl
)
ENDIF
()
# Execute the GroupSources macro
include
(
${
CMAKE_SOURCE_DIR
}
/CMakeCommon/GroupSourcesMSVC.cmake
)
GroupSourcesMSVC
(
include
)
GroupSourcesMSVC
(
src
)
GroupSourcesMSVC
(
test
)
set
(
EMBB_MTAPI_CUDA_INCLUDE_DIRS
"include"
"src"
"test"
)
include_directories
(
${
EMBB_MTAPI_CUDA_INCLUDE_DIRS
}
${
CUDA_TOOLKIT_INCLUDE
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/../../base_c/include
${
CMAKE_CURRENT_BINARY_DIR
}
/../../base_c/include
${
CMAKE_CURRENT_SOURCE_DIR
}
/../../mtapi_c/include
${
CMAKE_CURRENT_SOURCE_DIR
}
/../../mtapi_c/src
)
add_library
(
embb_mtapi_cuda_c
${
EMBB_MTAPI_CUDA_C_SOURCES
}
${
EMBB_MTAPI_CUDA_C_HEADERS
}
)
target_link_libraries
(
embb_mtapi_cuda_c embb_mtapi_c embb_base_c
)
if
(
BUILD_TESTS STREQUAL ON
)
add_custom_command
(
DEPENDS
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/test/embb_mtapi_cuda_test_kernel.cu"
OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.ptx"
COMMAND
${
CUDA_NVCC_EXECUTABLE
}
-ptx -m 32
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/test/embb_mtapi_cuda_test_kernel.cu"
-o
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.ptx"
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
add_custom_command
(
DEPENDS
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.ptx"
OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.h"
COMMAND
${
CUDA_TOOLKIT_ROOT_DIR
}
/bin/bin2c -p 0
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.ptx"
>
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.h"
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
include_directories
(
${
CMAKE_CURRENT_BINARY_DIR
}
${
CMAKE_CURRENT_BINARY_DIR
}
/../../partest/include
)
add_executable
(
embb_mtapi_cuda_c_test
${
EMBB_MTAPI_CUDA_TEST_SOURCES
}
"
${
CMAKE_CURRENT_BINARY_DIR
}
/embb_mtapi_cuda_test_kernel.h"
)
target_link_libraries
(
embb_mtapi_cuda_c_test embb_mtapi_cuda_c embb_mtapi_c partest embb_base_c
${
compiler_libs
}
${
EMBB_MTAPI_CUDA_C_LIBS
}
${
CUDA_CUDA_LIBRARY
}
)
CopyBin
(
BIN embb_mtapi_cuda_c_test DEST
${
local_install_dir
}
)
endif
()
install
(
DIRECTORY
${
CMAKE_CURRENT_SOURCE_DIR
}
/include/
DESTINATION include FILES_MATCHING PATTERN
"*.h"
)
install
(
TARGETS embb_mtapi_cuda_c DESTINATION lib
)
This diff is collapsed.
Click to expand it.
mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EMBB_MTAPI_C_MTAPI_CUDA_H_
#define EMBB_MTAPI_C_MTAPI_CUDA_H_
#include <embb/mtapi/c/mtapi_ext.h>
#ifdef __cplusplus
extern
"C"
{
#endif
/**
* \defgroup C_MTAPI_CUDA MTAPI CUDA Plugin
*
* \ingroup C_MTAPI_EXT
*
* Provides functionality to execute tasks on CUDA devices.
*/
/**
* Initializes the MTAPI CUDA environment on a previously initialized MTAPI
* node.
*
* It must be called on all nodes using the MTAPI CUDA plugin.
*
* Application software using MTAPI network must call
* mtapi_cuda_plugin_initialize() once per node. It is an error to call
* mtapi_cuda_plugin_initialize() multiple times
* from a given node, unless mtapi_cuda_plugin_finalize() is called in
* between.
*
* On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is
* set to the appropriate error defined below.
* Error code | Description
* --------------------------- | ----------------------------------------------
* \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be initialized.
*
* \see mtapi_cuda_plugin_finalize()
*
* \notthreadsafe
* \ingroup C_MTAPI_CUDA
*/
void
mtapi_cuda_plugin_initialize
(
MTAPI_OUT
mtapi_status_t
*
status
/**< [out] Pointer to error code,
may be \c MTAPI_NULL */
);
/**
* Finalizes the MTAPI CUDA environment on the local MTAPI node.
*
* It has to be called by each node using MTAPI CUDA. It is an error to call
* mtapi_cuda_plugin_finalize() without first calling
* mtapi_cuda_plugin_initialize(). An MTAPI node can call
* mtapi_cuda_plugin_finalize() once for each call to
* mtapi_cuda_plugin_initialize(), but it is an error to call
* mtapi_cuda_plugin_finalize() multiple times from a given node
* unless mtapi_cuda_plugin_initialize() has been called prior to each
* mtapi_cuda_plugin_finalize() call.
*
* All network tasks that have not completed and that have been started on the
* node where mtapi_cuda_plugin_finalize() is called will be canceled
* (see mtapi_task_cancel()). mtapi_opencl_plugin_finalize() blocks until all
* tasks that have been started on the same node return. Tasks that execute
* actions on the node where mtapi_opencl_plugin_finalize() is called, also
* block finalization of the MTAPI CUDA system on that node.
*
* On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is
* set to the appropriate error defined below.
* Error code | Description
* ----------------------------- | --------------------------------------------
* \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be finalized.
*
* \see mtapi_opencl_plugin_initialize(), mtapi_task_cancel()
*
* \notthreadsafe
* \ingroup C_MTAPI_CUDA
*/
void
mtapi_cuda_plugin_finalize
(
MTAPI_OUT
mtapi_status_t
*
status
/**< [out] Pointer to error code,
may be \c MTAPI_NULL */
);
/**
* This function creates an CUDA action.
*
* It is called on the node where the user wants to execute an action on an
* CUDA device. A CUDA action contains a reference to a local job, the
* kernel source to compile and execute on the CUDA device, the name of the
* kernel function, a local work size (see CUDA specification for details)
* and the size of one element in the result buffer.
* After an CUDA action is created, it is referenced by the application using
* a node-local handle of type \c mtapi_action_hndl_t, or indirectly through a
* node-local job handle of type \c mtapi_job_hndl_t. An CUDA action's
* life-cycle begins with mtapi_cuda_action_create(), and ends when
* mtapi_action_delete() or mtapi_finalize() is called.
*
* To create an action, the application must supply the domain-wide job ID of
* the job associated with the action. Job IDs must be predefined in the
* application and runtime, of type \c mtapi_job_id_t, which is an
* implementation-defined type. The job ID is unique in the sense that it is
* unique for the job implemented by the action. However several actions may
* implement the same job for load balancing purposes.
*
* If \c node_local_data_size is not zero, \c node_local_data specifies the
* start of node local data shared by kernel functions executed on the same
* node. \c node_local_data_size can be used by the runtime for cache coherency
* operations.
*
* On success, an action handle is returned and \c *status is set to
* \c MTAPI_SUCCESS. On error, \c *status is set to the appropriate error
* defined below. In the case where the action already exists, \c status will
* be set to \c MTAPI_ERR_ACTION_EXISTS and the handle returned will not be a
* valid handle.
* <table>
* <tr>
* <th>Error code</th>
* <th>Description</th>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_JOB_INVALID</td>
* <td>The \c job_id is not a valid job ID, i.e., no action was created for
* that ID or the action has been deleted.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_ACTION_EXISTS</td>
* <td>This action is already created.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_ACTION_LIMIT</td>
* <td>Exceeded maximum number of actions allowed.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_NODE_NOTINIT</td>
* <td>The calling node is not initialized.</td>
* </tr>
* <tr>
* <td>\c MTAPI_ERR_UNKNOWN</td>
* <td>The kernel could not be compiled or no CUDA device was
* available.</td>
* </tr>
* </table>
*
* \see mtapi_action_delete(), mtapi_finalize()
*
* \returns Handle to newly created CUDA action, invalid handle on error
* \threadsafe
* \ingroup C_MTAPI_CUDA
*/
mtapi_action_hndl_t
mtapi_cuda_action_create
(
MTAPI_IN
mtapi_job_id_t
job_id
,
/**< [in] Job id */
MTAPI_IN
char
*
kernel_source
,
/**< [in] Pointer to kernel source */
MTAPI_IN
char
*
kernel_name
,
/**< [in] Name of the kernel function */
MTAPI_IN
mtapi_size_t
local_work_size
,
/**< [in] Size of local work group */
MTAPI_IN
mtapi_size_t
element_size
,
/**< [in] Size of one element in the
result buffer */
MTAPI_IN
void
*
node_local_data
,
/**< [in] Data shared across tasks */
MTAPI_IN
mtapi_size_t
node_local_data_size
,
/**< [in] Size of shared data */
MTAPI_OUT
mtapi_status_t
*
status
/**< [out] Pointer to error code,
may be \c MTAPI_NULL */
);
#ifdef __cplusplus
}
#endif
#endif // EMBB_MTAPI_C_MTAPI_CUDA_H_
This diff is collapsed.
Click to expand it.
mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <cuda.h>
#include <string.h>
#include <assert.h>
#include <embb/base/c/memory_allocation.h>
#include <embb/mtapi/c/mtapi_ext.h>
#include <embb/base/c/internal/unused.h>
#include <embb/mtapi/c/mtapi_cuda.h>
#include <embb_mtapi_task_t.h>
#include <embb_mtapi_action_t.h>
#include <embb_mtapi_node_t.h>
#include <mtapi_status_t.h>
struct
embb_mtapi_cuda_plugin_struct
{
CUdevice
device
;
CUcontext
context
;
CUstream
stream
;
int
work_group_size
;
};
typedef
struct
embb_mtapi_cuda_plugin_struct
embb_mtapi_cuda_plugin_t
;
static
embb_mtapi_cuda_plugin_t
embb_mtapi_cuda_plugin
;
struct
embb_mtapi_cuda_action_struct
{
CUmodule
module
;
CUfunction
function
;
CUdeviceptr
node_local_data
;
int
node_local_data_size
;
size_t
local_work_size
;
size_t
element_size
;
};
typedef
struct
embb_mtapi_cuda_action_struct
embb_mtapi_cuda_action_t
;
struct
embb_mtapi_cuda_task_struct
{
CUdeviceptr
arguments
;
int
arguments_size
;
CUdeviceptr
result_buffer
;
int
result_buffer_size
;
mtapi_task_hndl_t
task
;
};
typedef
struct
embb_mtapi_cuda_task_struct
embb_mtapi_cuda_task_t
;
static
size_t
round_up
(
size_t
group_size
,
size_t
global_size
)
{
size_t
r
=
global_size
%
group_size
;
if
(
r
==
0
)
{
return
global_size
;
}
else
{
return
global_size
+
group_size
-
r
;
}
}
static
void
CUDA_CB
cuda_task_complete
(
CUstream
stream
,
CUresult
status
,
void
*
data
)
{
EMBB_UNUSED
(
stream
);
EMBB_UNUSED
(
status
);
CUresult
err
;
EMBB_UNUSED_IN_RELEASE
(
err
);
embb_mtapi_cuda_task_t
*
cuda_task
=
(
embb_mtapi_cuda_task_t
*
)
data
;
if
(
embb_mtapi_node_is_initialized
())
{
embb_mtapi_node_t
*
node
=
embb_mtapi_node_get_instance
();
if
(
embb_mtapi_task_pool_is_handle_valid
(
node
->
task_pool
,
cuda_task
->
task
))
{
embb_mtapi_task_t
*
local_task
=
embb_mtapi_task_pool_get_storage_for_handle
(
node
->
task_pool
,
cuda_task
->
task
);
if
(
0
!=
cuda_task
->
result_buffer
)
{
err
=
cuMemFree_v2
(
cuda_task
->
result_buffer
);
assert
(
CUDA_SUCCESS
==
err
);
}
if
(
0
!=
cuda_task
->
arguments
)
{
err
=
cuMemFree_v2
(
cuda_task
->
arguments
);
assert
(
CUDA_SUCCESS
==
err
);
}
embb_free
(
cuda_task
);
embb_mtapi_task_set_state
(
local_task
,
MTAPI_TASK_COMPLETED
);
}
}
}
static
void
cuda_task_start
(
MTAPI_IN
mtapi_task_hndl_t
task
,
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
CUresult
err
=
CUDA_SUCCESS
;
embb_mtapi_cuda_plugin_t
*
plugin
=
&
embb_mtapi_cuda_plugin
;
if
(
embb_mtapi_node_is_initialized
())
{
embb_mtapi_node_t
*
node
=
embb_mtapi_node_get_instance
();
if
(
embb_mtapi_task_pool_is_handle_valid
(
node
->
task_pool
,
task
))
{
embb_mtapi_task_t
*
local_task
=
embb_mtapi_task_pool_get_storage_for_handle
(
node
->
task_pool
,
task
);
if
(
embb_mtapi_action_pool_is_handle_valid
(
node
->
action_pool
,
local_task
->
action
))
{
embb_mtapi_action_t
*
local_action
=
embb_mtapi_action_pool_get_storage_for_handle
(
node
->
action_pool
,
local_task
->
action
);
embb_mtapi_cuda_action_t
*
cuda_action
=
(
embb_mtapi_cuda_action_t
*
)
local_action
->
plugin_data
;
embb_mtapi_cuda_task_t
*
cuda_task
=
(
embb_mtapi_cuda_task_t
*
)
embb_alloc
(
sizeof
(
embb_mtapi_cuda_task_t
));
size_t
elements
=
local_task
->
result_size
/
cuda_action
->
element_size
;
size_t
global_work_size
;
if
(
0
==
elements
)
elements
=
1
;
global_work_size
=
round_up
(
cuda_action
->
local_work_size
,
elements
);
cuda_task
->
task
=
task
;
cuda_task
->
arguments_size
=
(
int
)
local_task
->
arguments_size
;
if
(
0
<
local_task
->
arguments_size
)
{
err
=
cuMemAlloc_v2
(
&
cuda_task
->
arguments
,
local_task
->
arguments_size
);
}
else
{
cuda_task
->
arguments
=
0
;
}
cuda_task
->
result_buffer_size
=
(
int
)
local_task
->
result_size
;
if
(
0
<
local_task
->
result_size
)
{
err
=
cuMemAlloc_v2
(
&
cuda_task
->
result_buffer
,
local_task
->
result_size
);
}
else
{
cuda_task
->
result_buffer
=
0
;
}
if
(
0
!=
cuda_task
->
arguments
)
{
err
=
cuMemcpyHtoDAsync_v2
(
cuda_task
->
arguments
,
local_task
->
arguments
,
(
size_t
)
cuda_task
->
arguments_size
,
plugin
->
stream
);
}
if
(
CUDA_SUCCESS
==
err
)
{
embb_mtapi_task_set_state
(
local_task
,
MTAPI_TASK_RUNNING
);
void
*
args
[
6
];
args
[
0
]
=
&
cuda_task
->
arguments
;
args
[
1
]
=
&
cuda_task
->
arguments_size
;
args
[
2
]
=
&
cuda_task
->
result_buffer
;
args
[
3
]
=
&
cuda_task
->
result_buffer_size
;
args
[
4
]
=
&
cuda_action
->
node_local_data
;
args
[
5
]
=
&
cuda_action
->
node_local_data_size
;
err
=
cuLaunchKernel
(
cuda_action
->
function
,
global_work_size
,
1
,
1
,
cuda_action
->
local_work_size
,
1
,
1
,
1024
,
plugin
->
stream
,
args
,
NULL
);
if
(
CUDA_SUCCESS
==
err
)
{
if
(
0
!=
cuda_task
->
result_buffer
)
{
err
=
cuMemcpyDtoHAsync_v2
(
local_task
->
result_buffer
,
cuda_task
->
result_buffer
,
cuda_task
->
result_buffer_size
,
plugin
->
stream
);
}
err
=
cuStreamAddCallback
(
plugin
->
stream
,
cuda_task_complete
,
cuda_task
,
0
);
}
}
if
(
CUDA_SUCCESS
!=
err
)
{
embb_mtapi_task_set_state
(
local_task
,
MTAPI_TASK_ERROR
);
local_status
=
MTAPI_ERR_ACTION_FAILED
;
}
else
{
local_status
=
MTAPI_SUCCESS
;
}
}
}
}
mtapi_status_set
(
status
,
local_status
);
}
static
void
cuda_task_cancel
(
MTAPI_IN
mtapi_task_hndl_t
task
,
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
EMBB_UNUSED
(
task
);
mtapi_status_set
(
status
,
local_status
);
}
static
void
cuda_action_finalize
(
MTAPI_IN
mtapi_action_hndl_t
action
,
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
CUresult
err
;
EMBB_UNUSED_IN_RELEASE
(
err
);
if
(
embb_mtapi_node_is_initialized
())
{
embb_mtapi_node_t
*
node
=
embb_mtapi_node_get_instance
();
if
(
embb_mtapi_action_pool_is_handle_valid
(
node
->
action_pool
,
action
))
{
embb_mtapi_action_t
*
local_action
=
embb_mtapi_action_pool_get_storage_for_handle
(
node
->
action_pool
,
action
);
embb_mtapi_cuda_action_t
*
cuda_action
=
(
embb_mtapi_cuda_action_t
*
)
local_action
->
plugin_data
;
if
(
0
!=
cuda_action
->
node_local_data
)
{
err
=
cuMemFree_v2
(
cuda_action
->
node_local_data
);
assert
(
CUDA_SUCCESS
==
err
);
}
err
=
cuModuleUnload
(
cuda_action
->
module
);
assert
(
CUDA_SUCCESS
==
err
);
embb_free
(
cuda_action
);
local_status
=
MTAPI_SUCCESS
;
}
}
mtapi_status_set
(
status
,
local_status
);
}
char
buffer
[
1024
];
void
mtapi_cuda_plugin_initialize
(
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
CUresult
err
;
embb_mtapi_cuda_plugin_t
*
plugin
=
&
embb_mtapi_cuda_plugin
;
mtapi_status_set
(
status
,
MTAPI_ERR_UNKNOWN
);
err
=
cuInit
(
0
);
if
(
CUDA_SUCCESS
!=
err
)
return
;
err
=
cuDeviceGet
(
&
plugin
->
device
,
0
);
if
(
CUDA_SUCCESS
!=
err
)
return
;
err
=
cuCtxCreate_v2
(
&
plugin
->
context
,
0
,
plugin
->
device
);
if
(
CUDA_SUCCESS
!=
err
)
return
;
cuDeviceGetAttribute
(
&
plugin
->
work_group_size
,
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X
,
plugin
->
device
);
err
=
cuStreamCreate
(
&
plugin
->
stream
,
CU_STREAM_NON_BLOCKING
);
if
(
CUDA_SUCCESS
!=
err
)
return
;
local_status
=
MTAPI_SUCCESS
;
mtapi_status_set
(
status
,
local_status
);
}
void
mtapi_cuda_plugin_finalize
(
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
CUresult
err
;
EMBB_UNUSED_IN_RELEASE
(
err
);
embb_mtapi_cuda_plugin_t
*
plugin
=
&
embb_mtapi_cuda_plugin
;
/* finalization */
err
=
cuStreamDestroy_v2
(
plugin
->
stream
);
assert
(
CUDA_SUCCESS
==
err
);
err
=
cuCtxDestroy_v2
(
plugin
->
context
);
assert
(
CUDA_SUCCESS
==
err
);
local_status
=
MTAPI_SUCCESS
;
mtapi_status_set
(
status
,
local_status
);
}
mtapi_action_hndl_t
mtapi_cuda_action_create
(
MTAPI_IN
mtapi_job_id_t
job_id
,
MTAPI_IN
char
*
kernel_source
,
MTAPI_IN
char
*
kernel_name
,
MTAPI_IN
mtapi_size_t
local_work_size
,
MTAPI_IN
mtapi_size_t
element_size
,
MTAPI_IN
void
*
node_local_data
,
MTAPI_IN
mtapi_size_t
node_local_data_size
,
MTAPI_OUT
mtapi_status_t
*
status
)
{
mtapi_status_t
local_status
=
MTAPI_ERR_UNKNOWN
;
mtapi_status_set
(
status
,
MTAPI_ERR_UNKNOWN
);
CUresult
err
;
embb_mtapi_cuda_action_t
*
action
=
(
embb_mtapi_cuda_action_t
*
)
embb_alloc
(
sizeof
(
embb_mtapi_cuda_action_t
));
mtapi_action_hndl_t
action_hndl
=
{
0
,
0
};
// invalid handle
mtapi_boolean_t
free_module_on_error
=
MTAPI_FALSE
;
mtapi_boolean_t
free_node_local_data_on_error
=
MTAPI_FALSE
;
action
->
local_work_size
=
local_work_size
;
action
->
element_size
=
element_size
;
/* initialization */
err
=
cuModuleLoadData
(
&
action
->
module
,
kernel_source
);
if
(
CUDA_SUCCESS
==
err
)
{
free_module_on_error
=
MTAPI_TRUE
;
err
=
cuModuleGetFunction
(
&
action
->
function
,
action
->
module
,
kernel_name
);
}
if
(
CUDA_SUCCESS
==
err
)
{
if
(
0
<
node_local_data_size
)
{
err
=
cuMemAlloc_v2
(
&
action
->
node_local_data
,
node_local_data_size
);
if
(
CUDA_SUCCESS
==
err
)
{
free_node_local_data_on_error
=
MTAPI_TRUE
;
}
action
->
node_local_data_size
=
(
int
)
node_local_data_size
;
if
(
CUDA_SUCCESS
==
err
)
{
err
=
cuMemcpyHtoD_v2
(
action
->
node_local_data
,
node_local_data
,
node_local_data_size
);
}
}
else
{
action
->
node_local_data
=
0
;
action
->
node_local_data_size
=
0
;
}
}
if
(
CUDA_SUCCESS
==
err
)
{
action_hndl
=
mtapi_ext_plugin_action_create
(
job_id
,
cuda_task_start
,
cuda_task_cancel
,
cuda_action_finalize
,
action
,
node_local_data
,
node_local_data_size
,
MTAPI_NULL
,
&
local_status
);
}
else
{
if
(
free_node_local_data_on_error
)
{
cuMemFree_v2
(
action
->
node_local_data
);
}
if
(
free_module_on_error
)
{
cuModuleUnload
(
action
->
module
);
}
embb_free
(
action
);
}
mtapi_status_set
(
status
,
local_status
);
return
action_hndl
;
}
This diff is collapsed.
Click to expand it.
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu
0 → 100644
View file @
d6dcb944
extern "C" __global__ void test(
void* arguments,
int arguments_size,
void* result_buffer,
int result_buffer_size,
void* node_local_data,
int node_local_data_size) {
int ii = blockDim.x * blockIdx.x + threadIdx.x;
int elements = arguments_size / sizeof(float) / 2;
if (ii >= elements)
return;
float* a = (float*)arguments;
float* b = ((float*)arguments) + elements;
float* c = (float*)result_buffer;
float* d = (float*)node_local_data;
c[ii] = a[ii] + b[ii] + d[0];
}
This diff is collapsed.
Click to expand it.
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <embb_mtapi_cuda_test_task.h>
#include <embb/mtapi/c/mtapi_cuda.h>
#define MTAPI_CHECK_STATUS(status) \
PT_ASSERT(MTAPI_SUCCESS == status)
#define CUDA_DOMAIN 1
#define CUDA_NODE 2
#define CUDA_JOB 2
// CUDA Kernel Function for element by element vector addition
#include "embb_mtapi_cuda_test_kernel.h"
TaskTest
::
TaskTest
()
{
CreateUnit
(
"mtapi cuda task test"
).
Add
(
&
TaskTest
::
TestBasic
,
this
);
}
void
TaskTest
::
TestBasic
()
{
mtapi_status_t
status
;
mtapi_job_hndl_t
job
;
mtapi_task_hndl_t
task
;
mtapi_action_hndl_t
action
;
const
int
kElements
=
64
;
float
arguments
[
kElements
*
2
];
float
results
[
kElements
];
for
(
int
ii
=
0
;
ii
<
kElements
;
ii
++
)
{
arguments
[
ii
]
=
static_cast
<
float
>
(
ii
);
arguments
[
ii
+
kElements
]
=
static_cast
<
float
>
(
ii
);
}
mtapi_cuda_plugin_initialize
(
&
status
);
if
(
status
==
MTAPI_ERR_FUNC_NOT_IMPLEMENTED
)
{
// CUDA unavailable
return
;
}
MTAPI_CHECK_STATUS
(
status
);
mtapi_initialize
(
CUDA_DOMAIN
,
CUDA_NODE
,
MTAPI_NULL
,
MTAPI_NULL
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
float
node_local
=
1.0
f
;
action
=
mtapi_cuda_action_create
(
CUDA_JOB
,
reinterpret_cast
<
char
const
*>
(
imageBytes
),
"test"
,
32
,
4
,
&
node_local
,
sizeof
(
float
),
&
status
);
MTAPI_CHECK_STATUS
(
status
);
status
=
MTAPI_ERR_UNKNOWN
;
job
=
mtapi_job_get
(
CUDA_JOB
,
CUDA_DOMAIN
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
task
=
mtapi_task_start
(
MTAPI_TASK_ID_NONE
,
job
,
arguments
,
kElements
*
2
*
sizeof
(
float
),
results
,
kElements
*
sizeof
(
float
),
MTAPI_DEFAULT_TASK_ATTRIBUTES
,
MTAPI_GROUP_NONE
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
mtapi_task_wait
(
task
,
MTAPI_INFINITE
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
for
(
int
ii
=
0
;
ii
<
kElements
;
ii
++
)
{
PT_EXPECT_EQ
(
results
[
ii
],
ii
*
2
+
1
);
}
mtapi_action_delete
(
action
,
MTAPI_INFINITE
,
&
status
);
MTAPI_CHECK_STATUS
(
status
);
mtapi_finalize
(
&
status
);
MTAPI_CHECK_STATUS
(
status
);
mtapi_cuda_plugin_finalize
(
&
status
);
MTAPI_CHECK_STATUS
(
status
);
}
This diff is collapsed.
Click to expand it.
mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_
#define MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_
#include <partest/partest.h>
class
TaskTest
:
public
partest
::
TestCase
{
public
:
TaskTest
();
private
:
void
TestBasic
();
};
#endif // MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_
This diff is collapsed.
Click to expand it.
mtapi_plugins_c/mtapi_cuda_c/test/main.cc
0 → 100644
View file @
d6dcb944
/*
* Copyright (c) 2014, Siemens AG. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <partest/partest.h>
#include <embb_mtapi_cuda_test_task.h>
PT_MAIN
(
"MTAPI CUDA"
)
{
PT_RUN
(
TaskTest
);
}
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment