From d6dcb944d51680fd7de16203144843c1e90d3961 Mon Sep 17 00:00:00 2001 From: Marcus Winter Date: Tue, 3 May 2016 16:00:52 +0200 Subject: [PATCH] mtapi_cuda_c: first implementation --- CMakeLists.txt | 8 ++++++++ mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c | 370 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu | 17 +++++++++++++++++ mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h | 40 ++++++++++++++++++++++++++++++++++++++++ mtapi_plugins_c/mtapi_cuda_c/test/main.cc | 34 ++++++++++++++++++++++++++++++++++ 8 files changed, 845 insertions(+) create mode 100644 mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt create mode 100644 mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h create mode 100644 mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c create mode 100644 mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu create mode 100644 mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc create mode 100644 mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h create mode 100644 mtapi_plugins_c/mtapi_cuda_c/test/main.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 048cb09..ca5d001 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,8 @@ set (EMBB_BASE_VERSION_MAJOR 0) set (EMBB_BASE_VERSION_MINOR 3) set (EMBB_BASE_VERSION_PATCH 2) +include(FindCUDA) + # Fix compilation for CMake versions >= 3.1 # # New Policy 0054: @@ -144,6 +146,9 @@ set(EXPECTED_EMBB_TEST_EXECUTABLES "embb_algorithms_cpp_test" if(BUILD_OPENCL_PLUGIN STREQUAL ON) list(APPEND EXPECTED_EMBB_TEST_EXECUTABLES "embb_mtapi_opencl_c_test") endif() +if(CUDA_FOUND) + list(APPEND EXPECTED_EMBB_TEST_EXECUTABLES "embb_mtapi_cuda_c_test") +endif() ## Copy test execution script to local binaries folder @@ -180,6 +185,9 @@ add_subdirectory(mtapi_plugins_c/mtapi_network_c) if(BUILD_OPENCL_PLUGIN STREQUAL ON) add_subdirectory(mtapi_plugins_c/mtapi_opencl_c) endif() +if(CUDA_FOUND) + add_subdirectory(mtapi_plugins_c/mtapi_cuda_c) +endif() add_subdirectory(tasks_cpp) add_subdirectory(mtapi_cpp) add_subdirectory(containers_cpp) diff --git a/mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt b/mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt new file mode 100644 index 0000000..1cc256a --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt @@ -0,0 +1,66 @@ +project (project_embb_mtapi_cuda_c) + +file(GLOB_RECURSE EMBB_MTAPI_CUDA_C_SOURCES "src/*.c" "src/*.h") +file(GLOB_RECURSE EMBB_MTAPI_CUDA_C_HEADERS "include/*.h") + +file(GLOB_RECURSE EMBB_MTAPI_CUDA_TEST_SOURCES "test/*.cc" "test/*.cu" "test/*.h") + +IF(MSVC8 OR MSVC9 OR MSVC10 OR MSVC11) +FOREACH(src_tmp ${EMBB_MTAPI_CUDA_TEST_SOURCES}) + SET_PROPERTY(SOURCE ${src_tmp} PROPERTY LANGUAGE CXX) +ENDFOREACH(src_tmp) +FOREACH(src_tmp ${EMBB_MTAPI_CUDA_C_SOURCES}) + SET_PROPERTY(SOURCE ${src_tmp} PROPERTY LANGUAGE CXX) +ENDFOREACH(src_tmp) +ENDIF() + +IF(CMAKE_COMPILER_IS_GNUCC) + set (EMBB_MTAPI_CUDA_C_LIBS dl) +ENDIF() + +# Execute the GroupSources macro +include(${CMAKE_SOURCE_DIR}/CMakeCommon/GroupSourcesMSVC.cmake) +GroupSourcesMSVC(include) +GroupSourcesMSVC(src) +GroupSourcesMSVC(test) + +set (EMBB_MTAPI_CUDA_INCLUDE_DIRS "include" "src" "test") +include_directories(${EMBB_MTAPI_CUDA_INCLUDE_DIRS} + ${CUDA_TOOLKIT_INCLUDE} + ${CMAKE_CURRENT_SOURCE_DIR}/../../base_c/include + ${CMAKE_CURRENT_BINARY_DIR}/../../base_c/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_c/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_c/src + ) + +add_library(embb_mtapi_cuda_c ${EMBB_MTAPI_CUDA_C_SOURCES} ${EMBB_MTAPI_CUDA_C_HEADERS}) +target_link_libraries(embb_mtapi_cuda_c embb_mtapi_c embb_base_c) + +if (BUILD_TESTS STREQUAL ON) + add_custom_command( + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/test/embb_mtapi_cuda_test_kernel.cu" + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx" + COMMAND ${CUDA_NVCC_EXECUTABLE} -ptx -m 32 + "${CMAKE_CURRENT_SOURCE_DIR}/test/embb_mtapi_cuda_test_kernel.cu" + -o "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + add_custom_command( + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx" + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.h" + COMMAND ${CUDA_TOOLKIT_ROOT_DIR}/bin/bin2c -p 0 + "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx" > + "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.h" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + include_directories( + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_BINARY_DIR}/../../partest/include) + add_executable (embb_mtapi_cuda_c_test ${EMBB_MTAPI_CUDA_TEST_SOURCES} "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.h") + target_link_libraries(embb_mtapi_cuda_c_test embb_mtapi_cuda_c embb_mtapi_c partest embb_base_c ${compiler_libs} ${EMBB_MTAPI_CUDA_C_LIBS} ${CUDA_CUDA_LIBRARY}) + CopyBin(BIN embb_mtapi_cuda_c_test DEST ${local_install_dir}) +endif() + +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ + DESTINATION include FILES_MATCHING PATTERN "*.h") +install(TARGETS embb_mtapi_cuda_c DESTINATION lib) diff --git a/mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h b/mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h new file mode 100644 index 0000000..fd49d1b --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EMBB_MTAPI_C_MTAPI_CUDA_H_ +#define EMBB_MTAPI_C_MTAPI_CUDA_H_ + + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * \defgroup C_MTAPI_CUDA MTAPI CUDA Plugin + * + * \ingroup C_MTAPI_EXT + * + * Provides functionality to execute tasks on CUDA devices. + */ + + +/** + * Initializes the MTAPI CUDA environment on a previously initialized MTAPI + * node. + * + * It must be called on all nodes using the MTAPI CUDA plugin. + * + * Application software using MTAPI network must call + * mtapi_cuda_plugin_initialize() once per node. It is an error to call + * mtapi_cuda_plugin_initialize() multiple times + * from a given node, unless mtapi_cuda_plugin_finalize() is called in + * between. + * + * On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is + * set to the appropriate error defined below. + * Error code | Description + * --------------------------- | ---------------------------------------------- + * \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be initialized. + * + * \see mtapi_cuda_plugin_finalize() + * + * \notthreadsafe + * \ingroup C_MTAPI_CUDA + */ +void mtapi_cuda_plugin_initialize( + MTAPI_OUT mtapi_status_t* status /**< [out] Pointer to error code, + may be \c MTAPI_NULL */ +); + +/** + * Finalizes the MTAPI CUDA environment on the local MTAPI node. + * + * It has to be called by each node using MTAPI CUDA. It is an error to call + * mtapi_cuda_plugin_finalize() without first calling + * mtapi_cuda_plugin_initialize(). An MTAPI node can call + * mtapi_cuda_plugin_finalize() once for each call to + * mtapi_cuda_plugin_initialize(), but it is an error to call + * mtapi_cuda_plugin_finalize() multiple times from a given node + * unless mtapi_cuda_plugin_initialize() has been called prior to each + * mtapi_cuda_plugin_finalize() call. + * + * All network tasks that have not completed and that have been started on the + * node where mtapi_cuda_plugin_finalize() is called will be canceled + * (see mtapi_task_cancel()). mtapi_opencl_plugin_finalize() blocks until all + * tasks that have been started on the same node return. Tasks that execute + * actions on the node where mtapi_opencl_plugin_finalize() is called, also + * block finalization of the MTAPI CUDA system on that node. + * + * On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is + * set to the appropriate error defined below. + * Error code | Description + * ----------------------------- | -------------------------------------------- + * \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be finalized. + * + * \see mtapi_opencl_plugin_initialize(), mtapi_task_cancel() + * + * \notthreadsafe + * \ingroup C_MTAPI_CUDA + */ +void mtapi_cuda_plugin_finalize( + MTAPI_OUT mtapi_status_t* status /**< [out] Pointer to error code, + may be \c MTAPI_NULL */ +); + +/** + * This function creates an CUDA action. + * + * It is called on the node where the user wants to execute an action on an + * CUDA device. A CUDA action contains a reference to a local job, the + * kernel source to compile and execute on the CUDA device, the name of the + * kernel function, a local work size (see CUDA specification for details) + * and the size of one element in the result buffer. + * After an CUDA action is created, it is referenced by the application using + * a node-local handle of type \c mtapi_action_hndl_t, or indirectly through a + * node-local job handle of type \c mtapi_job_hndl_t. An CUDA action's + * life-cycle begins with mtapi_cuda_action_create(), and ends when + * mtapi_action_delete() or mtapi_finalize() is called. + * + * To create an action, the application must supply the domain-wide job ID of + * the job associated with the action. Job IDs must be predefined in the + * application and runtime, of type \c mtapi_job_id_t, which is an + * implementation-defined type. The job ID is unique in the sense that it is + * unique for the job implemented by the action. However several actions may + * implement the same job for load balancing purposes. + * + * If \c node_local_data_size is not zero, \c node_local_data specifies the + * start of node local data shared by kernel functions executed on the same + * node. \c node_local_data_size can be used by the runtime for cache coherency + * operations. + * + * On success, an action handle is returned and \c *status is set to + * \c MTAPI_SUCCESS. On error, \c *status is set to the appropriate error + * defined below. In the case where the action already exists, \c status will + * be set to \c MTAPI_ERR_ACTION_EXISTS and the handle returned will not be a + * valid handle. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Error codeDescription
\c MTAPI_ERR_JOB_INVALIDThe \c job_id is not a valid job ID, i.e., no action was created for + * that ID or the action has been deleted.
\c MTAPI_ERR_ACTION_EXISTSThis action is already created.
\c MTAPI_ERR_ACTION_LIMITExceeded maximum number of actions allowed.
\c MTAPI_ERR_NODE_NOTINITThe calling node is not initialized.
\c MTAPI_ERR_UNKNOWNThe kernel could not be compiled or no CUDA device was + * available.
+ * + * \see mtapi_action_delete(), mtapi_finalize() + * + * \returns Handle to newly created CUDA action, invalid handle on error + * \threadsafe + * \ingroup C_MTAPI_CUDA + */ +mtapi_action_hndl_t mtapi_cuda_action_create( + MTAPI_IN mtapi_job_id_t job_id, /**< [in] Job id */ + MTAPI_IN char* kernel_source, /**< [in] Pointer to kernel source */ + MTAPI_IN char* kernel_name, /**< [in] Name of the kernel function */ + MTAPI_IN mtapi_size_t local_work_size, + /**< [in] Size of local work group */ + MTAPI_IN mtapi_size_t element_size, /**< [in] Size of one element in the + result buffer */ + MTAPI_IN void* node_local_data, /**< [in] Data shared across tasks */ + MTAPI_IN mtapi_size_t node_local_data_size, + /**< [in] Size of shared data */ + MTAPI_OUT mtapi_status_t* status /**< [out] Pointer to error code, + may be \c MTAPI_NULL */ +); + + +#ifdef __cplusplus +} +#endif + + +#endif // EMBB_MTAPI_C_MTAPI_CUDA_H_ diff --git a/mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c b/mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c new file mode 100644 index 0000000..ce34e03 --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include + +struct embb_mtapi_cuda_plugin_struct { + CUdevice device; + CUcontext context; + CUstream stream; + int work_group_size; +}; + +typedef struct embb_mtapi_cuda_plugin_struct embb_mtapi_cuda_plugin_t; + +static embb_mtapi_cuda_plugin_t embb_mtapi_cuda_plugin; + +struct embb_mtapi_cuda_action_struct { + CUmodule module; + CUfunction function; + CUdeviceptr node_local_data; + int node_local_data_size; + size_t local_work_size; + size_t element_size; +}; + +typedef struct embb_mtapi_cuda_action_struct embb_mtapi_cuda_action_t; + +struct embb_mtapi_cuda_task_struct { + CUdeviceptr arguments; + int arguments_size; + CUdeviceptr result_buffer; + int result_buffer_size; + mtapi_task_hndl_t task; +}; + +typedef struct embb_mtapi_cuda_task_struct embb_mtapi_cuda_task_t; + +static size_t round_up(size_t group_size, size_t global_size) { + size_t r = global_size % group_size; + if (r == 0) { + return global_size; + } else { + return global_size + group_size - r; + } +} + +static void CUDA_CB cuda_task_complete( + CUstream stream, CUresult status, void *data) { + EMBB_UNUSED(stream); + EMBB_UNUSED(status); + CUresult err; + EMBB_UNUSED_IN_RELEASE(err); + embb_mtapi_cuda_task_t * cuda_task = (embb_mtapi_cuda_task_t*)data; + + if (embb_mtapi_node_is_initialized()) { + embb_mtapi_node_t * node = embb_mtapi_node_get_instance(); + + if (embb_mtapi_task_pool_is_handle_valid( + node->task_pool, cuda_task->task)) { + embb_mtapi_task_t * local_task = + embb_mtapi_task_pool_get_storage_for_handle( + node->task_pool, cuda_task->task); + + if (0 != cuda_task->result_buffer) { + err = cuMemFree_v2(cuda_task->result_buffer); + assert(CUDA_SUCCESS == err); + } + if (0 != cuda_task->arguments) { + err = cuMemFree_v2(cuda_task->arguments); + assert(CUDA_SUCCESS == err); + } + + embb_free(cuda_task); + + embb_mtapi_task_set_state(local_task, MTAPI_TASK_COMPLETED); + } + } +} + +static void cuda_task_start( + MTAPI_IN mtapi_task_hndl_t task, + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + CUresult err = CUDA_SUCCESS; + embb_mtapi_cuda_plugin_t * plugin = &embb_mtapi_cuda_plugin; + + if (embb_mtapi_node_is_initialized()) { + embb_mtapi_node_t * node = embb_mtapi_node_get_instance(); + + if (embb_mtapi_task_pool_is_handle_valid(node->task_pool, task)) { + embb_mtapi_task_t * local_task = + embb_mtapi_task_pool_get_storage_for_handle(node->task_pool, task); + + if (embb_mtapi_action_pool_is_handle_valid( + node->action_pool, local_task->action)) { + embb_mtapi_action_t * local_action = + embb_mtapi_action_pool_get_storage_for_handle( + node->action_pool, local_task->action); + + embb_mtapi_cuda_action_t * cuda_action = + (embb_mtapi_cuda_action_t*)local_action->plugin_data; + embb_mtapi_cuda_task_t * cuda_task = + (embb_mtapi_cuda_task_t*)embb_alloc( + sizeof(embb_mtapi_cuda_task_t)); + + size_t elements = local_task->result_size / + cuda_action->element_size; + size_t global_work_size; + + if (0 == elements) + elements = 1; + global_work_size = + round_up(cuda_action->local_work_size, elements); + + cuda_task->task = task; + + cuda_task->arguments_size = (int)local_task->arguments_size; + if (0 < local_task->arguments_size) { + err = cuMemAlloc_v2(&cuda_task->arguments, + local_task->arguments_size); + } else { + cuda_task->arguments = 0; + } + cuda_task->result_buffer_size = (int)local_task->result_size; + if (0 < local_task->result_size) { + err = cuMemAlloc_v2(&cuda_task->result_buffer, + local_task->result_size); + } else { + cuda_task->result_buffer = 0; + } + + if (0 != cuda_task->arguments) { + err = cuMemcpyHtoDAsync_v2(cuda_task->arguments, + local_task->arguments, (size_t)cuda_task->arguments_size, + plugin->stream); + } + + if (CUDA_SUCCESS == err) { + embb_mtapi_task_set_state(local_task, MTAPI_TASK_RUNNING); + void * args[6]; + args[0] = &cuda_task->arguments; + args[1] = &cuda_task->arguments_size; + args[2] = &cuda_task->result_buffer; + args[3] = &cuda_task->result_buffer_size; + args[4] = &cuda_action->node_local_data; + args[5] = &cuda_action->node_local_data_size; + + err = cuLaunchKernel(cuda_action->function, + global_work_size, 1, 1, + cuda_action->local_work_size, 1, 1, + 1024, plugin->stream, args, NULL); + + if (CUDA_SUCCESS == err) { + if (0 != cuda_task->result_buffer) { + err = cuMemcpyDtoHAsync_v2(local_task->result_buffer, + cuda_task->result_buffer, cuda_task->result_buffer_size, + plugin->stream); + } + + err = cuStreamAddCallback(plugin->stream, cuda_task_complete, + cuda_task, 0); + } + } + + if (CUDA_SUCCESS != err) { + embb_mtapi_task_set_state(local_task, MTAPI_TASK_ERROR); + local_status = MTAPI_ERR_ACTION_FAILED; + } else { + local_status = MTAPI_SUCCESS; + } + } + } + } + + mtapi_status_set(status, local_status); +} + +static void cuda_task_cancel( + MTAPI_IN mtapi_task_hndl_t task, + MTAPI_OUT mtapi_status_t* status + ) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + + EMBB_UNUSED(task); + + mtapi_status_set(status, local_status); +} + +static void cuda_action_finalize( + MTAPI_IN mtapi_action_hndl_t action, + MTAPI_OUT mtapi_status_t* status + ) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + CUresult err; + EMBB_UNUSED_IN_RELEASE(err); + + if (embb_mtapi_node_is_initialized()) { + embb_mtapi_node_t * node = embb_mtapi_node_get_instance(); + if (embb_mtapi_action_pool_is_handle_valid(node->action_pool, action)) { + embb_mtapi_action_t * local_action = + embb_mtapi_action_pool_get_storage_for_handle( + node->action_pool, action); + embb_mtapi_cuda_action_t * cuda_action = + (embb_mtapi_cuda_action_t *)local_action->plugin_data; + if (0 != cuda_action->node_local_data) { + err = cuMemFree_v2(cuda_action->node_local_data); + assert(CUDA_SUCCESS == err); + } + + err = cuModuleUnload(cuda_action->module); + assert(CUDA_SUCCESS == err); + + embb_free(cuda_action); + local_status = MTAPI_SUCCESS; + } + } + + mtapi_status_set(status, local_status); +} + +char buffer[1024]; + +void mtapi_cuda_plugin_initialize( + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + CUresult err; + embb_mtapi_cuda_plugin_t * plugin = &embb_mtapi_cuda_plugin; + + mtapi_status_set(status, MTAPI_ERR_UNKNOWN); + + err = cuInit(0); + if (CUDA_SUCCESS != err) return; + err = cuDeviceGet(&plugin->device, 0); + if (CUDA_SUCCESS != err) return; + err = cuCtxCreate_v2(&plugin->context, 0, plugin->device); + if (CUDA_SUCCESS != err) return; + cuDeviceGetAttribute(&plugin->work_group_size, + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, plugin->device); + err = cuStreamCreate(&plugin->stream, CU_STREAM_NON_BLOCKING); + if (CUDA_SUCCESS != err) return; + local_status = MTAPI_SUCCESS; + + mtapi_status_set(status, local_status); +} + +void mtapi_cuda_plugin_finalize( + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + + CUresult err; + EMBB_UNUSED_IN_RELEASE(err); + embb_mtapi_cuda_plugin_t * plugin = &embb_mtapi_cuda_plugin; + + /* finalization */ + err = cuStreamDestroy_v2(plugin->stream); + assert(CUDA_SUCCESS == err); + err = cuCtxDestroy_v2(plugin->context); + assert(CUDA_SUCCESS == err); + + local_status = MTAPI_SUCCESS; + mtapi_status_set(status, local_status); +} + +mtapi_action_hndl_t mtapi_cuda_action_create( + MTAPI_IN mtapi_job_id_t job_id, + MTAPI_IN char* kernel_source, + MTAPI_IN char* kernel_name, + MTAPI_IN mtapi_size_t local_work_size, + MTAPI_IN mtapi_size_t element_size, + MTAPI_IN void* node_local_data, + MTAPI_IN mtapi_size_t node_local_data_size, + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + mtapi_status_set(status, MTAPI_ERR_UNKNOWN); + + CUresult err; + embb_mtapi_cuda_action_t * action = + (embb_mtapi_cuda_action_t*)embb_alloc( + sizeof(embb_mtapi_cuda_action_t)); + mtapi_action_hndl_t action_hndl = { 0, 0 }; // invalid handle + mtapi_boolean_t free_module_on_error = MTAPI_FALSE; + mtapi_boolean_t free_node_local_data_on_error = MTAPI_FALSE; + + action->local_work_size = local_work_size; + action->element_size = element_size; + + /* initialization */ + err = cuModuleLoadData(&action->module, kernel_source); + if (CUDA_SUCCESS == err) { + free_module_on_error = MTAPI_TRUE; + err = cuModuleGetFunction(&action->function, action->module, kernel_name); + } + if (CUDA_SUCCESS == err) { + if (0 < node_local_data_size) { + err = cuMemAlloc_v2(&action->node_local_data, node_local_data_size); + if (CUDA_SUCCESS == err) { + free_node_local_data_on_error = MTAPI_TRUE; + } + action->node_local_data_size = (int)node_local_data_size; + if (CUDA_SUCCESS == err) { + err = cuMemcpyHtoD_v2( + action->node_local_data, node_local_data, node_local_data_size); + } + } else { + action->node_local_data = 0; + action->node_local_data_size = 0; + } + } + + if (CUDA_SUCCESS == err) { + action_hndl = mtapi_ext_plugin_action_create( + job_id, + cuda_task_start, + cuda_task_cancel, + cuda_action_finalize, + action, + node_local_data, + node_local_data_size, + MTAPI_NULL, + &local_status); + } else { + if (free_node_local_data_on_error) { + cuMemFree_v2(action->node_local_data); + } + if (free_module_on_error) { + cuModuleUnload(action->module); + } + embb_free(action); + } + + mtapi_status_set(status, local_status); + + return action_hndl; +} diff --git a/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu new file mode 100644 index 0000000..c46828b --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu @@ -0,0 +1,17 @@ +extern "C" __global__ void test( + void* arguments, + int arguments_size, + void* result_buffer, + int result_buffer_size, + void* node_local_data, + int node_local_data_size) { + int ii = blockDim.x * blockIdx.x + threadIdx.x; + int elements = arguments_size / sizeof(float) / 2; + if (ii >= elements) + return; + float* a = (float*)arguments; + float* b = ((float*)arguments) + elements; + float* c = (float*)result_buffer; + float* d = (float*)node_local_data; + c[ii] = a[ii] + b[ii] + d[0]; +} diff --git a/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc new file mode 100644 index 0000000..9c191b4 --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include + +#define MTAPI_CHECK_STATUS(status) \ +PT_ASSERT(MTAPI_SUCCESS == status) + +#define CUDA_DOMAIN 1 +#define CUDA_NODE 2 +#define CUDA_JOB 2 + +// CUDA Kernel Function for element by element vector addition +#include "embb_mtapi_cuda_test_kernel.h" + +TaskTest::TaskTest() { + CreateUnit("mtapi cuda task test").Add(&TaskTest::TestBasic, this); +} + +void TaskTest::TestBasic() { + mtapi_status_t status; + mtapi_job_hndl_t job; + mtapi_task_hndl_t task; + mtapi_action_hndl_t action; + + const int kElements = 64; + float arguments[kElements * 2]; + float results[kElements]; + + for (int ii = 0; ii < kElements; ii++) { + arguments[ii] = static_cast(ii); + arguments[ii + kElements] = static_cast(ii); + } + + mtapi_cuda_plugin_initialize(&status); + if (status == MTAPI_ERR_FUNC_NOT_IMPLEMENTED) { + // CUDA unavailable + return; + } + MTAPI_CHECK_STATUS(status); + + mtapi_initialize( + CUDA_DOMAIN, + CUDA_NODE, + MTAPI_NULL, + MTAPI_NULL, + &status); + MTAPI_CHECK_STATUS(status); + + float node_local = 1.0f; + action = mtapi_cuda_action_create( + CUDA_JOB, + reinterpret_cast(imageBytes), "test", 32, 4, + &node_local, sizeof(float), + &status); + MTAPI_CHECK_STATUS(status); + + status = MTAPI_ERR_UNKNOWN; + job = mtapi_job_get(CUDA_JOB, CUDA_DOMAIN, &status); + MTAPI_CHECK_STATUS(status); + + task = mtapi_task_start( + MTAPI_TASK_ID_NONE, + job, + arguments, kElements * 2 * sizeof(float), + results, kElements*sizeof(float), + MTAPI_DEFAULT_TASK_ATTRIBUTES, + MTAPI_GROUP_NONE, + &status); + MTAPI_CHECK_STATUS(status); + + mtapi_task_wait(task, MTAPI_INFINITE, &status); + MTAPI_CHECK_STATUS(status); + + for (int ii = 0; ii < kElements; ii++) { + PT_EXPECT_EQ(results[ii], ii * 2 + 1); + } + + mtapi_action_delete(action, MTAPI_INFINITE, &status); + MTAPI_CHECK_STATUS(status); + + mtapi_finalize(&status); + MTAPI_CHECK_STATUS(status); + + mtapi_cuda_plugin_finalize(&status); + MTAPI_CHECK_STATUS(status); +} diff --git a/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h new file mode 100644 index 0000000..bc9701f --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_ +#define MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_ + +#include + +class TaskTest : public partest::TestCase { + public: + TaskTest(); + + private: + void TestBasic(); +}; + +#endif // MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_ diff --git a/mtapi_plugins_c/mtapi_cuda_c/test/main.cc b/mtapi_plugins_c/mtapi_cuda_c/test/main.cc new file mode 100644 index 0000000..2c5faaf --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/test/main.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include + + +PT_MAIN("MTAPI CUDA") { + PT_RUN(TaskTest); +} -- libgit2 0.26.0