diff --git a/CMakeLists.txt b/CMakeLists.txt index ecc3d70..c443536 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,8 @@ set (EMBB_BASE_VERSION_MAJOR 0) set (EMBB_BASE_VERSION_MINOR 4) set (EMBB_BASE_VERSION_PATCH 0) +include(FindCUDA) + # Fix compilation for CMake versions >= 3.1 # # New Policy 0054: @@ -143,6 +145,9 @@ set(EXPECTED_EMBB_TEST_EXECUTABLES "embb_algorithms_cpp_test" if(BUILD_OPENCL_PLUGIN STREQUAL ON) list(APPEND EXPECTED_EMBB_TEST_EXECUTABLES "embb_mtapi_opencl_c_test") endif() +if(CUDA_FOUND) + list(APPEND EXPECTED_EMBB_TEST_EXECUTABLES "embb_mtapi_cuda_c_test") +endif() ## Copy test execution script to local binaries folder @@ -179,6 +184,9 @@ add_subdirectory(mtapi_plugins_c/mtapi_network_c) if(BUILD_OPENCL_PLUGIN STREQUAL ON) add_subdirectory(mtapi_plugins_c/mtapi_opencl_c) endif() +if(CUDA_FOUND) + add_subdirectory(mtapi_plugins_c/mtapi_cuda_c) +endif() add_subdirectory(mtapi_cpp) add_subdirectory(containers_cpp) add_subdirectory(algorithms_cpp) diff --git a/doc/reference/Doxyfile.in b/doc/reference/Doxyfile.in index 760d09e..e27e7f8 100644 --- a/doc/reference/Doxyfile.in +++ b/doc/reference/Doxyfile.in @@ -28,10 +28,10 @@ DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = "Embedded Multicore Building Blocks V@EMBB_BASE_VERSION_MAJOR@.@EMBB_BASE_VERSION_MINOR@.@EMBB_BASE_VERSION_PATCH@" -PROJECT_NUMBER = -PROJECT_BRIEF = -PROJECT_LOGO = -OUTPUT_DIRECTORY = +PROJECT_NUMBER = +PROJECT_BRIEF = +PROJECT_LOGO = +OUTPUT_DIRECTORY = CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English BRIEF_MEMBER_DESC = YES @@ -51,7 +51,7 @@ ALWAYS_DETAILED_SEC = YES INLINE_INHERITED_MEMB = YES FULL_PATH_NAMES = YES STRIP_FROM_PATH = "@CMAKE_SOURCE_DIR@" -STRIP_FROM_INC_PATH = +STRIP_FROM_INC_PATH = SHORT_NAMES = NO JAVADOC_AUTOBRIEF = YES QT_AUTOBRIEF = NO @@ -68,12 +68,12 @@ ALIASES = "notthreadsafe=\par Concurrency\nNot thread-safe\n" \ "post=\par Postcondition\n" \ "note=\par Note\n" \ "concept{1}=@ingroup \1\n@par Implemented concepts:\n@ref \1" -TCL_SUBST = +TCL_SUBST = OPTIMIZE_OUTPUT_FOR_C = NO OPTIMIZE_OUTPUT_JAVA = NO OPTIMIZE_FOR_FORTRAN = NO OPTIMIZE_OUTPUT_VHDL = NO -EXTENSION_MAPPING = +EXTENSION_MAPPING = MARKDOWN_SUPPORT = YES AUTOLINK_SUPPORT = YES BUILTIN_STL_SUPPORT = NO @@ -119,14 +119,14 @@ GENERATE_TODOLIST = YES GENERATE_TESTLIST = YES GENERATE_BUGLIST = YES GENERATE_DEPRECATEDLIST= YES -ENABLED_SECTIONS = +ENABLED_SECTIONS = MAX_INITIALIZER_LINES = 30 SHOW_USED_FILES = NO SHOW_FILES = YES SHOW_NAMESPACES = YES -FILE_VERSION_FILTER = +FILE_VERSION_FILTER = LAYOUT_FILE = "@CMAKE_SOURCE_DIR@/doc/reference/DoxygenLayout.xml" -CITE_BIB_FILES = +CITE_BIB_FILES = # ============================================================================== # Options related to warning and progress messages @@ -138,7 +138,7 @@ WARN_IF_UNDOCUMENTED = YES WARN_IF_DOC_ERROR = YES WARN_NO_PARAMDOC = YES WARN_FORMAT = "$file:$line: $text" -WARN_LOGFILE = +WARN_LOGFILE = # ============================================================================== # Options related to input files @@ -153,7 +153,8 @@ INPUT = "@CMAKE_SOURCE_DIR@/doc/reference/embb.dox" \ "@CMAKE_SOURCE_DIR@/mtapi_c/include" \ "@CMAKE_SOURCE_DIR@/base_c/include" \ "@CMAKE_SOURCE_DIR@/mtapi_plugins_c/mtapi_opencl_c/include" \ - "@CMAKE_SOURCE_DIR@/mtapi_plugins_c/mtapi_network_c/include" + "@CMAKE_SOURCE_DIR@/mtapi_plugins_c/mtapi_network_c/include" \ + "@CMAKE_SOURCE_DIR@/mtapi_plugins_c/mtapi_cuda_c/include" INPUT_ENCODING = UTF-8 FILE_PATTERNS = *.h \ @@ -161,21 +162,21 @@ FILE_PATTERNS = *.h \ *.c \ *.dox RECURSIVE = YES -EXCLUDE = +EXCLUDE = EXCLUDE_SYMLINKS = NO EXCLUDE_PATTERNS = */test/* \ */internal/* EXCLUDE_SYMBOLS = *test* \ *::internal::* -EXAMPLE_PATH = +EXAMPLE_PATH = EXAMPLE_PATTERNS = * EXAMPLE_RECURSIVE = NO IMAGE_PATH = "@CMAKE_SOURCE_DIR@/doc" -INPUT_FILTER = -FILTER_PATTERNS = +INPUT_FILTER = +FILTER_PATTERNS = FILTER_SOURCE_FILES = NO -FILTER_SOURCE_PATTERNS = -USE_MDFILE_AS_MAINPAGE = +FILTER_SOURCE_PATTERNS = +USE_MDFILE_AS_MAINPAGE = # ============================================================================== # Options related to source browsing @@ -206,10 +207,10 @@ IGNORE_PREFIX = cm GENERATE_HTML = YES HTML_OUTPUT = html HTML_FILE_EXTENSION = .html -HTML_FOOTER = -HTML_STYLESHEET = +HTML_FOOTER = +HTML_STYLESHEET = HTML_EXTRA_STYLESHEET = "@CMAKE_SOURCE_DIR@/doc/reference/DoxygenHTMLStyle.css" -HTML_EXTRA_FILES = +HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 220 HTML_COLORSTYLE_SAT = 100 HTML_COLORSTYLE_GAMMA = 80 @@ -222,20 +223,20 @@ DOCSET_BUNDLE_ID = org.doxygen.Project DOCSET_PUBLISHER_ID = org.doxygen.Publisher DOCSET_PUBLISHER_NAME = Publisher GENERATE_HTMLHELP = NO -CHM_FILE = -HHC_LOCATION = +CHM_FILE = +HHC_LOCATION = GENERATE_CHI = NO -CHM_INDEX_ENCODING = +CHM_INDEX_ENCODING = BINARY_TOC = NO TOC_EXPAND = NO GENERATE_QHP = NO -QCH_FILE = +QCH_FILE = QHP_NAMESPACE = org.doxygen.Project QHP_VIRTUAL_FOLDER = doc -QHP_CUST_FILTER_NAME = -QHP_CUST_FILTER_ATTRS = -QHP_SECT_FILTER_ATTRS = -QHG_LOCATION = +QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = +QHG_LOCATION = GENERATE_ECLIPSEHELP = NO ECLIPSE_DOC_ID = org.doxygen.Project DISABLE_INDEX = NO @@ -248,15 +249,15 @@ FORMULA_TRANSPARENT = YES USE_MATHJAX = NO MATHJAX_FORMAT = HTML-CSS MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest -MATHJAX_EXTENSIONS = -MATHJAX_CODEFILE = +MATHJAX_EXTENSIONS = +MATHJAX_CODEFILE = SEARCHENGINE = YES SERVER_BASED_SEARCH = NO EXTERNAL_SEARCH = NO -SEARCHENGINE_URL = +SEARCHENGINE_URL = SEARCHDATA_FILE = searchdata.xml -EXTERNAL_SEARCH_ID = -EXTRA_SEARCH_MAPPINGS = +EXTERNAL_SEARCH_ID = +EXTRA_SEARCH_MAPPINGS = # ============================================================================== # Options related to LaTeX output @@ -268,10 +269,10 @@ LATEX_CMD_NAME = latex MAKEINDEX_CMD_NAME = makeindex COMPACT_LATEX = NO PAPER_TYPE = a4 -EXTRA_PACKAGES = -LATEX_HEADER = -LATEX_FOOTER = -LATEX_EXTRA_FILES = +EXTRA_PACKAGES = +LATEX_HEADER = +LATEX_FOOTER = +LATEX_EXTRA_FILES = PDF_HYPERLINKS = YES USE_PDFLATEX = YES LATEX_BATCHMODE = NO @@ -287,8 +288,8 @@ GENERATE_RTF = NO RTF_OUTPUT = rtf COMPACT_RTF = NO RTF_HYPERLINKS = NO -RTF_STYLESHEET_FILE = -RTF_EXTENSIONS_FILE = +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = # ============================================================================== # Options related to man page output @@ -327,7 +328,7 @@ GENERATE_AUTOGEN_DEF = NO GENERATE_PERLMOD = NO PERLMOD_LATEX = NO PERLMOD_PRETTY = YES -PERLMOD_MAKEVAR_PREFIX = +PERLMOD_MAKEVAR_PREFIX = # ============================================================================== # Options related to preprocessor @@ -337,19 +338,19 @@ ENABLE_PREPROCESSING = YES MACRO_EXPANSION = YES EXPAND_ONLY_PREDEF = NO SEARCH_INCLUDES = YES -INCLUDE_PATH = -INCLUDE_FILE_PATTERNS = +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = PREDEFINED = protected=private \ DOXYGEN -EXPAND_AS_DEFINED = +EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES # ============================================================================== # Options related to external references # ============================================================================== -TAGFILES = -GENERATE_TAGFILE = +TAGFILES = +GENERATE_TAGFILE = ALLEXTERNALS = NO EXTERNAL_GROUPS = YES EXTERNAL_PAGES = NO @@ -360,14 +361,14 @@ PERL_PATH = /usr/bin/perl # ============================================================================== CLASS_DIAGRAMS = NO -MSCGEN_PATH = -DIA_PATH = +MSCGEN_PATH = +DIA_PATH = HIDE_UNDOC_RELATIONS = YES HAVE_DOT = NO DOT_NUM_THREADS = 0 DOT_FONTNAME = Helvetica DOT_FONTSIZE = 10 -DOT_FONTPATH = +DOT_FONTPATH = CLASS_GRAPH = NO COLLABORATION_GRAPH = NO GROUP_GRAPHS = YES @@ -383,9 +384,9 @@ DIRECTORY_GRAPH = YES DOT_IMAGE_FORMAT = png INTERACTIVE_SVG = NO DOT_PATH = @DOT_PATH@ -DOTFILE_DIRS = -MSCFILE_DIRS = -DIAFILE_DIRS = +DOTFILE_DIRS = +MSCFILE_DIRS = +DIAFILE_DIRS = DOT_GRAPH_MAX_NODES = 50 MAX_DOT_GRAPH_DEPTH = 0 DOT_TRANSPARENT = NO diff --git a/mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt b/mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt new file mode 100644 index 0000000..03656b8 --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/CMakeLists.txt @@ -0,0 +1,71 @@ +project (project_embb_mtapi_cuda_c) + +file(GLOB_RECURSE EMBB_MTAPI_CUDA_C_SOURCES "src/*.c" "src/*.h") +file(GLOB_RECURSE EMBB_MTAPI_CUDA_C_HEADERS "include/*.h") + +file(GLOB_RECURSE EMBB_MTAPI_CUDA_TEST_SOURCES "test/*.cc" "test/*.cu" "test/*.h") + +IF(MSVC8 OR MSVC9 OR MSVC10 OR MSVC11) +FOREACH(src_tmp ${EMBB_MTAPI_CUDA_TEST_SOURCES}) + SET_PROPERTY(SOURCE ${src_tmp} PROPERTY LANGUAGE CXX) +ENDFOREACH(src_tmp) +FOREACH(src_tmp ${EMBB_MTAPI_CUDA_C_SOURCES}) + SET_PROPERTY(SOURCE ${src_tmp} PROPERTY LANGUAGE CXX) +ENDFOREACH(src_tmp) +ENDIF() + +IF(CMAKE_COMPILER_IS_GNUCC) + set (EMBB_MTAPI_CUDA_C_LIBS dl) +ENDIF() + +# Execute the GroupSources macro +include(${CMAKE_SOURCE_DIR}/CMakeCommon/GroupSourcesMSVC.cmake) +GroupSourcesMSVC(include) +GroupSourcesMSVC(src) +GroupSourcesMSVC(test) + +set (EMBB_MTAPI_CUDA_INCLUDE_DIRS "include" "src" "test") +include_directories(${EMBB_MTAPI_CUDA_INCLUDE_DIRS} + ${CUDA_TOOLKIT_INCLUDE} + ${CMAKE_CURRENT_SOURCE_DIR}/../../base_c/include + ${CMAKE_CURRENT_BINARY_DIR}/../../base_c/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_c/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../mtapi_c/src + ) + +add_library(embb_mtapi_cuda_c ${EMBB_MTAPI_CUDA_C_SOURCES} ${EMBB_MTAPI_CUDA_C_HEADERS}) +target_link_libraries(embb_mtapi_cuda_c embb_mtapi_c embb_base_c) + +if (BUILD_TESTS STREQUAL ON) + add_custom_command( + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/test/embb_mtapi_cuda_test_kernel.cu" + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx" + COMMAND ${CUDA_NVCC_EXECUTABLE} -ptx -m 32 + "${CMAKE_CURRENT_SOURCE_DIR}/test/embb_mtapi_cuda_test_kernel.cu" + -o "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + add_custom_command( + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx" + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.h" + COMMAND ${CUDA_TOOLKIT_ROOT_DIR}/bin/bin2c -p 0 + "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.ptx" > + "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.h" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + include_directories( + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_BINARY_DIR}/../../partest/include) + add_executable (embb_mtapi_cuda_c_test ${EMBB_MTAPI_CUDA_TEST_SOURCES} "${CMAKE_CURRENT_BINARY_DIR}/embb_mtapi_cuda_test_kernel.h") + target_link_libraries(embb_mtapi_cuda_c_test embb_mtapi_cuda_c embb_mtapi_c partest embb_base_c ${compiler_libs} ${EMBB_MTAPI_CUDA_C_LIBS} ${CUDA_CUDA_LIBRARY}) + CopyBin(BIN embb_mtapi_cuda_c_test DEST ${local_install_dir}) +endif() + +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ + DESTINATION include FILES_MATCHING PATTERN "*.h") +install(TARGETS embb_mtapi_cuda_c DESTINATION lib) +if (MSVC) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/Debug/embb_mtapi_cuda_c.pdb + DESTINATION lib + CONFIGURATIONS Debug) +endif() diff --git a/mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h b/mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h new file mode 100644 index 0000000..a8d5377 --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/include/embb/mtapi/c/mtapi_cuda.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EMBB_MTAPI_C_MTAPI_CUDA_H_ +#define EMBB_MTAPI_C_MTAPI_CUDA_H_ + + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * \defgroup C_MTAPI_CUDA MTAPI CUDA Plugin + * + * \ingroup C_MTAPI_EXT + * + * Provides functionality to execute tasks on CUDA devices. + */ + + +/** + * Initializes the MTAPI CUDA environment on a previously initialized MTAPI + * node. + * + * It must be called on all nodes using the MTAPI CUDA plugin. + * + * Application software using MTAPI CUDA must call + * mtapi_cuda_plugin_initialize() once per node. It is an error to call + * mtapi_cuda_plugin_initialize() multiple times + * from a given node, unless mtapi_cuda_plugin_finalize() is called in + * between. + * + * On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is + * set to the appropriate error defined below. + * Error code | Description + * --------------------------- | ---------------------------------------------- + * \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be initialized. + * + * \see mtapi_cuda_plugin_finalize() + * + * \notthreadsafe + * \ingroup C_MTAPI_CUDA + */ +void mtapi_cuda_plugin_initialize( + MTAPI_OUT mtapi_status_t* status /**< [out] Pointer to error code, + may be \c MTAPI_NULL */ +); + +/** + * Finalizes the MTAPI CUDA environment on the local MTAPI node. + * + * It has to be called by each node using MTAPI CUDA. It is an error to call + * mtapi_cuda_plugin_finalize() without first calling + * mtapi_cuda_plugin_initialize(). An MTAPI node can call + * mtapi_cuda_plugin_finalize() once for each call to + * mtapi_cuda_plugin_initialize(), but it is an error to call + * mtapi_cuda_plugin_finalize() multiple times from a given node + * unless mtapi_cuda_plugin_initialize() has been called prior to each + * mtapi_cuda_plugin_finalize() call. + * + * All CUDA tasks that have not completed and that have been started on the + * node where mtapi_cuda_plugin_finalize() is called will be canceled + * (see mtapi_task_cancel()). mtapi_cuda_plugin_finalize() blocks until all + * tasks that have been started on the same node return. Tasks that execute + * actions on the node where mtapi_cuda_plugin_finalize() is called, also + * block finalization of the MTAPI CUDA system on that node. + * + * On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is + * set to the appropriate error defined below. + * Error code | Description + * ----------------------------- | -------------------------------------------- + * \c MTAPI_ERR_UNKNOWN | MTAPI CUDA couldn't be finalized. + * + * \see mtapi_cuda_plugin_initialize(), mtapi_task_cancel() + * + * \notthreadsafe + * \ingroup C_MTAPI_CUDA + */ +void mtapi_cuda_plugin_finalize( + MTAPI_OUT mtapi_status_t* status /**< [out] Pointer to error code, + may be \c MTAPI_NULL */ +); + +/** + * This function creates a CUDA action. + * + * It is called on the node where the user wants to execute an action on an + * CUDA device. A CUDA action contains a reference to a local job, the + * kernel source to compile and execute on the CUDA device, the name of the + * kernel function, a local work size (see CUDA specification for details) + * and the size of one element in the result buffer. + * After a CUDA action is created, it is referenced by the application using + * a node-local handle of type \c mtapi_action_hndl_t, or indirectly through a + * node-local job handle of type \c mtapi_job_hndl_t. A CUDA action's + * life-cycle begins with mtapi_cuda_action_create(), and ends when + * mtapi_action_delete() or mtapi_finalize() is called. + * + * To create an action, the application must supply the domain-wide job ID of + * the job associated with the action. Job IDs must be predefined in the + * application and runtime, of type \c mtapi_job_id_t, which is an + * implementation-defined type. The job ID is unique in the sense that it is + * unique for the job implemented by the action. However several actions may + * implement the same job for load balancing purposes. + * + * If \c node_local_data_size is not zero, \c node_local_data specifies the + * start of node local data shared by kernel functions executed on the same + * node. \c node_local_data_size can be used by the runtime for cache coherency + * operations. + * + * On success, an action handle is returned and \c *status is set to + * \c MTAPI_SUCCESS. On error, \c *status is set to the appropriate error + * defined below. In the case where the action already exists, \c status will + * be set to \c MTAPI_ERR_ACTION_EXISTS and the handle returned will not be a + * valid handle. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Error codeDescription
\c MTAPI_ERR_JOB_INVALIDThe \c job_id is not a valid job ID, i.e., no action was created for + * that ID or the action has been deleted.
\c MTAPI_ERR_ACTION_EXISTSThis action is already created.
\c MTAPI_ERR_ACTION_LIMITExceeded maximum number of actions allowed.
\c MTAPI_ERR_NODE_NOTINITThe calling node is not initialized.
\c MTAPI_ERR_UNKNOWNThe kernel could not be compiled or no CUDA device was + * available.
+ * + * \see mtapi_action_delete(), mtapi_finalize() + * + * \returns Handle to newly created CUDA action, invalid handle on error + * \threadsafe + * \ingroup C_MTAPI_CUDA + */ +mtapi_action_hndl_t mtapi_cuda_action_create( + MTAPI_IN mtapi_job_id_t job_id, /**< [in] Job id */ + MTAPI_IN char* kernel_source, /**< [in] Pointer to kernel source */ + MTAPI_IN char* kernel_name, /**< [in] Name of the kernel function */ + MTAPI_IN mtapi_size_t local_work_size, + /**< [in] Size of local work group */ + MTAPI_IN mtapi_size_t element_size, /**< [in] Size of one element in the + result buffer */ + MTAPI_IN void* node_local_data, /**< [in] Data shared across tasks */ + MTAPI_IN mtapi_size_t node_local_data_size, + /**< [in] Size of shared data */ + MTAPI_OUT mtapi_status_t* status /**< [out] Pointer to error code, + may be \c MTAPI_NULL */ +); + + +#ifdef __cplusplus +} +#endif + + +#endif // EMBB_MTAPI_C_MTAPI_CUDA_H_ diff --git a/mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c b/mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c new file mode 100644 index 0000000..ce34e03 --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/src/embb_mtapi_cuda.c @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include + +struct embb_mtapi_cuda_plugin_struct { + CUdevice device; + CUcontext context; + CUstream stream; + int work_group_size; +}; + +typedef struct embb_mtapi_cuda_plugin_struct embb_mtapi_cuda_plugin_t; + +static embb_mtapi_cuda_plugin_t embb_mtapi_cuda_plugin; + +struct embb_mtapi_cuda_action_struct { + CUmodule module; + CUfunction function; + CUdeviceptr node_local_data; + int node_local_data_size; + size_t local_work_size; + size_t element_size; +}; + +typedef struct embb_mtapi_cuda_action_struct embb_mtapi_cuda_action_t; + +struct embb_mtapi_cuda_task_struct { + CUdeviceptr arguments; + int arguments_size; + CUdeviceptr result_buffer; + int result_buffer_size; + mtapi_task_hndl_t task; +}; + +typedef struct embb_mtapi_cuda_task_struct embb_mtapi_cuda_task_t; + +static size_t round_up(size_t group_size, size_t global_size) { + size_t r = global_size % group_size; + if (r == 0) { + return global_size; + } else { + return global_size + group_size - r; + } +} + +static void CUDA_CB cuda_task_complete( + CUstream stream, CUresult status, void *data) { + EMBB_UNUSED(stream); + EMBB_UNUSED(status); + CUresult err; + EMBB_UNUSED_IN_RELEASE(err); + embb_mtapi_cuda_task_t * cuda_task = (embb_mtapi_cuda_task_t*)data; + + if (embb_mtapi_node_is_initialized()) { + embb_mtapi_node_t * node = embb_mtapi_node_get_instance(); + + if (embb_mtapi_task_pool_is_handle_valid( + node->task_pool, cuda_task->task)) { + embb_mtapi_task_t * local_task = + embb_mtapi_task_pool_get_storage_for_handle( + node->task_pool, cuda_task->task); + + if (0 != cuda_task->result_buffer) { + err = cuMemFree_v2(cuda_task->result_buffer); + assert(CUDA_SUCCESS == err); + } + if (0 != cuda_task->arguments) { + err = cuMemFree_v2(cuda_task->arguments); + assert(CUDA_SUCCESS == err); + } + + embb_free(cuda_task); + + embb_mtapi_task_set_state(local_task, MTAPI_TASK_COMPLETED); + } + } +} + +static void cuda_task_start( + MTAPI_IN mtapi_task_hndl_t task, + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + CUresult err = CUDA_SUCCESS; + embb_mtapi_cuda_plugin_t * plugin = &embb_mtapi_cuda_plugin; + + if (embb_mtapi_node_is_initialized()) { + embb_mtapi_node_t * node = embb_mtapi_node_get_instance(); + + if (embb_mtapi_task_pool_is_handle_valid(node->task_pool, task)) { + embb_mtapi_task_t * local_task = + embb_mtapi_task_pool_get_storage_for_handle(node->task_pool, task); + + if (embb_mtapi_action_pool_is_handle_valid( + node->action_pool, local_task->action)) { + embb_mtapi_action_t * local_action = + embb_mtapi_action_pool_get_storage_for_handle( + node->action_pool, local_task->action); + + embb_mtapi_cuda_action_t * cuda_action = + (embb_mtapi_cuda_action_t*)local_action->plugin_data; + embb_mtapi_cuda_task_t * cuda_task = + (embb_mtapi_cuda_task_t*)embb_alloc( + sizeof(embb_mtapi_cuda_task_t)); + + size_t elements = local_task->result_size / + cuda_action->element_size; + size_t global_work_size; + + if (0 == elements) + elements = 1; + global_work_size = + round_up(cuda_action->local_work_size, elements); + + cuda_task->task = task; + + cuda_task->arguments_size = (int)local_task->arguments_size; + if (0 < local_task->arguments_size) { + err = cuMemAlloc_v2(&cuda_task->arguments, + local_task->arguments_size); + } else { + cuda_task->arguments = 0; + } + cuda_task->result_buffer_size = (int)local_task->result_size; + if (0 < local_task->result_size) { + err = cuMemAlloc_v2(&cuda_task->result_buffer, + local_task->result_size); + } else { + cuda_task->result_buffer = 0; + } + + if (0 != cuda_task->arguments) { + err = cuMemcpyHtoDAsync_v2(cuda_task->arguments, + local_task->arguments, (size_t)cuda_task->arguments_size, + plugin->stream); + } + + if (CUDA_SUCCESS == err) { + embb_mtapi_task_set_state(local_task, MTAPI_TASK_RUNNING); + void * args[6]; + args[0] = &cuda_task->arguments; + args[1] = &cuda_task->arguments_size; + args[2] = &cuda_task->result_buffer; + args[3] = &cuda_task->result_buffer_size; + args[4] = &cuda_action->node_local_data; + args[5] = &cuda_action->node_local_data_size; + + err = cuLaunchKernel(cuda_action->function, + global_work_size, 1, 1, + cuda_action->local_work_size, 1, 1, + 1024, plugin->stream, args, NULL); + + if (CUDA_SUCCESS == err) { + if (0 != cuda_task->result_buffer) { + err = cuMemcpyDtoHAsync_v2(local_task->result_buffer, + cuda_task->result_buffer, cuda_task->result_buffer_size, + plugin->stream); + } + + err = cuStreamAddCallback(plugin->stream, cuda_task_complete, + cuda_task, 0); + } + } + + if (CUDA_SUCCESS != err) { + embb_mtapi_task_set_state(local_task, MTAPI_TASK_ERROR); + local_status = MTAPI_ERR_ACTION_FAILED; + } else { + local_status = MTAPI_SUCCESS; + } + } + } + } + + mtapi_status_set(status, local_status); +} + +static void cuda_task_cancel( + MTAPI_IN mtapi_task_hndl_t task, + MTAPI_OUT mtapi_status_t* status + ) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + + EMBB_UNUSED(task); + + mtapi_status_set(status, local_status); +} + +static void cuda_action_finalize( + MTAPI_IN mtapi_action_hndl_t action, + MTAPI_OUT mtapi_status_t* status + ) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + CUresult err; + EMBB_UNUSED_IN_RELEASE(err); + + if (embb_mtapi_node_is_initialized()) { + embb_mtapi_node_t * node = embb_mtapi_node_get_instance(); + if (embb_mtapi_action_pool_is_handle_valid(node->action_pool, action)) { + embb_mtapi_action_t * local_action = + embb_mtapi_action_pool_get_storage_for_handle( + node->action_pool, action); + embb_mtapi_cuda_action_t * cuda_action = + (embb_mtapi_cuda_action_t *)local_action->plugin_data; + if (0 != cuda_action->node_local_data) { + err = cuMemFree_v2(cuda_action->node_local_data); + assert(CUDA_SUCCESS == err); + } + + err = cuModuleUnload(cuda_action->module); + assert(CUDA_SUCCESS == err); + + embb_free(cuda_action); + local_status = MTAPI_SUCCESS; + } + } + + mtapi_status_set(status, local_status); +} + +char buffer[1024]; + +void mtapi_cuda_plugin_initialize( + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + CUresult err; + embb_mtapi_cuda_plugin_t * plugin = &embb_mtapi_cuda_plugin; + + mtapi_status_set(status, MTAPI_ERR_UNKNOWN); + + err = cuInit(0); + if (CUDA_SUCCESS != err) return; + err = cuDeviceGet(&plugin->device, 0); + if (CUDA_SUCCESS != err) return; + err = cuCtxCreate_v2(&plugin->context, 0, plugin->device); + if (CUDA_SUCCESS != err) return; + cuDeviceGetAttribute(&plugin->work_group_size, + CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, plugin->device); + err = cuStreamCreate(&plugin->stream, CU_STREAM_NON_BLOCKING); + if (CUDA_SUCCESS != err) return; + local_status = MTAPI_SUCCESS; + + mtapi_status_set(status, local_status); +} + +void mtapi_cuda_plugin_finalize( + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + + CUresult err; + EMBB_UNUSED_IN_RELEASE(err); + embb_mtapi_cuda_plugin_t * plugin = &embb_mtapi_cuda_plugin; + + /* finalization */ + err = cuStreamDestroy_v2(plugin->stream); + assert(CUDA_SUCCESS == err); + err = cuCtxDestroy_v2(plugin->context); + assert(CUDA_SUCCESS == err); + + local_status = MTAPI_SUCCESS; + mtapi_status_set(status, local_status); +} + +mtapi_action_hndl_t mtapi_cuda_action_create( + MTAPI_IN mtapi_job_id_t job_id, + MTAPI_IN char* kernel_source, + MTAPI_IN char* kernel_name, + MTAPI_IN mtapi_size_t local_work_size, + MTAPI_IN mtapi_size_t element_size, + MTAPI_IN void* node_local_data, + MTAPI_IN mtapi_size_t node_local_data_size, + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + mtapi_status_set(status, MTAPI_ERR_UNKNOWN); + + CUresult err; + embb_mtapi_cuda_action_t * action = + (embb_mtapi_cuda_action_t*)embb_alloc( + sizeof(embb_mtapi_cuda_action_t)); + mtapi_action_hndl_t action_hndl = { 0, 0 }; // invalid handle + mtapi_boolean_t free_module_on_error = MTAPI_FALSE; + mtapi_boolean_t free_node_local_data_on_error = MTAPI_FALSE; + + action->local_work_size = local_work_size; + action->element_size = element_size; + + /* initialization */ + err = cuModuleLoadData(&action->module, kernel_source); + if (CUDA_SUCCESS == err) { + free_module_on_error = MTAPI_TRUE; + err = cuModuleGetFunction(&action->function, action->module, kernel_name); + } + if (CUDA_SUCCESS == err) { + if (0 < node_local_data_size) { + err = cuMemAlloc_v2(&action->node_local_data, node_local_data_size); + if (CUDA_SUCCESS == err) { + free_node_local_data_on_error = MTAPI_TRUE; + } + action->node_local_data_size = (int)node_local_data_size; + if (CUDA_SUCCESS == err) { + err = cuMemcpyHtoD_v2( + action->node_local_data, node_local_data, node_local_data_size); + } + } else { + action->node_local_data = 0; + action->node_local_data_size = 0; + } + } + + if (CUDA_SUCCESS == err) { + action_hndl = mtapi_ext_plugin_action_create( + job_id, + cuda_task_start, + cuda_task_cancel, + cuda_action_finalize, + action, + node_local_data, + node_local_data_size, + MTAPI_NULL, + &local_status); + } else { + if (free_node_local_data_on_error) { + cuMemFree_v2(action->node_local_data); + } + if (free_module_on_error) { + cuModuleUnload(action->module); + } + embb_free(action); + } + + mtapi_status_set(status, local_status); + + return action_hndl; +} diff --git a/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu new file mode 100644 index 0000000..c46828b --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_kernel.cu @@ -0,0 +1,17 @@ +extern "C" __global__ void test( + void* arguments, + int arguments_size, + void* result_buffer, + int result_buffer_size, + void* node_local_data, + int node_local_data_size) { + int ii = blockDim.x * blockIdx.x + threadIdx.x; + int elements = arguments_size / sizeof(float) / 2; + if (ii >= elements) + return; + float* a = (float*)arguments; + float* b = ((float*)arguments) + elements; + float* c = (float*)result_buffer; + float* d = (float*)node_local_data; + c[ii] = a[ii] + b[ii] + d[0]; +} diff --git a/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc new file mode 100644 index 0000000..9c191b4 --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.cc @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include + +#define MTAPI_CHECK_STATUS(status) \ +PT_ASSERT(MTAPI_SUCCESS == status) + +#define CUDA_DOMAIN 1 +#define CUDA_NODE 2 +#define CUDA_JOB 2 + +// CUDA Kernel Function for element by element vector addition +#include "embb_mtapi_cuda_test_kernel.h" + +TaskTest::TaskTest() { + CreateUnit("mtapi cuda task test").Add(&TaskTest::TestBasic, this); +} + +void TaskTest::TestBasic() { + mtapi_status_t status; + mtapi_job_hndl_t job; + mtapi_task_hndl_t task; + mtapi_action_hndl_t action; + + const int kElements = 64; + float arguments[kElements * 2]; + float results[kElements]; + + for (int ii = 0; ii < kElements; ii++) { + arguments[ii] = static_cast(ii); + arguments[ii + kElements] = static_cast(ii); + } + + mtapi_cuda_plugin_initialize(&status); + if (status == MTAPI_ERR_FUNC_NOT_IMPLEMENTED) { + // CUDA unavailable + return; + } + MTAPI_CHECK_STATUS(status); + + mtapi_initialize( + CUDA_DOMAIN, + CUDA_NODE, + MTAPI_NULL, + MTAPI_NULL, + &status); + MTAPI_CHECK_STATUS(status); + + float node_local = 1.0f; + action = mtapi_cuda_action_create( + CUDA_JOB, + reinterpret_cast(imageBytes), "test", 32, 4, + &node_local, sizeof(float), + &status); + MTAPI_CHECK_STATUS(status); + + status = MTAPI_ERR_UNKNOWN; + job = mtapi_job_get(CUDA_JOB, CUDA_DOMAIN, &status); + MTAPI_CHECK_STATUS(status); + + task = mtapi_task_start( + MTAPI_TASK_ID_NONE, + job, + arguments, kElements * 2 * sizeof(float), + results, kElements*sizeof(float), + MTAPI_DEFAULT_TASK_ATTRIBUTES, + MTAPI_GROUP_NONE, + &status); + MTAPI_CHECK_STATUS(status); + + mtapi_task_wait(task, MTAPI_INFINITE, &status); + MTAPI_CHECK_STATUS(status); + + for (int ii = 0; ii < kElements; ii++) { + PT_EXPECT_EQ(results[ii], ii * 2 + 1); + } + + mtapi_action_delete(action, MTAPI_INFINITE, &status); + MTAPI_CHECK_STATUS(status); + + mtapi_finalize(&status); + MTAPI_CHECK_STATUS(status); + + mtapi_cuda_plugin_finalize(&status); + MTAPI_CHECK_STATUS(status); +} diff --git a/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h new file mode 100644 index 0000000..bc9701f --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/test/embb_mtapi_cuda_test_task.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_ +#define MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_ + +#include + +class TaskTest : public partest::TestCase { + public: + TaskTest(); + + private: + void TestBasic(); +}; + +#endif // MTAPI_PLUGINS_C_MTAPI_CUDA_C_TEST_EMBB_MTAPI_CUDA_TEST_TASK_H_ diff --git a/mtapi_plugins_c/mtapi_cuda_c/test/main.cc b/mtapi_plugins_c/mtapi_cuda_c/test/main.cc new file mode 100644 index 0000000..2c5faaf --- /dev/null +++ b/mtapi_plugins_c/mtapi_cuda_c/test/main.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include + + +PT_MAIN("MTAPI CUDA") { + PT_RUN(TaskTest); +} diff --git a/mtapi_plugins_c/mtapi_opencl_c/include/embb/mtapi/c/mtapi_opencl.h b/mtapi_plugins_c/mtapi_opencl_c/include/embb/mtapi/c/mtapi_opencl.h index 185bb09..5c51386 100644 --- a/mtapi_plugins_c/mtapi_opencl_c/include/embb/mtapi/c/mtapi_opencl.h +++ b/mtapi_plugins_c/mtapi_opencl_c/include/embb/mtapi/c/mtapi_opencl.h @@ -51,7 +51,7 @@ extern "C" { * * It must be called on all nodes using the MTAPI OpenCL plugin. * - * Application software using MTAPI network must call + * Application software using MTAPI OpenCL must call * mtapi_opencl_plugin_initialize() once per node. It is an error to call * mtapi_opencl_plugin_initialize() multiple times * from a given node, unless mtapi_opencl_plugin_finalize() is called in @@ -85,7 +85,7 @@ void mtapi_opencl_plugin_initialize( * unless mtapi_opencl_plugin_initialize() has been called prior to each * mtapi_opencl_plugin_finalize() call. * - * All network tasks that have not completed and that have been started on the + * All OpenCL tasks that have not completed and that have been started on the * node where mtapi_opencl_plugin_finalize() is called will be canceled * (see mtapi_task_cancel()). mtapi_opencl_plugin_finalize() blocks until all * tasks that have been started on the same node return. Tasks that execute diff --git a/scripts/run_cpplint.sh b/scripts/run_cpplint.sh index c9d633b..3612f22 100755 --- a/scripts/run_cpplint.sh +++ b/scripts/run_cpplint.sh @@ -79,7 +79,7 @@ retval=0 ##Excluded files RAND_FILES=( embb_mtapi_test_group.cc embb_mtapi_test_queue.cc embb_mtapi_test_task.cc queue_test-inl.h ) -for project in base_c mtapi_c mtapi_plugins_c/mtapi_network_c mtapi_plugins_c/mtapi_opencl_c base_cpp mtapi_cpp algorithms_cpp containers_cpp dataflow_cpp +for project in base_c mtapi_c mtapi_plugins_c/mtapi_network_c mtapi_plugins_c/mtapi_opencl_c mtapi_plugins_c/mtapi_cuda_c base_cpp mtapi_cpp algorithms_cpp containers_cpp dataflow_cpp do echo "-> Doing project: $project" dir=$d/$project