/*
 * Copyright (c) 2014, Siemens AG. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef EMBB_MTAPI_C_MTAPI_CUDA_H_
#define EMBB_MTAPI_C_MTAPI_CUDA_H_


#include <embb/mtapi/c/mtapi_ext.h>


#ifdef __cplusplus
extern "C" {
#endif


/**
 * \defgroup C_MTAPI_CUDA MTAPI CUDA Plugin
 *
 * \ingroup C_MTAPI_EXT
 *
 * Provides functionality to execute tasks on CUDA devices.
 */


/**
 * Initializes the MTAPI CUDA environment on a previously initialized MTAPI
 * node.
 *
 * It must be called on all nodes using the MTAPI CUDA plugin.
 *
 * Application software using MTAPI CUDA must call
 * mtapi_cuda_plugin_initialize() once per node. It is an error to call
 * mtapi_cuda_plugin_initialize() multiple times
 * from a given node, unless mtapi_cuda_plugin_finalize() is called in
 * between.
 *
 * On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is
 * set to the appropriate error defined below.
 * Error code                  | Description
 * --------------------------- | ----------------------------------------------
 * \c MTAPI_ERR_UNKNOWN        | MTAPI CUDA couldn't be initialized.
 *
 * \see mtapi_cuda_plugin_finalize()
 *
 * \notthreadsafe
 * \ingroup C_MTAPI_CUDA
 */
void mtapi_cuda_plugin_initialize(
  MTAPI_OUT mtapi_status_t* status     /**< [out] Pointer to error code,
                                            may be \c MTAPI_NULL */
);
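
/*
 * Illustrative usage sketch, not part of this header: the local MTAPI node
 * is assumed to be initialized before the plugin. THIS_DOMAIN_ID and
 * THIS_NODE_ID are hypothetical, application-defined identifiers.
 *
 *   mtapi_status_t status;
 *   // Initialize the local MTAPI node first ...
 *   mtapi_initialize(THIS_DOMAIN_ID, THIS_NODE_ID,
 *                    MTAPI_NULL, MTAPI_NULL, &status);
 *   // ... then bring up the CUDA plugin on this node.
 *   mtapi_cuda_plugin_initialize(&status);
 *   if (MTAPI_SUCCESS != status) {
 *     // MTAPI_ERR_UNKNOWN: the CUDA plugin could not be initialized.
 *   }
 */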

/**
 * Finalizes the MTAPI CUDA environment on the local MTAPI node.
 *
 * It has to be called by each node using MTAPI CUDA. It is an error to call
 * mtapi_cuda_plugin_finalize() without first calling
 * mtapi_cuda_plugin_initialize(). An MTAPI node can call
 * mtapi_cuda_plugin_finalize() once for each call to
 * mtapi_cuda_plugin_initialize(), but it is an error to call
 * mtapi_cuda_plugin_finalize() multiple times from a given node
 * unless mtapi_cuda_plugin_initialize() has been called prior to each
 * mtapi_cuda_plugin_finalize() call.
 *
 * All CUDA tasks that have not completed and that have been started on the
 * node where mtapi_cuda_plugin_finalize() is called will be canceled
 * (see mtapi_task_cancel()). mtapi_cuda_plugin_finalize() blocks until all
 * tasks that have been started on the same node return. Tasks that execute
 * actions on the node where mtapi_cuda_plugin_finalize() is called also
 * block finalization of the MTAPI CUDA system on that node.
 *
 * On success, \c *status is set to \c MTAPI_SUCCESS. On error, \c *status is
 * set to the appropriate error defined below.
 * Error code                    | Description
 * ----------------------------- | --------------------------------------------
 * \c MTAPI_ERR_UNKNOWN          | MTAPI CUDA couldn't be finalized.
 *
 * \see mtapi_cuda_plugin_initialize(), mtapi_task_cancel()
 *
 * \notthreadsafe
 * \ingroup C_MTAPI_CUDA
 */
void mtapi_cuda_plugin_finalize(
  MTAPI_OUT mtapi_status_t* status     /**< [out] Pointer to error code,
                                            may be \c MTAPI_NULL */
);
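
/*
 * Illustrative shutdown sketch, not part of this header: the plugin is
 * finalized before the node itself.
 *
 *   mtapi_status_t status;
 *   // Cancels unfinished CUDA tasks started on this node and blocks until
 *   // all of them have returned.
 *   mtapi_cuda_plugin_finalize(&status);
 *   if (MTAPI_SUCCESS != status) {
 *     // MTAPI_ERR_UNKNOWN: MTAPI CUDA could not be finalized.
 *   }
 *   // Finalize the local MTAPI node afterwards.
 *   mtapi_finalize(&status);
 */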

/**
 * This function creates a CUDA action.
 *
 * It is called on the node where the user wants to execute an action on a
 * CUDA device. A CUDA action contains a reference to a local job, the
 * kernel source to compile and execute on the CUDA device, the name of the
 * kernel function, a local work size (see CUDA specification for details)
 * and the size of one element in the result buffer.
 * After a CUDA action is created, it is referenced by the application using
 * a node-local handle of type \c mtapi_action_hndl_t, or indirectly through a
 * node-local job handle of type \c mtapi_job_hndl_t. A CUDA action's
 * life-cycle begins with mtapi_cuda_action_create(), and ends when
 * mtapi_action_delete() or mtapi_finalize() is called.
 *
 * To create an action, the application must supply the domain-wide job ID of
 * the job associated with the action. Job IDs must be predefined in the
 * application and runtime, of type \c mtapi_job_id_t, which is an
 * implementation-defined type. The job ID is unique in the sense that it is
 * unique for the job implemented by the action. However, several actions may
 * implement the same job for load balancing purposes.
 *
 * If \c node_local_data_size is not zero, \c node_local_data specifies the
 * start of node local data shared by kernel functions executed on the same
 * node. \c node_local_data_size can be used by the runtime for cache coherency
 * operations.
 *
 * On success, an action handle is returned and \c *status is set to
 * \c MTAPI_SUCCESS. On error, \c *status is set to the appropriate error
 * defined below. In the case where the action already exists, \c status will
 * be set to \c MTAPI_ERR_ACTION_EXISTS and the handle returned will not be a
 * valid handle.
 * <table>
 *   <tr>
 *     <th>Error code</th>
 *     <th>Description</th>
 *   </tr>
 *   <tr>
 *     <td>\c MTAPI_ERR_JOB_INVALID</td>
 *     <td>The \c job_id is not a valid job ID, i.e., no action was created for
 *         that ID or the action has been deleted.</td>
 *   </tr>
 *   <tr>
 *     <td>\c MTAPI_ERR_ACTION_EXISTS</td>
 *     <td>This action is already created.</td>
 *   </tr>
 *   <tr>
 *     <td>\c MTAPI_ERR_ACTION_LIMIT</td>
 *     <td>Exceeded maximum number of actions allowed.</td>
 *   </tr>
 *   <tr>
 *     <td>\c MTAPI_ERR_NODE_NOTINIT</td>
 *     <td>The calling node is not initialized.</td>
 *   </tr>
 *   <tr>
 *     <td>\c MTAPI_ERR_UNKNOWN</td>
 *     <td>The kernel could not be compiled or no CUDA device was
 *         available.</td>
 *   </tr>
 * </table>
 *
 * \see mtapi_action_delete(), mtapi_finalize()
 *
 * \returns Handle to newly created CUDA action, invalid handle on error
 * \threadsafe
 * \ingroup C_MTAPI_CUDA
 */
mtapi_action_hndl_t mtapi_cuda_action_create(
  MTAPI_IN mtapi_job_id_t job_id,      /**< [in] Job id */
  MTAPI_IN char* kernel_source,        /**< [in] Pointer to kernel source */
  MTAPI_IN char* kernel_name,          /**< [in] Name of the kernel function */
  MTAPI_IN mtapi_size_t local_work_size,
                                       /**< [in] Size of local work group */
  MTAPI_IN mtapi_size_t element_size,  /**< [in] Size of one element in the
                                            result buffer */
  MTAPI_IN void* node_local_data,      /**< [in] Data shared across tasks */
  MTAPI_IN mtapi_size_t node_local_data_size,
                                       /**< [in] Size of shared data */
  MTAPI_OUT mtapi_status_t* status     /**< [out] Pointer to error code,
                                            may be \c MTAPI_NULL */
);
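
/*
 * Illustrative sketch of creating a CUDA action and running a task through
 * its job, not part of this header. CUDA_JOB_ID, THIS_DOMAIN_ID, the kernel
 * source string, and the argument/result buffers are hypothetical,
 * application-defined values; node and plugin are assumed to be initialized.
 *
 *   mtapi_status_t status;
 *   mtapi_action_hndl_t action = mtapi_cuda_action_create(
 *     CUDA_JOB_ID,
 *     kernel_source,                  // CUDA kernel source to compile
 *     "my_kernel",                    // name of the kernel function
 *     32,                             // local work size
 *     sizeof(float),                  // size of one result element
 *     MTAPI_NULL, 0,                  // no node-local data
 *     &status);
 *
 *   // Tasks are started via the job associated with the action.
 *   mtapi_job_hndl_t job = mtapi_job_get(CUDA_JOB_ID, THIS_DOMAIN_ID, &status);
 *   mtapi_task_hndl_t task = mtapi_task_start(
 *     MTAPI_TASK_ID_NONE, job,
 *     arguments, arguments_size,      // input buffer passed to the kernel
 *     results, results_size,          // result buffer filled by the kernel
 *     MTAPI_DEFAULT_TASK_ATTRIBUTES, MTAPI_GROUP_NONE,
 *     &status);
 *   mtapi_task_wait(task, MTAPI_INFINITE, &status);
 *
 *   // Delete the action once it is no longer needed.
 *   mtapi_action_delete(action, MTAPI_INFINITE, &status);
 */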


#ifdef __cplusplus
}
#endif


#endif // EMBB_MTAPI_C_MTAPI_CUDA_H_