diff --git a/mtapi_opencl_c/CMakeLists.txt b/mtapi_opencl_c/CMakeLists.txt index f2f513f..112b273 100644 --- a/mtapi_opencl_c/CMakeLists.txt +++ b/mtapi_opencl_c/CMakeLists.txt @@ -25,6 +25,7 @@ include_directories(${EMBB_MTAPI_OPENCL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/../base_c/include ${CMAKE_CURRENT_BINARY_DIR}/../base_c/include ${CMAKE_CURRENT_SOURCE_DIR}/../mtapi_c/include + ${CMAKE_CURRENT_SOURCE_DIR}/../mtapi_c/src ) add_library(embb_mtapi_opencl_c ${EMBB_MTAPI_OPENCL_C_SOURCES} ${EMBB_MTAPI_OPENCL_C_HEADERS}) diff --git a/mtapi_opencl_c/include/embb/mtapi/c/mtapi_opencl.h b/mtapi_opencl_c/include/embb/mtapi/c/mtapi_opencl.h new file mode 100644 index 0000000..0205367 --- /dev/null +++ b/mtapi_opencl_c/include/embb/mtapi/c/mtapi_opencl.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2014, Siemens AG. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EMBB_MTAPI_C_MTAPI_OPENCL_H_ +#define EMBB_MTAPI_C_MTAPI_OPENCL_H_ + + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +void mtapi_opencl_plugin_initialize( + MTAPI_OUT mtapi_status_t* status +); + +void mtapi_opencl_plugin_finalize( + MTAPI_OUT mtapi_status_t* status +); + +mtapi_action_hndl_t mtapi_opencl_action_create( + MTAPI_IN mtapi_job_id_t job_id, + MTAPI_IN char* kernel_source, + MTAPI_IN char* kernel_name, + MTAPI_IN mtapi_size_t local_work_size, + MTAPI_IN mtapi_size_t element_size, + MTAPI_IN void* node_local_data, + MTAPI_IN mtapi_size_t node_local_data_size, + MTAPI_OUT mtapi_status_t* status +); + + +#ifdef __cplusplus +} +#endif + + +#endif // EMBB_MTAPI_C_MTAPI_OPENCL_H_ diff --git a/mtapi_opencl_c/src/embb_mtapi_opencl.c b/mtapi_opencl_c/src/embb_mtapi_opencl.c new file mode 100644 index 0000000..fecb2cc --- /dev/null +++ b/mtapi_opencl_c/src/embb_mtapi_opencl.c @@ -0,0 +1,289 @@ +#include +#include + +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include + +struct embb_mtapi_opencl_plugin_struct { + cl_platform_id platform_id; + cl_device_id device_id; + cl_context context; + cl_command_queue command_queue; + cl_uint work_group_size; + cl_uint work_item_sizes[3]; +}; + +typedef struct embb_mtapi_opencl_plugin_struct embb_mtapi_opencl_plugin_t; + +static embb_mtapi_opencl_plugin_t embb_mtapi_opencl_plugin; + +struct embb_mtapi_opencl_action_struct { + cl_program program; + cl_kernel kernel; + cl_mem node_local_data; + int node_local_data_size; + size_t local_work_size; + size_t element_size; +}; + +typedef struct embb_mtapi_opencl_action_struct embb_mtapi_opencl_action_t; + +struct embb_mtapi_opencl_task_struct { + cl_mem arguments; + int arguments_size; + cl_mem result_buffer; + int result_buffer_size; + cl_event kernel_finish_event; + mtapi_task_hndl_t task; +}; + +typedef struct embb_mtapi_opencl_task_struct embb_mtapi_opencl_task_t; + +static int round_up(int group_size, int global_size) { + int r = global_size % group_size; + if (r == 0) { + return global_size; + } + else { + return global_size + group_size - r; + } +} + +static void CL_API_CALL opencl_task_complete(cl_event ev, cl_int status, void * data) { + EMBB_UNUSED(ev); + EMBB_UNUSED(status); + + cl_int err; + embb_mtapi_opencl_task_t * opencl_task = (embb_mtapi_opencl_task_t*)data; + + if (embb_mtapi_node_is_initialized()) { + embb_mtapi_node_t * node = embb_mtapi_node_get_instance(); + + if (embb_mtapi_task_pool_is_handle_valid(node->task_pool, opencl_task->task)) { + embb_mtapi_task_t * local_task = + embb_mtapi_task_pool_get_storage_for_handle(node->task_pool, opencl_task->task); + + err = clWaitForEvents(1, &opencl_task->kernel_finish_event); + + if (NULL != opencl_task->result_buffer) { + err = clReleaseMemObject(opencl_task->result_buffer); + } + if (NULL != opencl_task->arguments) { + err = clReleaseMemObject(opencl_task->arguments); + } + + embb_mtapi_task_set_state(local_task, MTAPI_TASK_COMPLETED); + } + } +} + +static void opencl_task_start( + MTAPI_IN mtapi_task_hndl_t task, + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + cl_int err; + + if (embb_mtapi_node_is_initialized()) { + embb_mtapi_node_t * node = embb_mtapi_node_get_instance(); + + if (embb_mtapi_task_pool_is_handle_valid(node->task_pool, task)) { + embb_mtapi_task_t * local_task = + embb_mtapi_task_pool_get_storage_for_handle(node->task_pool, task); + + if (embb_mtapi_action_pool_is_handle_valid(node->action_pool, local_task->action)) + { + embb_mtapi_action_t * local_action = + embb_mtapi_action_pool_get_storage_for_handle(node->action_pool, local_task->action); + + embb_mtapi_opencl_plugin_t * plugin = &embb_mtapi_opencl_plugin; + embb_mtapi_opencl_action_t * opencl_action = (embb_mtapi_opencl_action_t*)local_action->plugin_data; + embb_mtapi_opencl_task_t * opencl_task = (embb_mtapi_opencl_task_t*)embb_alloc(sizeof(embb_mtapi_opencl_task_t)); + + size_t elements = local_task->result_size / opencl_action->element_size; + size_t global_work_size; + + if (0 == elements) + elements = 1; + global_work_size = round_up((int)opencl_action->local_work_size, elements); + + opencl_task->task = task; + + opencl_task->arguments_size = local_task->arguments_size; + if (0 < local_task->arguments_size) { + opencl_task->arguments = clCreateBuffer(plugin->context, CL_MEM_READ_ONLY, local_task->arguments_size, NULL, &err); + } else { + opencl_task->arguments = NULL; + } + opencl_task->result_buffer_size = local_task->result_size; + if (0 < local_task->result_size) { + opencl_task->result_buffer = clCreateBuffer(plugin->context, CL_MEM_WRITE_ONLY, local_task->result_size, NULL, &err); + } else { + opencl_task->result_buffer = NULL; + } + + err = clSetKernelArg(opencl_action->kernel, 0, sizeof(cl_mem), (const void*)&opencl_task->arguments); + err = clSetKernelArg(opencl_action->kernel, 1, sizeof(cl_int), (const void*)&opencl_task->arguments_size); + + err = clSetKernelArg(opencl_action->kernel, 2, sizeof(cl_mem), (const void*)&opencl_task->result_buffer); + err = clSetKernelArg(opencl_action->kernel, 3, sizeof(cl_int), (const void*)&opencl_task->result_buffer_size); + + err = clEnqueueWriteBuffer(plugin->command_queue, opencl_task->arguments, CL_FALSE, 0, opencl_task->arguments_size, local_task->arguments, 0, NULL, NULL); + err = clEnqueueNDRangeKernel(plugin->command_queue, opencl_action->kernel, 1, NULL, &global_work_size, &opencl_action->local_work_size, 0, NULL, NULL); + err = clEnqueueReadBuffer(plugin->command_queue, opencl_task->result_buffer, CL_FALSE, 0, opencl_task->result_buffer_size, local_task->result_buffer, 0, NULL, &opencl_task->kernel_finish_event); + err = clSetEventCallback(opencl_task->kernel_finish_event, CL_COMPLETE, opencl_task_complete, opencl_task); + err = clFlush(plugin->command_queue); + + embb_mtapi_task_set_state(local_task, MTAPI_TASK_RUNNING); + local_status = MTAPI_SUCCESS; + } + } + } + + mtapi_status_set(status, local_status); +} + +static void opencl_task_cancel( + MTAPI_IN mtapi_task_hndl_t task, + MTAPI_OUT mtapi_status_t* status + ) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + + EMBB_UNUSED(task); + + mtapi_status_set(status, local_status); +} + +static void opencl_action_finalize( + MTAPI_IN mtapi_action_hndl_t action, + MTAPI_OUT mtapi_status_t* status + ) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + cl_int err; + + if (embb_mtapi_node_is_initialized()) { + embb_mtapi_node_t * node = embb_mtapi_node_get_instance(); + if (embb_mtapi_action_pool_is_handle_valid(node->action_pool, action)) + { + embb_mtapi_action_t * local_action = + embb_mtapi_action_pool_get_storage_for_handle(node->action_pool, action); + embb_mtapi_opencl_action_t * opencl_action = + (embb_mtapi_opencl_action_t *)local_action->plugin_data; + if (NULL != opencl_action->node_local_data) { + cl_int err; + err = clReleaseMemObject(opencl_action->node_local_data); + } + + err = clReleaseKernel(opencl_action->kernel); + err = clReleaseProgram(opencl_action->program); + + embb_free(opencl_action); + local_status = MTAPI_SUCCESS; + } + } + + mtapi_status_set(status, local_status); +} + +char buffer[1024]; + +void mtapi_opencl_plugin_initialize( + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + cl_int err; + embb_mtapi_opencl_plugin_t * plugin = &embb_mtapi_opencl_plugin; + + embb_mtapi_opencl_link_at_runtime(); + + err = clGetPlatformIDs(1, &plugin->platform_id, NULL); + err = clGetDeviceIDs(plugin->platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &plugin->device_id, NULL); + plugin->context = clCreateContext(NULL, 1, &plugin->device_id, NULL, NULL, &err); + + err = clGetDeviceInfo(plugin->device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(cl_uint), &plugin->work_group_size, NULL); + err = clGetDeviceInfo(plugin->device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(cl_uint), &plugin->work_item_sizes[0], NULL); + + plugin->command_queue = clCreateCommandQueue(plugin->context, plugin->device_id, 0, &err); + + local_status = MTAPI_SUCCESS; + mtapi_status_set(status, local_status); +} + +void mtapi_opencl_plugin_finalize( + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + + cl_int err; + embb_mtapi_opencl_plugin_t * plugin = &embb_mtapi_opencl_plugin; + + /* finalization */ + err = clReleaseCommandQueue(plugin->command_queue); + err = clReleaseContext(plugin->context); + + local_status = MTAPI_SUCCESS; + mtapi_status_set(status, local_status); +} + +mtapi_action_hndl_t mtapi_opencl_action_create( + MTAPI_IN mtapi_job_id_t job_id, + MTAPI_IN char* kernel_source, + MTAPI_IN char* kernel_name, + MTAPI_IN mtapi_size_t local_work_size, + MTAPI_IN mtapi_size_t element_size, + MTAPI_IN void* node_local_data, + MTAPI_IN mtapi_size_t node_local_data_size, + MTAPI_OUT mtapi_status_t* status) { + mtapi_status_t local_status = MTAPI_ERR_UNKNOWN; + + cl_int err; + embb_mtapi_opencl_plugin_t * plugin = &embb_mtapi_opencl_plugin; + embb_mtapi_opencl_action_t * action = (embb_mtapi_opencl_action_t*)embb_alloc(sizeof(embb_mtapi_opencl_action_t)); + mtapi_action_hndl_t action_hndl; + size_t kernel_length = strlen(kernel_source); + + action->local_work_size = local_work_size; + action->element_size = element_size; + + /* initialization */ + action->program = clCreateProgramWithSource(plugin->context, 1, &kernel_source, &kernel_length, &err); + err = clBuildProgram(action->program, 1, &plugin->device_id, NULL, NULL, NULL); + if (CL_SUCCESS != err) { + err = clGetProgramBuildInfo(action->program, plugin->device_id, CL_PROGRAM_BUILD_LOG, 1024, buffer, NULL); + } + action->kernel = clCreateKernel(action->program, kernel_name, &err); + + if (0 < node_local_data_size) { + action->node_local_data = clCreateBuffer(plugin->context, CL_MEM_READ_ONLY, node_local_data_size, NULL, &err); + action->node_local_data_size = node_local_data_size; + err = clEnqueueWriteBuffer(plugin->command_queue, action->node_local_data, CL_TRUE, 0, action->node_local_data_size, node_local_data, 0, NULL, NULL); + } else { + action->node_local_data = NULL; + action->node_local_data_size = 0; + } + + err = clSetKernelArg(action->kernel, 4, sizeof(cl_mem), (const void*)&action->node_local_data); + err = clSetKernelArg(action->kernel, 5, sizeof(cl_int), (const void*)&action->node_local_data_size); + + action_hndl = mtapi_ext_plugin_action_create( + job_id, + opencl_task_start, + opencl_task_cancel, + opencl_action_finalize, + action, + node_local_data, + node_local_data_size, + MTAPI_NULL, + &local_status); + + mtapi_status_set(status, local_status); + + return action_hndl; +} diff --git a/mtapi_opencl_c/src/embb_mtapi_opencl_runtimelinker.c b/mtapi_opencl_c/src/embb_mtapi_opencl_runtimelinker.c index b7f353b..81b21f0 100644 --- a/mtapi_opencl_c/src/embb_mtapi_opencl_runtimelinker.c +++ b/mtapi_opencl_c/src/embb_mtapi_opencl_runtimelinker.c @@ -53,6 +53,11 @@ DECLARECLFUNC(cl_int, clBuildProgram, (cl_program program, cl_uint num_devices, return clBuildProgram_Dynamic(program, num_devices, device_list, options, pfn_notify, user_data); } +DECLARECLFUNC(cl_int, clGetProgramBuildInfo, (cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret)) +{ + return clGetProgramBuildInfo_Dynamic(program, device, param_name, param_value_size, param_value, param_value_size_ret); +} + DECLARECLFUNC(cl_kernel, clCreateKernel, (cl_program program, const char * kernel_name, cl_int * errcode_ret)) { return clCreateKernel_Dynamic(program, kernel_name, errcode_ret); @@ -78,6 +83,16 @@ DECLARECLFUNC(cl_int, clEnqueueReadBuffer, (cl_command_queue command_queue, cl_m return clEnqueueReadBuffer_Dynamic(command_queue, buffer, blocking_read, offset, cb, ptr, num_events_in_wait_list, event_wait_list, event); } +DECLARECLFUNC(cl_int, clSetEventCallback, (cl_event event, cl_int command_exec_callback_type, void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), void * user_data)) +{ + return clSetEventCallback_Dynamic(event, command_exec_callback_type, pfn_notify, user_data); +} + +DECLARECLFUNC(cl_int, clWaitForEvents, (cl_uint num_events, const cl_event * event_list)) +{ + return clWaitForEvents_Dynamic(num_events, event_list); +} + DECLARECLFUNC(cl_int, clReleaseKernel, (cl_kernel kernel)) { return clReleaseKernel_Dynamic(kernel); @@ -103,6 +118,11 @@ DECLARECLFUNC(cl_int, clReleaseMemObject, (cl_mem memobj)) return clReleaseMemObject_Dynamic(memobj); } +DECLARECLFUNC(cl_int, clFlush, (cl_command_queue command_queue)) +{ + return clFlush_Dynamic(command_queue); +} + DECLARECLFUNC(cl_int, clFinish, (cl_command_queue command_queue)) { return clFinish_Dynamic(command_queue); @@ -166,16 +186,20 @@ int embb_mtapi_opencl_link_at_runtime() { CHECKEDIMPORT(clCreateBuffer); CHECKEDIMPORT(clCreateProgramWithSource); CHECKEDIMPORT(clBuildProgram); + CHECKEDIMPORT(clGetProgramBuildInfo); CHECKEDIMPORT(clCreateKernel); CHECKEDIMPORT(clSetKernelArg); CHECKEDIMPORT(clEnqueueWriteBuffer); CHECKEDIMPORT(clEnqueueNDRangeKernel); CHECKEDIMPORT(clEnqueueReadBuffer); + CHECKEDIMPORT(clSetEventCallback); + CHECKEDIMPORT(clWaitForEvents); CHECKEDIMPORT(clReleaseKernel); CHECKEDIMPORT(clReleaseProgram); CHECKEDIMPORT(clReleaseCommandQueue); CHECKEDIMPORT(clReleaseContext); CHECKEDIMPORT(clReleaseMemObject); + CHECKEDIMPORT(clFlush); CHECKEDIMPORT(clFinish); CHECKEDIMPORT(clCreateSampler); CHECKEDIMPORT(clReleaseSampler);