/*
 * cl_kernel.cpp - CL kernel
 *
 *  Copyright (c) 2015 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Author: Wind Yuan
 */

#include "cl_kernel.h"
#include "cl_context.h"
#include "cl_device.h"
#include "file_handle.h"

// mkdir () and the S_I* permission bits used by build_kernel ()
#include <sys/stat.h>

#define ENABLE_DEBUG_KERNEL 0

#define XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE 0

namespace XCam {

CLKernel::KernelMap CLKernel::_kernel_map;
Mutex CLKernel::_kernel_map_mutex;

/*
 * Build the default kernel-cache directory, "$HOME/.xcam/", in a static buffer.
 *
 * Falls back to the current directory (".") when HOME is not set, because
 * getenv () returns NULL in that case and NULL must not be passed to "%s".
 */
static char*
default_cache_path ()
{
    static char path[XCAM_MAX_STR_SIZE] = {0};
    const char *home = std::getenv ("HOME");

    if (!home)
        home = ".";

    snprintf (
        path, XCAM_MAX_STR_SIZE - 1,
        "%s/%s", home, ".xcam/");

    return path;
}

const char* CLKernel::_kernel_cache_path = default_cache_path ();

/*
 * Create an unbuilt kernel bound to @context.
 * @name is optional; when given, a private copy (at most XCAM_MAX_STR_SIZE
 * characters) is kept and released in the destructor.
 */
CLKernel::CLKernel (const SmartPtr<CLContext> &context, const char *name)
    : _name (NULL)
    , _kernel_id (NULL)
    , _context (context)
{
    XCAM_ASSERT (context.ptr ());
    //XCAM_ASSERT (name);

    if (name)
        _name = strndup (name, XCAM_MAX_STR_SIZE);

    set_default_work_size ();

    XCAM_OBJ_PROFILING_INIT;
}

CLKernel::~CLKernel ()
{
    destroy ();
    if (_name)
        xcam_free (_name);
}

/*
 * Hand the kernel id back to the context, but only when this object is not a
 * clone: clone () copies the parent's kernel id and records the parent in
 * _parent_kernel, so a clone must leave the id alone.
 */
void
CLKernel::destroy ()
{
    if (!_parent_kernel.ptr ())
        _context->destroy_kernel_id (_kernel_id);
}

/*
 * Fold a string into an 8-byte key.
 *
 * The string is consumed in 8-byte groups which are XOR-ed together as two
 * 32-bit words; the remaining (len % 8) bytes are XOR-ed byte-wise into the
 * first bytes of the result.
 *
 * @str     input bytes; a NULL pointer yields an all-zero key
 * @len     number of bytes to hash, 0 means "use strlen (str)"
 * @key_id  receives the 8-byte key
 */
static void
get_string_key_id (const char *str, uint32_t len, uint8_t key_id[8])
{
    uint32_t key[2];
    uint32_t word[2];
    uint32_t aligned_len = 0;
    uint32_t i = 0;

    xcam_mem_clear (key);

    if (!str) {
        memcpy (key_id, key, 8);
        return;
    }

    if (!len)
        len = strlen (str);
    aligned_len = XCAM_ALIGN_DOWN (len, 8);

    for (i = 0; i < aligned_len / 8; ++i) {
        // memcpy instead of dereferencing a casted pointer: the string carries
        // no alignment guarantee and char data must not be read as uint32_t
        memcpy (word, str + i * 8, sizeof (word));
        key[0] ^= word[0];
        key[1] ^= word[1];
    }
    memcpy (key_id, key, 8);

    // fold the tail (fewer than 8 bytes) byte by byte
    len -= aligned_len;
    str += aligned_len;
    for (i = 0; i < len; ++i) {
        key_id[i] ^= (uint8_t)str[i];
    }
}

/*
 * Build (or reuse) the kernel described by @info and make this object a clone
 * of it.
 *
 * Kernels are shared through _kernel_map, keyed by
 * "<kernel name>#<8-byte body key in hex>#<options>". On a map miss a new
 * kernel is created, first from the binary cache file "<cache dir>/<key>" and,
 * when no usable cache file exists, from source. A binary produced by a
 * source build is written to a time-stamped temporary file and renamed onto
 * the cache file.
 *
 * The cache directory is taken from the XCAM_CL_KERNEL_CACHE_PATH environment
 * variable, falling back to _kernel_cache_path.
 *
 * @info     kernel name, source body and body length
 * @options  build options handed to load_from_source (), may be NULL
 * @return   XCAM_RETURN_NO_ERROR on success, XCAM_RETURN_ERROR_PARAM when the
 *           kernel name or body is NULL, otherwise the error of the failing
 *           step
 */
XCamReturn
CLKernel::build_kernel (const XCamKernelInfo& info, const char* options)
{
    KernelMap::iterator i_kernel;
    SmartPtr<CLKernel> single_kernel;
    char key_str[1024];
    uint8_t body_key[8];
    std::string key;
    XCamReturn ret = XCAM_RETURN_NO_ERROR;

    XCAM_FAIL_RETURN (
        ERROR,
        info.kernel_name,
        XCAM_RETURN_ERROR_PARAM,
        "build kernel failed since kernel name null");

    // the body is hashed and passed to strlen () below, so it must not be NULL
    XCAM_FAIL_RETURN (
        ERROR,
        info.kernel_body,
        XCAM_RETURN_ERROR_PARAM,
        "build kernel(%s) failed since kernel body null", info.kernel_name);

    xcam_mem_clear (body_key);
    get_string_key_id (info.kernel_body, info.kernel_body_len, body_key);
    snprintf (
        key_str, sizeof(key_str),
        "%s#%02x%02x%02x%02x%02x%02x%02x%02x#%s",
        info.kernel_name,
        body_key[0], body_key[1], body_key[2], body_key[3],
        body_key[4], body_key[5], body_key[6], body_key[7],
        XCAM_STR(options));
    key = key_str;

    char temp_filename[XCAM_MAX_STR_SIZE] = {0};
    char cache_filename[XCAM_MAX_STR_SIZE] = {0};
    FileHandle temp_file;
    FileHandle cache_file;
    size_t read_cache_size = 0;
    size_t write_cache_size = 0;
    uint8_t *kernel_cache = NULL;
    bool load_cache = false;
    struct timeval ts;

    const char* cache_path = std::getenv ("XCAM_CL_KERNEL_CACHE_PATH");
    if (NULL == cache_path) {
        cache_path = _kernel_cache_path;
    }

    snprintf (
        cache_filename, XCAM_MAX_STR_SIZE - 1,
        "%s/%s",
        cache_path, key_str);

    {
        // map lookup and kernel creation happen under the map mutex
        SmartLock locker (_kernel_map_mutex);

        i_kernel = _kernel_map.find (key);
        if (i_kernel == _kernel_map.end ()) {
            SmartPtr<CLContext> context = get_context ();
            single_kernel = new CLKernel (context, info.kernel_name);
            XCAM_ASSERT (single_kernel.ptr ());

            // best effort: a failing mkdir only means the cache stays unused
            if (access (cache_path, F_OK) == -1) {
                mkdir (cache_path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
            }

            ret = cache_file.open (cache_filename, "r");
            if (ret == XCAM_RETURN_NO_ERROR) {
                cache_file.get_file_size (read_cache_size);
                if (read_cache_size > 0) {
                    kernel_cache = (uint8_t*) xcam_malloc0 (sizeof (uint8_t) * (read_cache_size + 1));
                    if (NULL != kernel_cache) {
                        cache_file.read_file (kernel_cache, read_cache_size);
                        cache_file.close ();

                        ret = single_kernel->load_from_binary (kernel_cache, read_cache_size);
                        xcam_free (kernel_cache);
                        kernel_cache = NULL;

                        XCAM_FAIL_RETURN (
                            ERROR,
                            ret == XCAM_RETURN_NO_ERROR, ret,
                            "build kernel(%s) from binary failed", key_str);

                        load_cache = true;
                    }
                }
            } else {
                XCAM_LOG_DEBUG ("open kernel cache file to read failed ret(%d)", ret);
            }

            if (load_cache == false) {
                // no cached binary: compile from source and collect the
                // generated binary in kernel_cache / write_cache_size
                ret = single_kernel->load_from_source (
                          info.kernel_body, strlen (info.kernel_body),
                          &kernel_cache, &write_cache_size,
                          options);
                XCAM_FAIL_RETURN (
                    ERROR,
                    ret == XCAM_RETURN_NO_ERROR, ret,
                    "build kernel(%s) from source failed", key_str);
            }

            _kernel_map.insert (std::make_pair (key, single_kernel));
            //_kernel_map[key] = single_kernel;
        } else {
            single_kernel = i_kernel->second;
        }
    }

    if (load_cache == false && NULL != kernel_cache) {
        // write to a time-stamped temporary file first, then rename it onto
        // the cache file name
        gettimeofday (&ts, NULL);
        snprintf (
            temp_filename, XCAM_MAX_STR_SIZE - 1,
            "%s." XCAM_TIMESTAMP_FORMAT,
            cache_filename, XCAM_TIMESTAMP_ARGS (XCAM_TIMEVAL_2_USEC (ts)));

        ret = temp_file.open (temp_filename, "wb");
        if (ret == XCAM_RETURN_NO_ERROR) {
            ret = temp_file.write_file (kernel_cache, write_cache_size);
            temp_file.close ();
            if (ret == XCAM_RETURN_NO_ERROR && write_cache_size > 0) {
                rename (temp_filename, cache_filename);
            } else {
                remove (temp_filename);
            }
        } else {
            XCAM_LOG_ERROR ("open kernel cache file to write failed ret(%d)", ret);
        }
        xcam_free (kernel_cache);
        kernel_cache = NULL;
    }

    XCAM_FAIL_RETURN (
        ERROR,
        (single_kernel.ptr () && single_kernel->is_valid ()), XCAM_RETURN_ERROR_UNKNOWN,
        "build kernel(%s) failed, unknown error", key_str);

    ret = this->clone (single_kernel);
    XCAM_FAIL_RETURN (
        ERROR,
        ret == XCAM_RETURN_NO_ERROR, ret,
        "load kernel(%s) from kernel failed", key_str);
    return ret;
}

/*
 * Build this kernel from source text through the context.
 *
 * @source        kernel source, must not be NULL
 * @length        source length in bytes, 0 means "use strlen (source)"
 * @gen_binary    forwarded to CLContext::generate_kernel_id ()
 * @binary_size   forwarded to CLContext::generate_kernel_id ()
 * @build_option  forwarded to CLContext::generate_kernel_id ()
 * @return        XCAM_RETURN_ERROR_PARAM when @source is NULL or the kernel
 *                was already built, XCAM_RETURN_ERROR_CL when the context
 *                returns no kernel id, XCAM_RETURN_NO_ERROR otherwise
 */
XCamReturn
CLKernel::load_from_source (
    const char *source, size_t length,
    uint8_t **gen_binary, size_t *binary_size,
    const char *build_option)
{
    cl_kernel new_kernel_id = NULL;

    XCAM_ASSERT (source);
    if (!source) {
        XCAM_LOG_WARNING ("kernel:%s source empty", XCAM_STR (_name));
        return XCAM_RETURN_ERROR_PARAM;
    }

    if (_kernel_id) {
        XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
        return XCAM_RETURN_ERROR_PARAM;
    }

    XCAM_ASSERT (_context.ptr ());

    if (length == 0)
        length = strlen (source);

    new_kernel_id =
        _context->generate_kernel_id (
            this,
            (const uint8_t *)source, length,
            CLContext::KERNEL_BUILD_SOURCE,
            gen_binary, binary_size,
            build_option);
    XCAM_FAIL_RETURN(
        WARNING,
        new_kernel_id != NULL,
        XCAM_RETURN_ERROR_CL,
        "cl kernel(%s) load from source failed", XCAM_STR (_name));

    _kernel_id = new_kernel_id;
    return XCAM_RETURN_NO_ERROR;
}

/*
 * Build this kernel from a program binary through the context.
 *
 * @binary  program binary, must not be NULL
 * @length  binary size in bytes, must not be 0
 * @return  XCAM_RETURN_ERROR_PARAM on empty input or when the kernel was
 *          already built, XCAM_RETURN_ERROR_CL when the context returns no
 *          kernel id, XCAM_RETURN_NO_ERROR otherwise
 */
XCamReturn
CLKernel::load_from_binary (const uint8_t *binary, size_t length)
{
    cl_kernel new_kernel_id = NULL;

    XCAM_ASSERT (binary);
    if (!binary || !length) {
        XCAM_LOG_WARNING ("kernel:%s binary empty", XCAM_STR (_name));
        return XCAM_RETURN_ERROR_PARAM;
    }

    if (_kernel_id) {
        XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
        return XCAM_RETURN_ERROR_PARAM;
    }

    XCAM_ASSERT (_context.ptr ());

    new_kernel_id =
        _context->generate_kernel_id (
            this,
            binary, length,
            CLContext::KERNEL_BUILD_BINARY,
            NULL, NULL,
            NULL);
    XCAM_FAIL_RETURN(
        WARNING,
        new_kernel_id != NULL,
        XCAM_RETURN_ERROR_CL,
        "cl kernel(%s) load from binary failed", XCAM_STR (_name));

    _kernel_id = new_kernel_id;
    return XCAM_RETURN_NO_ERROR;
}

/*
 * Make this object share the kernel id of @kernel.
 *
 * @kernel is stored as _parent_kernel, which also tells destroy () not to
 * release the shared id. When this object has no name yet, the parent's name
 * is copied.
 *
 * @return XCAM_RETURN_ERROR_CL when @kernel is NULL or not valid
 */
XCamReturn
CLKernel::clone (SmartPtr<CLKernel> kernel)
{
    XCAM_FAIL_RETURN (
        WARNING,
        kernel.ptr () && kernel->is_valid (),
        XCAM_RETURN_ERROR_CL,
        "cl kernel(%s) load from kernel failed", XCAM_STR (_name));

    _kernel_id = kernel->get_kernel_id ();
    _parent_kernel = kernel;
    if (!_name && kernel->get_kernel_name ()) {
        _name = strndup (kernel->get_kernel_name (), XCAM_MAX_STR_SIZE);
    }
    return XCAM_RETURN_NO_ERROR;
}

/*
 * Bind every argument of @args (in list order, starting at index 0) and set
 * the work size.
 *
 * Fails with XCAM_RETURN_ERROR_PARAM when arguments are already pending
 * (_arg_list not empty) or when an entry of @args is NULL. @args is stored in
 * _arg_list only after every step succeeded.
 */
XCamReturn
CLKernel::set_arguments (const CLArgList &args, const CLWorkSize &work_size)
{
    XCamReturn ret = XCAM_RETURN_NO_ERROR;
    uint32_t i_count = 0;

    XCAM_FAIL_RETURN (
        ERROR, _arg_list.empty (), XCAM_RETURN_ERROR_PARAM,
        "cl image kernel(%s) arguments was already set, can NOT be set twice",
        XCAM_STR (get_kernel_name ()));

    for (CLArgList::const_iterator iter = args.begin (); iter != args.end (); ++iter, ++i_count) {
        const SmartPtr<CLArgument> &arg = *iter;
        XCAM_FAIL_RETURN (
            WARNING, arg.ptr (),
            XCAM_RETURN_ERROR_PARAM, "cl image kernel(%s) argc(%d) is NULL",
            XCAM_STR (get_kernel_name ()), i_count);

        void *adress = NULL;
        uint32_t size = 0;
        arg->get_value (adress, size);
        ret = set_argument (i_count, adress, size);
        XCAM_FAIL_RETURN (
            WARNING, ret == XCAM_RETURN_NO_ERROR,
            ret, "cl image kernel(%s) set argc(%d) failed",
            XCAM_STR (get_kernel_name ()), i_count);
    }

    ret = set_work_size (work_size);
    XCAM_FAIL_RETURN (
        WARNING, ret == XCAM_RETURN_NO_ERROR, ret,
        "cl image kernel(%s) set worksize(global:%dx%dx%d, local:%dx%dx%d) failed",
        XCAM_STR(get_kernel_name ()),
        (int)work_size.global[0], (int)work_size.global[1], (int)work_size.global[2],
        (int)work_size.local[0], (int)work_size.local[1], (int)work_size.local[2]);

    _arg_list = args;
    return ret;
}

/*
 * Set one kernel argument with clSetKernelArg ().
 *
 * @arg_i     argument index
 * @arg_addr  address of the argument value
 * @arg_size  size of the argument value in bytes
 * @return    XCAM_RETURN_ERROR_CL when clSetKernelArg () does not return
 *            CL_SUCCESS
 */
XCamReturn
CLKernel::set_argument (uint32_t arg_i, void *arg_addr, uint32_t arg_size)
{
    cl_int error_code = clSetKernelArg (_kernel_id, arg_i, arg_size, arg_addr);
    if (error_code != CL_SUCCESS) {
        // _name may be NULL (the constructor accepts a NULL name)
        XCAM_LOG_DEBUG ("kernel(%s) set arg_i(%d) failed", XCAM_STR (_name), arg_i);
        return XCAM_RETURN_ERROR_CL;
    }
    return XCAM_RETURN_NO_ERROR;
}

/*
 * Validate @work_size against the device limits and store it.
 *
 * Checked: number of dimensions, each local size against the per-dimension
 * maximum, and the product of the local sizes against the maximum work-group
 * size. A product of 0 skips the work-group-size check.
 *
 * @return XCAM_RETURN_ERROR_PARAM when a limit is exceeded
 */
XCamReturn
CLKernel::set_work_size (const CLWorkSize &work_size)
{
    uint32_t i = 0;
    uint32_t work_group_size = 1;
    const CLDevieInfo &dev_info = CLDevice::instance ()->get_device_info ();

    XCAM_FAIL_RETURN (
        WARNING,
        work_size.dim <= dev_info.max_work_item_dims,
        XCAM_RETURN_ERROR_PARAM,
        "kernel(%s) work dims(%d) greater than device max dims(%d)",
        XCAM_STR (_name), work_size.dim, dev_info.max_work_item_dims);

    for (i = 0; i < work_size.dim; ++i) {
        work_group_size *= work_size.local [i];

        XCAM_FAIL_RETURN (
            WARNING,
            work_size.local [i] <= dev_info.max_work_item_sizes [i],
            XCAM_RETURN_ERROR_PARAM,
            "kernel(%s) work item(%d) size:%d is greater than device max work item size(%d)",
            XCAM_STR (_name), i, (uint32_t)work_size.local [i], (uint32_t)dev_info.max_work_item_sizes [i]);
    }

    XCAM_FAIL_RETURN (
        WARNING,
        work_group_size == 0 || work_group_size <= dev_info.max_work_group_size,
        XCAM_RETURN_ERROR_PARAM,
        "kernel(%s) work-group-size:%d is greater than device max work-group-size(%d)",
        XCAM_STR (_name), work_group_size, (uint32_t)dev_info.max_work_group_size);

    _work_size = work_size;

    return XCAM_RETURN_NO_ERROR;
}

/*
 * Reset the work size to XCAM_DEFAULT_IMAGE_DIM dimensions with every local
 * size set to XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE.
 */
void
CLKernel::set_default_work_size ()
{
    _work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
    for (uint32_t i = 0; i < _work_size.dim; ++i) {
        //_global_work_size [i] = XCAM_CL_KERNEL_DEFAULT_GLOBAL_WORK_SIZE;
        _work_size.local [i] = XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE;
    }
}

/*
 * Payload attached to the event callback of a non-blocking execute ().
 * It holds the kernel, its event and the argument list until event_notify ()
 * deletes it.
 */
struct KernelUserData {
    SmartPtr<CLKernel>  kernel;
    SmartPtr<CLEvent>   event;
    CLArgList           arg_list;

    KernelUserData (const SmartPtr<CLKernel> &k, SmartPtr<CLEvent> &e)
        : kernel (k)
        , event (e)
    {}
};

/*
 * Event callback registered by execute (): releases the KernelUserData passed
 * as @data (and with it the references it holds).
 */
void
CLKernel::event_notify (cl_event event, cl_int status, void* data)
{
    KernelUserData *kernel_data = (KernelUserData *)data;
    XCAM_ASSERT (event == kernel_data->event->get_event_id ());
    XCAM_UNUSED (status);
    XCAM_UNUSED (event);

    delete kernel_data;
}

/*
 * Run the kernel through the context.
 *
 * @self       smart pointer to this very object (asserted)
 * @block      true: wait with CLContext::finish () after the kernel was queued;
 *             false: register event_notify () for CL_COMPLETE on the kernel
 *             event and return
 * @events     passed to CLContext::execute_kernel ()
 * @event_out  event to use for this execution; when it is empty and @block is
 *             false a new CLEvent is created locally
 * @return     error of execute_kernel () or set_event_callback (), else
 *             XCAM_RETURN_NO_ERROR
 *
 * The pending argument list is always empty afterwards.
 */
XCamReturn
CLKernel::execute (
    const SmartPtr<CLKernel> self,
    bool block,
    CLEventList &events,
    SmartPtr<CLEvent> &event_out)
{
    XCAM_ASSERT (self.ptr () == this);
    XCAM_ASSERT (_context.ptr ());
    SmartPtr<CLEvent> kernel_event = event_out;

    if (!block && !kernel_event.ptr ()) {
        kernel_event = new CLEvent;
    }

#if ENABLE_DEBUG_KERNEL
    XCAM_OBJ_PROFILING_START;
#endif

    XCamReturn ret = _context->execute_kernel (self, NULL, events, kernel_event);

    XCAM_FAIL_RETURN (
        ERROR,
        ret == XCAM_RETURN_NO_ERROR,
        ret,
        "kernel(%s) execute failed", XCAM_STR(_name));

    if (block) {
        _context->finish ();
    } else {
        XCAM_ASSERT (kernel_event.ptr () && kernel_event->get_event_id ());
        KernelUserData *user_data = new KernelUserData (self, kernel_event);
        // move the arguments into the callback payload; they are released
        // together with it in event_notify ()
        user_data->arg_list.swap (_arg_list);
        ret = _context->set_event_callback (kernel_event, CL_COMPLETE, event_notify, user_data);
        if (ret != XCAM_RETURN_NO_ERROR) {
            XCAM_LOG_WARNING ("kernel(%s) set event callback failed", XCAM_STR (_name));
            // no callback will fire: wait for completion and release the
            // payload here instead
            _context->finish ();
            delete user_data;
        }
    }
    _arg_list.clear ();

#if ENABLE_DEBUG_KERNEL
    _context->finish ();
    char name[1024];
    snprintf (name, 1024, "%s-%p", XCAM_STR (_name), this);
    XCAM_OBJ_PROFILING_END (name, XCAM_OBJ_DUR_FRAME_NUM);
#endif
    return ret;
}

};