1 /*
2 * cl_kernel.cpp - CL kernel
3 *
4 * Copyright (c) 2015 Intel Corporation
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * Author: Wind Yuan <feng.yuan@intel.com>
19 */
20
21 #include "cl_kernel.h"
22 #include "cl_context.h"
23 #include "cl_device.h"
24 #include "file_handle.h"
25
26 #include <sys/stat.h>
27
28 #define ENABLE_DEBUG_KERNEL 0
29
30 #define XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE 0
31
32 namespace XCam {
33
34 CLKernel::KernelMap CLKernel::_kernel_map;
35 Mutex CLKernel::_kernel_map_mutex;
36
37 static char*
default_cache_path()38 default_cache_path () {
39 static char path[XCAM_MAX_STR_SIZE] = {0};
40 snprintf (
41 path, XCAM_MAX_STR_SIZE - 1,
42 "%s/%s", std::getenv ("HOME"), ".xcam/");
43
44 return path;
45 }
46
47 const char* CLKernel::_kernel_cache_path = default_cache_path ();
48
CLKernel(const SmartPtr<CLContext> & context,const char * name)49 CLKernel::CLKernel (const SmartPtr<CLContext> &context, const char *name)
50 : _name (NULL)
51 , _kernel_id (NULL)
52 , _context (context)
53 {
54 XCAM_ASSERT (context.ptr ());
55 //XCAM_ASSERT (name);
56
57 if (name)
58 _name = strndup (name, XCAM_MAX_STR_SIZE);
59
60 set_default_work_size ();
61
62 XCAM_OBJ_PROFILING_INIT;
63 }
64
~CLKernel()65 CLKernel::~CLKernel ()
66 {
67 destroy ();
68 if (_name)
69 xcam_free (_name);
70 }
71
72 void
destroy()73 CLKernel::destroy ()
74 {
75 if (!_parent_kernel.ptr ())
76 _context->destroy_kernel_id (_kernel_id);
77 }
78
/*
 * Fold a string into an 8-byte key by XOR-ing byte i into key_id[i % 8].
 *
 * @param str     text to hash; NULL yields an all-zero key
 * @param len     number of bytes to fold; 0 means "use strlen (str)"
 * @param key_id  receives the 8-byte key (previous content is discarded)
 *
 * The earlier implementation read the text through a uint32_t pointer, which
 * is an unaligned access and a strict-aliasing violation for arbitrary char
 * data. XOR works byte by byte, so folding bytes directly produces exactly
 * the same key without reinterpreting the buffer.
 */
static void
get_string_key_id (const char *str, uint32_t len, uint8_t key_id[8])
{
    memset (key_id, 0, 8);

    if (str == NULL)
        return;

    if (len == 0)
        len = (uint32_t) strlen (str);

    for (uint32_t i = 0; i < len; ++i) {
        key_id[i & 7] ^= (uint8_t) str[i];
    }
}
104
105 XCamReturn
build_kernel(const XCamKernelInfo & info,const char * options)106 CLKernel::build_kernel (const XCamKernelInfo& info, const char* options)
107 {
108 KernelMap::iterator i_kernel;
109 SmartPtr<CLKernel> single_kernel;
110 char key_str[1024];
111 uint8_t body_key[8];
112 std::string key;
113 XCamReturn ret = XCAM_RETURN_NO_ERROR;
114
115 XCAM_FAIL_RETURN (ERROR, info.kernel_name, XCAM_RETURN_ERROR_PARAM, "build kernel failed since kernel name null");
116
117 xcam_mem_clear (body_key);
118 get_string_key_id (info.kernel_body, info.kernel_body_len, body_key);
119 snprintf (
120 key_str, sizeof(key_str),
121 "%s#%02x%02x%02x%02x%02x%02x%02x%02x#%s",
122 info.kernel_name,
123 body_key[0], body_key[1], body_key[2], body_key[3], body_key[4], body_key[5], body_key[6], body_key[7],
124 XCAM_STR(options));
125 key = key_str;
126
127 char temp_filename[XCAM_MAX_STR_SIZE] = {0};
128 char cache_filename[XCAM_MAX_STR_SIZE] = {0};
129 FileHandle temp_file;
130 FileHandle cache_file;
131 size_t read_cache_size = 0;
132 size_t write_cache_size = 0;
133 uint8_t *kernel_cache = NULL;
134 bool load_cache = false;
135 struct timeval ts;
136
137 const char* cache_path = std::getenv ("XCAM_CL_KERNEL_CACHE_PATH");
138 if (NULL == cache_path) {
139 cache_path = _kernel_cache_path;
140 }
141
142 snprintf (
143 cache_filename, XCAM_MAX_STR_SIZE - 1,
144 "%s/%s",
145 cache_path, key_str);
146
147 {
148 SmartLock locker (_kernel_map_mutex);
149
150 i_kernel = _kernel_map.find (key);
151 if (i_kernel == _kernel_map.end ()) {
152 SmartPtr<CLContext> context = get_context ();
153 single_kernel = new CLKernel (context, info.kernel_name);
154 XCAM_ASSERT (single_kernel.ptr ());
155
156 if (access (cache_path, F_OK) == -1) {
157 mkdir (cache_path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
158 }
159
160 ret = cache_file.open (cache_filename, "r");
161 if (ret == XCAM_RETURN_NO_ERROR) {
162 cache_file.get_file_size (read_cache_size);
163 if (read_cache_size > 0) {
164 kernel_cache = (uint8_t*) xcam_malloc0 (sizeof (uint8_t) * (read_cache_size + 1));
165 if (NULL != kernel_cache) {
166 cache_file.read_file (kernel_cache, read_cache_size);
167 cache_file.close ();
168
169 ret = single_kernel->load_from_binary (kernel_cache, read_cache_size);
170 xcam_free (kernel_cache);
171 kernel_cache = NULL;
172
173 XCAM_FAIL_RETURN (
174 ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
175 "build kernel(%s) from binary failed", key_str);
176
177 load_cache = true;
178 }
179 }
180 } else {
181 XCAM_LOG_DEBUG ("open kernel cache file to read failed ret(%d)", ret);
182 }
183
184 if (load_cache == false) {
185 ret = single_kernel->load_from_source (info.kernel_body, strlen (info.kernel_body), &kernel_cache, &write_cache_size, options);
186 XCAM_FAIL_RETURN (
187 ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
188 "build kernel(%s) from source failed", key_str);
189 }
190
191 _kernel_map.insert (std::make_pair (key, single_kernel));
192 //_kernel_map[key] = single_kernel;
193 } else {
194 single_kernel = i_kernel->second;
195 }
196 }
197
198 if (load_cache == false && NULL != kernel_cache) {
199 gettimeofday (&ts, NULL);
200 snprintf (
201 temp_filename, XCAM_MAX_STR_SIZE - 1,
202 "%s." XCAM_TIMESTAMP_FORMAT,
203 cache_filename, XCAM_TIMESTAMP_ARGS (XCAM_TIMEVAL_2_USEC (ts)));
204
205 ret = temp_file.open (temp_filename, "wb");
206 if (ret == XCAM_RETURN_NO_ERROR) {
207 ret = temp_file.write_file (kernel_cache, write_cache_size);
208 temp_file.close ();
209 if (ret == XCAM_RETURN_NO_ERROR && write_cache_size > 0) {
210 rename (temp_filename, cache_filename);
211 } else {
212 remove (temp_filename);
213 }
214 } else {
215 XCAM_LOG_ERROR ("open kernel cache file to write failed ret(%d)", ret);
216 }
217 xcam_free (kernel_cache);
218 kernel_cache = NULL;
219 }
220
221 XCAM_FAIL_RETURN (
222 ERROR, (single_kernel.ptr () && single_kernel->is_valid ()), XCAM_RETURN_ERROR_UNKNOWN,
223 "build kernel(%s) failed, unknown error", key_str);
224
225 ret = this->clone (single_kernel);
226 XCAM_FAIL_RETURN (
227 ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
228 "load kernel(%s) from kernel failed", key_str);
229 return ret;
230 }
231
232 XCamReturn
load_from_source(const char * source,size_t length,uint8_t ** gen_binary,size_t * binary_size,const char * build_option)233 CLKernel::load_from_source (
234 const char *source, size_t length,
235 uint8_t **gen_binary, size_t *binary_size,
236 const char *build_option)
237 {
238 cl_kernel new_kernel_id = NULL;
239
240 XCAM_ASSERT (source);
241 if (!source) {
242 XCAM_LOG_WARNING ("kernel:%s source empty", XCAM_STR (_name));
243 return XCAM_RETURN_ERROR_PARAM;
244 }
245
246 if (_kernel_id) {
247 XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
248 return XCAM_RETURN_ERROR_PARAM;
249 }
250
251 XCAM_ASSERT (_context.ptr ());
252
253 if (length == 0)
254 length = strlen (source);
255
256 new_kernel_id =
257 _context->generate_kernel_id (
258 this,
259 (const uint8_t *)source, length,
260 CLContext::KERNEL_BUILD_SOURCE,
261 gen_binary, binary_size,
262 build_option);
263 XCAM_FAIL_RETURN(
264 WARNING,
265 new_kernel_id != NULL,
266 XCAM_RETURN_ERROR_CL,
267 "cl kernel(%s) load from source failed", XCAM_STR (_name));
268
269 _kernel_id = new_kernel_id;
270 return XCAM_RETURN_NO_ERROR;
271 }
272
273 XCamReturn
load_from_binary(const uint8_t * binary,size_t length)274 CLKernel::load_from_binary (const uint8_t *binary, size_t length)
275 {
276 cl_kernel new_kernel_id = NULL;
277
278 XCAM_ASSERT (binary);
279 if (!binary || !length) {
280 XCAM_LOG_WARNING ("kernel:%s binary empty", XCAM_STR (_name));
281 return XCAM_RETURN_ERROR_PARAM;
282 }
283
284 if (_kernel_id) {
285 XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
286 return XCAM_RETURN_ERROR_PARAM;
287 }
288
289 XCAM_ASSERT (_context.ptr ());
290
291 new_kernel_id =
292 _context->generate_kernel_id (
293 this,
294 binary, length,
295 CLContext::KERNEL_BUILD_BINARY,
296 NULL, NULL,
297 NULL);
298 XCAM_FAIL_RETURN(
299 WARNING,
300 new_kernel_id != NULL,
301 XCAM_RETURN_ERROR_CL,
302 "cl kernel(%s) load from binary failed", XCAM_STR (_name));
303
304 _kernel_id = new_kernel_id;
305 return XCAM_RETURN_NO_ERROR;
306 }
307
308 XCamReturn
clone(SmartPtr<CLKernel> kernel)309 CLKernel::clone (SmartPtr<CLKernel> kernel)
310 {
311 XCAM_FAIL_RETURN (
312 WARNING,
313 kernel.ptr () && kernel->is_valid (),
314 XCAM_RETURN_ERROR_CL,
315 "cl kernel(%s) load from kernel failed", XCAM_STR (_name));
316 _kernel_id = kernel->get_kernel_id ();
317 _parent_kernel = kernel;
318 if (!_name && kernel->get_kernel_name ()) {
319 _name = strndup (kernel->get_kernel_name (), XCAM_MAX_STR_SIZE);
320 }
321 return XCAM_RETURN_NO_ERROR;
322 }
323
324 XCamReturn
set_arguments(const CLArgList & args,const CLWorkSize & work_size)325 CLKernel::set_arguments (const CLArgList &args, const CLWorkSize &work_size)
326 {
327 XCamReturn ret = XCAM_RETURN_NO_ERROR;
328 uint32_t i_count = 0;
329
330 XCAM_FAIL_RETURN (
331 ERROR, _arg_list.empty (), XCAM_RETURN_ERROR_PARAM,
332 "cl image kernel(%s) arguments was already set, can NOT be set twice", get_kernel_name ());
333
334 for (CLArgList::const_iterator iter = args.begin (); iter != args.end (); ++iter, ++i_count) {
335 const SmartPtr<CLArgument> &arg = *iter;
336 XCAM_FAIL_RETURN (
337 WARNING, arg.ptr (),
338 XCAM_RETURN_ERROR_PARAM, "cl image kernel(%s) argc(%d) is NULL", get_kernel_name (), i_count);
339
340 void *adress = NULL;
341 uint32_t size = 0;
342 arg->get_value (adress, size);
343 ret = set_argument (i_count, adress, size);
344 XCAM_FAIL_RETURN (
345 WARNING, ret == XCAM_RETURN_NO_ERROR,
346 ret, "cl image kernel(%s) set argc(%d) failed", get_kernel_name (), i_count);
347 }
348
349 ret = set_work_size (work_size);
350 XCAM_FAIL_RETURN (
351 WARNING, ret == XCAM_RETURN_NO_ERROR, ret,
352 "cl image kernel(%s) set worksize(global:%dx%dx%d, local:%dx%dx%d) failed",
353 XCAM_STR(get_kernel_name ()),
354 (int)work_size.global[0], (int)work_size.global[1], (int)work_size.global[2],
355 (int)work_size.local[0], (int)work_size.local[1], (int)work_size.local[2]);
356
357 _arg_list = args;
358 return ret;
359 }
360
361 XCamReturn
set_argument(uint32_t arg_i,void * arg_addr,uint32_t arg_size)362 CLKernel::set_argument (uint32_t arg_i, void *arg_addr, uint32_t arg_size)
363 {
364 cl_int error_code = clSetKernelArg (_kernel_id, arg_i, arg_size, arg_addr);
365 if (error_code != CL_SUCCESS) {
366 XCAM_LOG_DEBUG ("kernel(%s) set arg_i(%d) failed", _name, arg_i);
367 return XCAM_RETURN_ERROR_CL;
368 }
369 return XCAM_RETURN_NO_ERROR;
370 }
371
372 XCamReturn
set_work_size(const CLWorkSize & work_size)373 CLKernel::set_work_size (const CLWorkSize &work_size)
374 {
375 uint32_t i = 0;
376 uint32_t work_group_size = 1;
377 const CLDevieInfo &dev_info = CLDevice::instance ()->get_device_info ();
378
379 XCAM_FAIL_RETURN (
380 WARNING,
381 work_size.dim <= dev_info.max_work_item_dims,
382 XCAM_RETURN_ERROR_PARAM,
383 "kernel(%s) work dims(%d) greater than device max dims(%d)",
384 _name, work_size.dim, dev_info.max_work_item_dims);
385
386 for (i = 0; i < work_size.dim; ++i) {
387 work_group_size *= work_size.local [i];
388
389 XCAM_FAIL_RETURN (
390 WARNING,
391 work_size.local [i] <= dev_info.max_work_item_sizes [i],
392 XCAM_RETURN_ERROR_PARAM,
393 "kernel(%s) work item(%d) size:%d is greater than device max work item size(%d)",
394 _name, i, (uint32_t)work_size.local [i], (uint32_t)dev_info.max_work_item_sizes [i]);
395 }
396
397 XCAM_FAIL_RETURN (
398 WARNING,
399 work_group_size == 0 || work_group_size <= dev_info.max_work_group_size,
400 XCAM_RETURN_ERROR_PARAM,
401 "kernel(%s) work-group-size:%d is greater than device max work-group-size(%d)",
402 _name, work_group_size, (uint32_t)dev_info.max_work_group_size);
403
404 _work_size = work_size;
405
406 return XCAM_RETURN_NO_ERROR;
407 }
408
409 void
set_default_work_size()410 CLKernel::set_default_work_size ()
411 {
412 _work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
413 for (uint32_t i = 0; i < _work_size.dim; ++i) {
414 //_global_work_size [i] = XCAM_CL_KERNEL_DEFAULT_GLOBAL_WORK_SIZE;
415 _work_size.local [i] = XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE;
416 }
417 }
418
419 struct KernelUserData {
420 SmartPtr<CLKernel> kernel;
421 SmartPtr<CLEvent> event;
422 CLArgList arg_list;
423
KernelUserDataXCam::KernelUserData424 KernelUserData (const SmartPtr<CLKernel> &k, SmartPtr<CLEvent> &e)
425 : kernel (k)
426 , event (e)
427 {}
428 };
429
430 void
event_notify(cl_event event,cl_int status,void * data)431 CLKernel::event_notify (cl_event event, cl_int status, void* data)
432 {
433 KernelUserData *kernel_data = (KernelUserData *)data;
434 XCAM_ASSERT (event == kernel_data->event->get_event_id ());
435 XCAM_UNUSED (status);
436 XCAM_UNUSED (event);
437
438 delete kernel_data;
439 }
440
441 XCamReturn
execute(const SmartPtr<CLKernel> self,bool block,CLEventList & events,SmartPtr<CLEvent> & event_out)442 CLKernel::execute (
443 const SmartPtr<CLKernel> self,
444 bool block,
445 CLEventList &events,
446 SmartPtr<CLEvent> &event_out)
447 {
448 XCAM_ASSERT (self.ptr () == this);
449 XCAM_ASSERT (_context.ptr ());
450 SmartPtr<CLEvent> kernel_event = event_out;
451
452 if (!block && !kernel_event.ptr ()) {
453 kernel_event = new CLEvent;
454 }
455
456 #if ENABLE_DEBUG_KERNEL
457 XCAM_OBJ_PROFILING_START;
458 #endif
459
460 XCamReturn ret = _context->execute_kernel (self, NULL, events, kernel_event);
461
462 XCAM_FAIL_RETURN (
463 ERROR,
464 ret == XCAM_RETURN_NO_ERROR,
465 ret,
466 "kernel(%s) execute failed", XCAM_STR(_name));
467
468
469 if (block) {
470 _context->finish ();
471 } else {
472 XCAM_ASSERT (kernel_event.ptr () && kernel_event->get_event_id ());
473 KernelUserData *user_data = new KernelUserData (self, kernel_event);
474 user_data->arg_list.swap (_arg_list);
475 ret = _context->set_event_callback (kernel_event, CL_COMPLETE, event_notify, user_data);
476 if (ret != XCAM_RETURN_NO_ERROR) {
477 XCAM_LOG_WARNING ("kernel(%s) set event callback failed", XCAM_STR (_name));
478 _context->finish ();
479 delete user_data;
480 }
481 }
482 _arg_list.clear ();
483
484 #if ENABLE_DEBUG_KERNEL
485 _context->finish ();
486 char name[1024];
487 snprintf (name, 1024, "%s-%p", XCAM_STR (_name), this);
488 XCAM_OBJ_PROFILING_END (name, XCAM_OBJ_DUR_FRAME_NUM);
489 #endif
490 return ret;
491 }
492
493 };
494