1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include "api/util.hpp"
24 #include "core/kernel.hpp"
25 #include "core/event.hpp"
26
27 using namespace clover;
28
29 CLOVER_API cl_kernel
clCreateKernel(cl_program d_prog,const char * name,cl_int * r_errcode)30 clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try {
31 auto &prog = obj(d_prog);
32
33 if (!name)
34 throw error(CL_INVALID_VALUE);
35
36 auto &sym = find(name_equals(name), prog.symbols());
37
38 ret_error(r_errcode, CL_SUCCESS);
39 return new kernel(prog, name, range(sym.args));
40
41 } catch (std::out_of_range &) {
42 ret_error(r_errcode, CL_INVALID_KERNEL_NAME);
43 return NULL;
44
45 } catch (error &e) {
46 ret_error(r_errcode, e);
47 return NULL;
48 }
49
50 CLOVER_API cl_int
clCreateKernelsInProgram(cl_program d_prog,cl_uint count,cl_kernel * rd_kerns,cl_uint * r_count)51 clCreateKernelsInProgram(cl_program d_prog, cl_uint count,
52 cl_kernel *rd_kerns, cl_uint *r_count) try {
53 auto &prog = obj(d_prog);
54 auto &syms = prog.symbols();
55
56 if (rd_kerns && count < syms.size())
57 throw error(CL_INVALID_VALUE);
58
59 if (rd_kerns)
60 copy(map([&](const binary::symbol &sym) {
61 return desc(new kernel(prog,
62 std::string(sym.name.begin(),
63 sym.name.end()),
64 range(sym.args)));
65 }, syms),
66 rd_kerns);
67
68 if (r_count)
69 *r_count = syms.size();
70
71 return CL_SUCCESS;
72
73 } catch (error &e) {
74 return e.get();
75 }
76
77 CLOVER_API cl_int
clRetainKernel(cl_kernel d_kern)78 clRetainKernel(cl_kernel d_kern) try {
79 obj(d_kern).retain();
80 return CL_SUCCESS;
81
82 } catch (error &e) {
83 return e.get();
84 }
85
86 CLOVER_API cl_int
clReleaseKernel(cl_kernel d_kern)87 clReleaseKernel(cl_kernel d_kern) try {
88 if (obj(d_kern).release())
89 delete pobj(d_kern);
90
91 return CL_SUCCESS;
92
93 } catch (error &e) {
94 return e.get();
95 }
96
97 CLOVER_API cl_int
clSetKernelArg(cl_kernel d_kern,cl_uint idx,size_t size,const void * value)98 clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size,
99 const void *value) try {
100 obj(d_kern).args().at(idx).set(size, value);
101 return CL_SUCCESS;
102
103 } catch (std::out_of_range &) {
104 return CL_INVALID_ARG_INDEX;
105
106 } catch (error &e) {
107 return e.get();
108 }
109
110 CLOVER_API cl_int
clGetKernelInfo(cl_kernel d_kern,cl_kernel_info param,size_t size,void * r_buf,size_t * r_size)111 clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,
112 size_t size, void *r_buf, size_t *r_size) try {
113 property_buffer buf { r_buf, size, r_size };
114 auto &kern = obj(d_kern);
115
116 switch (param) {
117 case CL_KERNEL_FUNCTION_NAME:
118 buf.as_string() = kern.name();
119 break;
120
121 case CL_KERNEL_NUM_ARGS:
122 buf.as_scalar<cl_uint>() = kern.args().size();
123 break;
124
125 case CL_KERNEL_REFERENCE_COUNT:
126 buf.as_scalar<cl_uint>() = kern.ref_count();
127 break;
128
129 case CL_KERNEL_CONTEXT:
130 buf.as_scalar<cl_context>() = desc(kern.program().context());
131 break;
132
133 case CL_KERNEL_PROGRAM:
134 buf.as_scalar<cl_program>() = desc(kern.program());
135 break;
136
137 case CL_KERNEL_ATTRIBUTES:
138 buf.as_string() = find(name_equals(kern.name()), kern.program().symbols()).attributes;
139 break;
140
141 default:
142 throw error(CL_INVALID_VALUE);
143 }
144
145 return CL_SUCCESS;
146
147 } catch (error &e) {
148 return e.get();
149 }
150
151 CLOVER_API cl_int
clGetKernelWorkGroupInfo(cl_kernel d_kern,cl_device_id d_dev,cl_kernel_work_group_info param,size_t size,void * r_buf,size_t * r_size)152 clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
153 cl_kernel_work_group_info param,
154 size_t size, void *r_buf, size_t *r_size) try {
155 property_buffer buf { r_buf, size, r_size };
156 auto &kern = obj(d_kern);
157 auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.program().devices()));
158
159 if (!count(dev, kern.program().devices()))
160 throw error(CL_INVALID_DEVICE);
161
162 switch (param) {
163 case CL_KERNEL_WORK_GROUP_SIZE:
164 buf.as_scalar<size_t>() = dev.max_threads_per_block();
165 break;
166
167 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
168 buf.as_vector<size_t>() = kern.required_block_size();
169 break;
170
171 case CL_KERNEL_LOCAL_MEM_SIZE:
172 buf.as_scalar<cl_ulong>() = kern.mem_local();
173 break;
174
175 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
176 buf.as_scalar<size_t>() = dev.subgroup_size();
177 break;
178
179 case CL_KERNEL_PRIVATE_MEM_SIZE:
180 buf.as_scalar<cl_ulong>() = kern.mem_private();
181 break;
182
183 default:
184 throw error(CL_INVALID_VALUE);
185 }
186
187 return CL_SUCCESS;
188
189 } catch (error &e) {
190 return e.get();
191
192 } catch (std::out_of_range &) {
193 return CL_INVALID_DEVICE;
194 }
195
196 CLOVER_API cl_int
clGetKernelArgInfo(cl_kernel d_kern,cl_uint idx,cl_kernel_arg_info param,size_t size,void * r_buf,size_t * r_size)197 clGetKernelArgInfo(cl_kernel d_kern,
198 cl_uint idx, cl_kernel_arg_info param,
199 size_t size, void *r_buf, size_t *r_size) try {
200 property_buffer buf { r_buf, size, r_size };
201
202 auto info = obj(d_kern).args_infos().at(idx);
203
204 if (info.arg_name.empty())
205 return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
206
207 switch (param) {
208 case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
209 buf.as_scalar<cl_kernel_arg_address_qualifier>() = info.address_qualifier;
210 break;
211
212 case CL_KERNEL_ARG_ACCESS_QUALIFIER:
213 buf.as_scalar<cl_kernel_arg_access_qualifier>() = info.access_qualifier;
214 break;
215
216 case CL_KERNEL_ARG_TYPE_NAME:
217 buf.as_string() = info.type_name;
218 break;
219
220 case CL_KERNEL_ARG_TYPE_QUALIFIER:
221 buf.as_scalar<cl_kernel_arg_type_qualifier>() = info.type_qualifier;
222 break;
223
224 case CL_KERNEL_ARG_NAME:
225 buf.as_string() = info.arg_name;
226 break;
227
228 default:
229 throw error(CL_INVALID_VALUE);
230 }
231
232 return CL_SUCCESS;
233
234 } catch (std::out_of_range &) {
235 return CL_INVALID_ARG_INDEX;
236
237 } catch (error &e) {
238 return e.get();
239 }
240
241 namespace {
242 ///
243 /// Common argument checking shared by kernel invocation commands.
244 ///
245 void
validate_common(const command_queue & q,kernel & kern,const ref_vector<event> & deps)246 validate_common(const command_queue &q, kernel &kern,
247 const ref_vector<event> &deps) {
248 if (kern.program().context() != q.context() ||
249 any_of([&](const event &ev) {
250 return ev.context() != q.context();
251 }, deps))
252 throw error(CL_INVALID_CONTEXT);
253
254 if (any_of([](kernel::argument &arg) {
255 return !arg.set();
256 }, kern.args()))
257 throw error(CL_INVALID_KERNEL_ARGS);
258
259 // If the command queue's device is not associated to the program, we get
260 // a binary, with no sections, which will also fail the following test.
261 auto &b = kern.program().build(q.device()).bin;
262 if (!any_of(type_equals(binary::section::text_executable), b.secs))
263 throw error(CL_INVALID_PROGRAM_EXECUTABLE);
264 }
265
266 std::vector<size_t>
validate_grid_size(const command_queue & q,cl_uint dims,const size_t * d_grid_size)267 validate_grid_size(const command_queue &q, cl_uint dims,
268 const size_t *d_grid_size) {
269 auto grid_size = range(d_grid_size, dims);
270
271 if (dims < 1 || dims > q.device().max_block_size().size())
272 throw error(CL_INVALID_WORK_DIMENSION);
273
274 return grid_size;
275 }
276
277 std::vector<size_t>
validate_grid_offset(const command_queue & q,cl_uint dims,const size_t * d_grid_offset)278 validate_grid_offset(const command_queue &q, cl_uint dims,
279 const size_t *d_grid_offset) {
280 if (d_grid_offset)
281 return range(d_grid_offset, dims);
282 else
283 return std::vector<size_t>(dims, 0);
284 }
285
286 std::vector<size_t>
validate_block_size(const command_queue & q,const kernel & kern,cl_uint dims,const size_t * d_grid_size,const size_t * d_block_size)287 validate_block_size(const command_queue &q, const kernel &kern,
288 cl_uint dims, const size_t *d_grid_size,
289 const size_t *d_block_size) {
290 auto grid_size = range(d_grid_size, dims);
291
292 if (d_block_size) {
293 auto block_size = range(d_block_size, dims);
294
295 if (any_of(is_zero(), block_size) ||
296 any_of(greater(), block_size, q.device().max_block_size()))
297 throw error(CL_INVALID_WORK_ITEM_SIZE);
298
299 if (any_of(modulus(), grid_size, block_size))
300 throw error(CL_INVALID_WORK_GROUP_SIZE);
301
302 if (fold(multiplies(), 1u, block_size) >
303 q.device().max_threads_per_block())
304 throw error(CL_INVALID_WORK_GROUP_SIZE);
305
306 return block_size;
307
308 } else {
309 return kern.optimal_block_size(q, grid_size);
310 }
311 }
312 }
313
314 CLOVER_API cl_int
clEnqueueNDRangeKernel(cl_command_queue d_q,cl_kernel d_kern,cl_uint dims,const size_t * d_grid_offset,const size_t * d_grid_size,const size_t * d_block_size,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)315 clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern,
316 cl_uint dims, const size_t *d_grid_offset,
317 const size_t *d_grid_size, const size_t *d_block_size,
318 cl_uint num_deps, const cl_event *d_deps,
319 cl_event *rd_ev) try {
320 auto &q = obj(d_q);
321 auto &kern = obj(d_kern);
322 auto deps = objs<wait_list_tag>(d_deps, num_deps);
323 auto grid_size = validate_grid_size(q, dims, d_grid_size);
324 auto grid_offset = validate_grid_offset(q, dims, d_grid_offset);
325 auto block_size = validate_block_size(q, kern, dims,
326 d_grid_size, d_block_size);
327
328 validate_common(q, kern, deps);
329
330 auto hev = create<hard_event>(
331 q, CL_COMMAND_NDRANGE_KERNEL, deps,
332 [=, &kern, &q](event &) {
333 kern.launch(q, grid_offset, grid_size, block_size);
334 });
335
336 ret_object(rd_ev, hev);
337 return CL_SUCCESS;
338
339 } catch (error &e) {
340 return e.get();
341 }
342
343 CLOVER_API cl_int
clEnqueueTask(cl_command_queue d_q,cl_kernel d_kern,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)344 clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern,
345 cl_uint num_deps, const cl_event *d_deps,
346 cl_event *rd_ev) try {
347 auto &q = obj(d_q);
348 auto &kern = obj(d_kern);
349 auto deps = objs<wait_list_tag>(d_deps, num_deps);
350
351 validate_common(q, kern, deps);
352
353 auto hev = create<hard_event>(
354 q, CL_COMMAND_TASK, deps,
355 [=, &kern, &q](event &) {
356 kern.launch(q, { 0 }, { 1 }, { 1 });
357 });
358
359 ret_object(rd_ev, hev);
360 return CL_SUCCESS;
361
362 } catch (error &e) {
363 return e.get();
364 }
365
366 CLOVER_API cl_int
clEnqueueNativeKernel(cl_command_queue d_q,void (* func)(void *),void * args,size_t args_size,cl_uint num_mems,const cl_mem * d_mems,const void ** mem_handles,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)367 clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *),
368 void *args, size_t args_size,
369 cl_uint num_mems, const cl_mem *d_mems,
370 const void **mem_handles, cl_uint num_deps,
371 const cl_event *d_deps, cl_event *rd_ev) {
372 return CL_INVALID_OPERATION;
373 }
374
375 CLOVER_API cl_int
clSetKernelArgSVMPointer(cl_kernel d_kern,cl_uint arg_index,const void * arg_value)376 clSetKernelArgSVMPointer(cl_kernel d_kern,
377 cl_uint arg_index,
378 const void *arg_value) try {
379 if (!any_of(std::mem_fn(&device::svm_support), obj(d_kern).program().devices()))
380 return CL_INVALID_OPERATION;
381 obj(d_kern).args().at(arg_index).set_svm(arg_value);
382 return CL_SUCCESS;
383
384 } catch (std::out_of_range &) {
385 return CL_INVALID_ARG_INDEX;
386
387 } catch (error &e) {
388 return e.get();
389 }
390
391 CLOVER_API cl_int
clSetKernelExecInfo(cl_kernel d_kern,cl_kernel_exec_info param_name,size_t param_value_size,const void * param_value)392 clSetKernelExecInfo(cl_kernel d_kern,
393 cl_kernel_exec_info param_name,
394 size_t param_value_size,
395 const void *param_value) try {
396
397 if (!any_of(std::mem_fn(&device::svm_support), obj(d_kern).program().devices()))
398 return CL_INVALID_OPERATION;
399
400 auto &kern = obj(d_kern);
401
402 const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm),
403 kern.program().context().devices());
404
405 if (!param_value)
406 return CL_INVALID_VALUE;
407
408 switch (param_name) {
409 case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM:
410 case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM: {
411 if (param_value_size != sizeof(cl_bool))
412 return CL_INVALID_VALUE;
413
414 cl_bool val = *static_cast<const cl_bool*>(param_value);
415 if (val == CL_TRUE && !has_system_svm)
416 return CL_INVALID_OPERATION;
417 else
418 return CL_SUCCESS;
419 }
420
421 case CL_KERNEL_EXEC_INFO_SVM_PTRS:
422 case CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM:
423 if (has_system_svm)
424 return CL_SUCCESS;
425
426 CLOVER_NOT_SUPPORTED_UNTIL("2.0");
427 return CL_INVALID_VALUE;
428
429 default:
430 return CL_INVALID_VALUE;
431 }
432
433 } catch (error &e) {
434 return e.get();
435 }
436