• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22 
23 #include "api/util.hpp"
24 #include "core/kernel.hpp"
25 #include "core/event.hpp"
26 
27 using namespace clover;
28 
29 CLOVER_API cl_kernel
clCreateKernel(cl_program d_prog,const char * name,cl_int * r_errcode)30 clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try {
31    auto &prog = obj(d_prog);
32 
33    if (!name)
34       throw error(CL_INVALID_VALUE);
35 
36    auto &sym = find(name_equals(name), prog.symbols());
37 
38    ret_error(r_errcode, CL_SUCCESS);
39    return new kernel(prog, name, range(sym.args));
40 
41 } catch (std::out_of_range &e) {
42    ret_error(r_errcode, CL_INVALID_KERNEL_NAME);
43    return NULL;
44 
45 } catch (error &e) {
46    ret_error(r_errcode, e);
47    return NULL;
48 }
49 
50 CLOVER_API cl_int
clCreateKernelsInProgram(cl_program d_prog,cl_uint count,cl_kernel * rd_kerns,cl_uint * r_count)51 clCreateKernelsInProgram(cl_program d_prog, cl_uint count,
52                          cl_kernel *rd_kerns, cl_uint *r_count) try {
53    auto &prog = obj(d_prog);
54    auto &syms = prog.symbols();
55 
56    if (rd_kerns && count < syms.size())
57       throw error(CL_INVALID_VALUE);
58 
59    if (rd_kerns)
60       copy(map([&](const module::symbol &sym) {
61                return desc(new kernel(prog,
62                                       std::string(sym.name.begin(),
63                                                   sym.name.end()),
64                                       range(sym.args)));
65             }, syms),
66          rd_kerns);
67 
68    if (r_count)
69       *r_count = syms.size();
70 
71    return CL_SUCCESS;
72 
73 } catch (error &e) {
74    return e.get();
75 }
76 
77 CLOVER_API cl_int
clRetainKernel(cl_kernel d_kern)78 clRetainKernel(cl_kernel d_kern) try {
79    obj(d_kern).retain();
80    return CL_SUCCESS;
81 
82 } catch (error &e) {
83    return e.get();
84 }
85 
86 CLOVER_API cl_int
clReleaseKernel(cl_kernel d_kern)87 clReleaseKernel(cl_kernel d_kern) try {
88    if (obj(d_kern).release())
89       delete pobj(d_kern);
90 
91    return CL_SUCCESS;
92 
93 } catch (error &e) {
94    return e.get();
95 }
96 
97 CLOVER_API cl_int
clSetKernelArg(cl_kernel d_kern,cl_uint idx,size_t size,const void * value)98 clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size,
99                const void *value) try {
100    obj(d_kern).args().at(idx).set(size, value);
101    return CL_SUCCESS;
102 
103 } catch (std::out_of_range &e) {
104    return CL_INVALID_ARG_INDEX;
105 
106 } catch (error &e) {
107    return e.get();
108 }
109 
110 CLOVER_API cl_int
clGetKernelInfo(cl_kernel d_kern,cl_kernel_info param,size_t size,void * r_buf,size_t * r_size)111 clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,
112                 size_t size, void *r_buf, size_t *r_size) try {
113    property_buffer buf { r_buf, size, r_size };
114    auto &kern = obj(d_kern);
115 
116    switch (param) {
117    case CL_KERNEL_FUNCTION_NAME:
118       buf.as_string() = kern.name();
119       break;
120 
121    case CL_KERNEL_NUM_ARGS:
122       buf.as_scalar<cl_uint>() = kern.args().size();
123       break;
124 
125    case CL_KERNEL_REFERENCE_COUNT:
126       buf.as_scalar<cl_uint>() = kern.ref_count();
127       break;
128 
129    case CL_KERNEL_CONTEXT:
130       buf.as_scalar<cl_context>() = desc(kern.program().context());
131       break;
132 
133    case CL_KERNEL_PROGRAM:
134       buf.as_scalar<cl_program>() = desc(kern.program());
135       break;
136 
137    case CL_KERNEL_ATTRIBUTES:
138       buf.as_string() = find(name_equals(kern.name()), kern.program().symbols()).attributes;
139       break;
140 
141    default:
142       throw error(CL_INVALID_VALUE);
143    }
144 
145    return CL_SUCCESS;
146 
147 } catch (error &e) {
148    return e.get();
149 }
150 
151 CLOVER_API cl_int
clGetKernelWorkGroupInfo(cl_kernel d_kern,cl_device_id d_dev,cl_kernel_work_group_info param,size_t size,void * r_buf,size_t * r_size)152 clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
153                          cl_kernel_work_group_info param,
154                          size_t size, void *r_buf, size_t *r_size) try {
155    property_buffer buf { r_buf, size, r_size };
156    auto &kern = obj(d_kern);
157    auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.program().devices()));
158 
159    if (!count(dev, kern.program().devices()))
160       throw error(CL_INVALID_DEVICE);
161 
162    switch (param) {
163    case CL_KERNEL_WORK_GROUP_SIZE:
164       buf.as_scalar<size_t>() = dev.max_threads_per_block();
165       break;
166 
167    case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
168       buf.as_vector<size_t>() = kern.required_block_size();
169       break;
170 
171    case CL_KERNEL_LOCAL_MEM_SIZE:
172       buf.as_scalar<cl_ulong>() = kern.mem_local();
173       break;
174 
175    case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
176       buf.as_scalar<size_t>() = dev.subgroup_size();
177       break;
178 
179    case CL_KERNEL_PRIVATE_MEM_SIZE:
180       buf.as_scalar<cl_ulong>() = kern.mem_private();
181       break;
182 
183    default:
184       throw error(CL_INVALID_VALUE);
185    }
186 
187    return CL_SUCCESS;
188 
189 } catch (error &e) {
190    return e.get();
191 
192 } catch (std::out_of_range &e) {
193    return CL_INVALID_DEVICE;
194 }
195 
196 CLOVER_API cl_int
clGetKernelArgInfo(cl_kernel d_kern,cl_uint idx,cl_kernel_arg_info param,size_t size,void * r_buf,size_t * r_size)197 clGetKernelArgInfo(cl_kernel d_kern,
198                    cl_uint idx, cl_kernel_arg_info param,
199                    size_t size, void *r_buf, size_t *r_size) try {
200    property_buffer buf { r_buf, size, r_size };
201 
202    auto info = obj(d_kern).args_infos().at(idx);
203 
204    if (info.arg_name.empty())
205       return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
206 
207    switch (param) {
208    case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
209       buf.as_scalar<cl_kernel_arg_address_qualifier>() = info.address_qualifier;
210       break;
211 
212    case CL_KERNEL_ARG_ACCESS_QUALIFIER:
213       buf.as_scalar<cl_kernel_arg_access_qualifier>() = info.access_qualifier;
214       break;
215 
216    case CL_KERNEL_ARG_TYPE_NAME:
217       buf.as_string() = info.type_name;
218       break;
219 
220    case CL_KERNEL_ARG_TYPE_QUALIFIER:
221       buf.as_scalar<cl_kernel_arg_type_qualifier>() = info.type_qualifier;
222       break;
223 
224    case CL_KERNEL_ARG_NAME:
225       buf.as_string() = info.arg_name;
226       break;
227 
228    default:
229       throw error(CL_INVALID_VALUE);
230    }
231 
232    return CL_SUCCESS;
233 
234 } catch (std::out_of_range &e) {
235    return CL_INVALID_ARG_INDEX;
236 
237 } catch (error &e) {
238    return e.get();
239 }
240 
241 namespace {
242    ///
243    /// Common argument checking shared by kernel invocation commands.
244    ///
245    void
validate_common(const command_queue & q,kernel & kern,const ref_vector<event> & deps)246    validate_common(const command_queue &q, kernel &kern,
247                    const ref_vector<event> &deps) {
248       if (kern.program().context() != q.context() ||
249           any_of([&](const event &ev) {
250                 return ev.context() != q.context();
251              }, deps))
252          throw error(CL_INVALID_CONTEXT);
253 
254       if (any_of([](kernel::argument &arg) {
255                return !arg.set();
256             }, kern.args()))
257          throw error(CL_INVALID_KERNEL_ARGS);
258 
259       // If the command queue's device is not associated to the program, we get
260       // a module, with no sections, which will also fail the following test.
261       auto &m = kern.program().build(q.device()).binary;
262       if (!any_of(type_equals(module::section::text_executable), m.secs))
263          throw error(CL_INVALID_PROGRAM_EXECUTABLE);
264    }
265 
266    std::vector<size_t>
validate_grid_size(const command_queue & q,cl_uint dims,const size_t * d_grid_size)267    validate_grid_size(const command_queue &q, cl_uint dims,
268                       const size_t *d_grid_size) {
269       auto grid_size = range(d_grid_size, dims);
270 
271       if (dims < 1 || dims > q.device().max_block_size().size())
272          throw error(CL_INVALID_WORK_DIMENSION);
273 
274       if (!d_grid_size || any_of(is_zero(), grid_size))
275          throw error(CL_INVALID_GLOBAL_WORK_SIZE);
276 
277       return grid_size;
278    }
279 
280    std::vector<size_t>
validate_grid_offset(const command_queue & q,cl_uint dims,const size_t * d_grid_offset)281    validate_grid_offset(const command_queue &q, cl_uint dims,
282                         const size_t *d_grid_offset) {
283       if (d_grid_offset)
284          return range(d_grid_offset, dims);
285       else
286          return std::vector<size_t>(dims, 0);
287    }
288 
289    std::vector<size_t>
validate_block_size(const command_queue & q,const kernel & kern,cl_uint dims,const size_t * d_grid_size,const size_t * d_block_size)290    validate_block_size(const command_queue &q, const kernel &kern,
291                        cl_uint dims, const size_t *d_grid_size,
292                        const size_t *d_block_size) {
293       auto grid_size = range(d_grid_size, dims);
294 
295       if (d_block_size) {
296          auto block_size = range(d_block_size, dims);
297 
298          if (any_of(is_zero(), block_size) ||
299              any_of(greater(), block_size, q.device().max_block_size()))
300             throw error(CL_INVALID_WORK_ITEM_SIZE);
301 
302          if (any_of(modulus(), grid_size, block_size))
303             throw error(CL_INVALID_WORK_GROUP_SIZE);
304 
305          if (fold(multiplies(), 1u, block_size) >
306              q.device().max_threads_per_block())
307             throw error(CL_INVALID_WORK_GROUP_SIZE);
308 
309          return block_size;
310 
311       } else {
312          return kern.optimal_block_size(q, grid_size);
313       }
314    }
315 }
316 
317 CLOVER_API cl_int
clEnqueueNDRangeKernel(cl_command_queue d_q,cl_kernel d_kern,cl_uint dims,const size_t * d_grid_offset,const size_t * d_grid_size,const size_t * d_block_size,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)318 clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern,
319                        cl_uint dims, const size_t *d_grid_offset,
320                        const size_t *d_grid_size, const size_t *d_block_size,
321                        cl_uint num_deps, const cl_event *d_deps,
322                        cl_event *rd_ev) try {
323    auto &q = obj(d_q);
324    auto &kern = obj(d_kern);
325    auto deps = objs<wait_list_tag>(d_deps, num_deps);
326    auto grid_size = validate_grid_size(q, dims, d_grid_size);
327    auto grid_offset = validate_grid_offset(q, dims, d_grid_offset);
328    auto block_size = validate_block_size(q, kern, dims,
329                                          d_grid_size, d_block_size);
330 
331    validate_common(q, kern, deps);
332 
333    auto hev = create<hard_event>(
334       q, CL_COMMAND_NDRANGE_KERNEL, deps,
335       [=, &kern, &q](event &) {
336          kern.launch(q, grid_offset, grid_size, block_size);
337       });
338 
339    ret_object(rd_ev, hev);
340    return CL_SUCCESS;
341 
342 } catch (error &e) {
343    return e.get();
344 }
345 
346 CLOVER_API cl_int
clEnqueueTask(cl_command_queue d_q,cl_kernel d_kern,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)347 clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern,
348               cl_uint num_deps, const cl_event *d_deps,
349               cl_event *rd_ev) try {
350    auto &q = obj(d_q);
351    auto &kern = obj(d_kern);
352    auto deps = objs<wait_list_tag>(d_deps, num_deps);
353 
354    validate_common(q, kern, deps);
355 
356    auto hev = create<hard_event>(
357       q, CL_COMMAND_TASK, deps,
358       [=, &kern, &q](event &) {
359          kern.launch(q, { 0 }, { 1 }, { 1 });
360       });
361 
362    ret_object(rd_ev, hev);
363    return CL_SUCCESS;
364 
365 } catch (error &e) {
366    return e.get();
367 }
368 
369 CLOVER_API cl_int
clEnqueueNativeKernel(cl_command_queue d_q,void (* func)(void *),void * args,size_t args_size,cl_uint num_mems,const cl_mem * d_mems,const void ** mem_handles,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)370 clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *),
371                       void *args, size_t args_size,
372                       cl_uint num_mems, const cl_mem *d_mems,
373                       const void **mem_handles, cl_uint num_deps,
374                       const cl_event *d_deps, cl_event *rd_ev) {
375    return CL_INVALID_OPERATION;
376 }
377 
378 CLOVER_API cl_int
clSetKernelArgSVMPointer(cl_kernel d_kern,cl_uint arg_index,const void * arg_value)379 clSetKernelArgSVMPointer(cl_kernel d_kern,
380                          cl_uint arg_index,
381                          const void *arg_value) try {
382    obj(d_kern).args().at(arg_index).set_svm(arg_value);
383    return CL_SUCCESS;
384 
385 } catch (std::out_of_range &e) {
386    return CL_INVALID_ARG_INDEX;
387 
388 } catch (error &e) {
389    return e.get();
390 }
391 
392 CLOVER_API cl_int
clSetKernelExecInfo(cl_kernel d_kern,cl_kernel_exec_info param_name,size_t param_value_size,const void * param_value)393 clSetKernelExecInfo(cl_kernel d_kern,
394                     cl_kernel_exec_info param_name,
395                     size_t param_value_size,
396                     const void *param_value) try {
397    auto &kern = obj(d_kern);
398    const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm),
399                                       kern.program().context().devices());
400 
401    if (!param_value)
402       return CL_INVALID_VALUE;
403 
404    switch (param_name) {
405    case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM:
406    case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM: {
407       if (param_value_size != sizeof(cl_bool))
408          return CL_INVALID_VALUE;
409 
410       cl_bool val = *static_cast<const cl_bool*>(param_value);
411       if (val == CL_TRUE && !has_system_svm)
412          return CL_INVALID_OPERATION;
413       else
414          return CL_SUCCESS;
415    }
416 
417    case CL_KERNEL_EXEC_INFO_SVM_PTRS:
418    case CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM:
419       if (has_system_svm)
420          return CL_SUCCESS;
421 
422       CLOVER_NOT_SUPPORTED_UNTIL("2.0");
423       return CL_INVALID_VALUE;
424 
425    default:
426       return CL_INVALID_VALUE;
427    }
428 
429 } catch (error &e) {
430    return e.get();
431 }
432