• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 // THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 // OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 // SOFTWARE.
21 //
22 
23 #include "api/util.hpp"
24 #include "core/kernel.hpp"
25 #include "core/event.hpp"
26 
27 using namespace clover;
28 
29 PUBLIC cl_kernel
clCreateKernel(cl_program prog,const char * name,cl_int * errcode_ret)30 clCreateKernel(cl_program prog, const char *name,
31                cl_int *errcode_ret) try {
32    if (!prog)
33       throw error(CL_INVALID_PROGRAM);
34 
35    if (!name)
36       throw error(CL_INVALID_VALUE);
37 
38    if (prog->binaries().empty())
39       throw error(CL_INVALID_PROGRAM_EXECUTABLE);
40 
41    auto sym = prog->binaries().begin()->second.sym(name);
42 
43    ret_error(errcode_ret, CL_SUCCESS);
44    return new kernel(*prog, name, { sym.args.begin(), sym.args.end() });
45 
46 } catch (module::noent_error &e) {
47    ret_error(errcode_ret, CL_INVALID_KERNEL_NAME);
48    return NULL;
49 
50 } catch(error &e) {
51    ret_error(errcode_ret, e);
52    return NULL;
53 }
54 
55 PUBLIC cl_int
clCreateKernelsInProgram(cl_program prog,cl_uint count,cl_kernel * kerns,cl_uint * count_ret)56 clCreateKernelsInProgram(cl_program prog, cl_uint count,
57                          cl_kernel *kerns, cl_uint *count_ret) {
58    if (!prog)
59       throw error(CL_INVALID_PROGRAM);
60 
61    if (prog->binaries().empty())
62       throw error(CL_INVALID_PROGRAM_EXECUTABLE);
63 
64    auto &syms = prog->binaries().begin()->second.syms;
65 
66    if (kerns && count < syms.size())
67       throw error(CL_INVALID_VALUE);
68 
69    if (kerns)
70       std::transform(syms.begin(), syms.end(), kerns,
71                      [=](const module::symbol &sym) {
72                         return new kernel(*prog, compat::string(sym.name),
73                                           { sym.args.begin(), sym.args.end() });
74                      });
75 
76    if (count_ret)
77       *count_ret = syms.size();
78 
79    return CL_SUCCESS;
80 }
81 
82 PUBLIC cl_int
clRetainKernel(cl_kernel kern)83 clRetainKernel(cl_kernel kern) {
84    if (!kern)
85       return CL_INVALID_KERNEL;
86 
87    kern->retain();
88    return CL_SUCCESS;
89 }
90 
91 PUBLIC cl_int
clReleaseKernel(cl_kernel kern)92 clReleaseKernel(cl_kernel kern) {
93    if (!kern)
94       return CL_INVALID_KERNEL;
95 
96    if (kern->release())
97       delete kern;
98 
99    return CL_SUCCESS;
100 }
101 
102 PUBLIC cl_int
clSetKernelArg(cl_kernel kern,cl_uint idx,size_t size,const void * value)103 clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size,
104                const void *value) try {
105    if (!kern)
106       throw error(CL_INVALID_KERNEL);
107 
108    if (idx >= kern->args.size())
109       throw error(CL_INVALID_ARG_INDEX);
110 
111    kern->args[idx]->set(size, value);
112 
113    return CL_SUCCESS;
114 
115 } catch(error &e) {
116    return e.get();
117 }
118 
119 PUBLIC cl_int
clGetKernelInfo(cl_kernel kern,cl_kernel_info param,size_t size,void * buf,size_t * size_ret)120 clGetKernelInfo(cl_kernel kern, cl_kernel_info param,
121                 size_t size, void *buf, size_t *size_ret) {
122    if (!kern)
123       return CL_INVALID_KERNEL;
124 
125    switch (param) {
126    case CL_KERNEL_FUNCTION_NAME:
127       return string_property(buf, size, size_ret, kern->name());
128 
129    case CL_KERNEL_NUM_ARGS:
130       return scalar_property<cl_uint>(buf, size, size_ret,
131                                       kern->args.size());
132 
133    case CL_KERNEL_REFERENCE_COUNT:
134       return scalar_property<cl_uint>(buf, size, size_ret,
135                                       kern->ref_count());
136 
137    case CL_KERNEL_CONTEXT:
138       return scalar_property<cl_context>(buf, size, size_ret,
139                                          &kern->prog.ctx);
140 
141    case CL_KERNEL_PROGRAM:
142       return scalar_property<cl_program>(buf, size, size_ret,
143                                          &kern->prog);
144 
145    default:
146       return CL_INVALID_VALUE;
147    }
148 }
149 
150 PUBLIC cl_int
clGetKernelWorkGroupInfo(cl_kernel kern,cl_device_id dev,cl_kernel_work_group_info param,size_t size,void * buf,size_t * size_ret)151 clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev,
152                          cl_kernel_work_group_info param,
153                          size_t size, void *buf, size_t *size_ret) {
154    if (!kern)
155       return CL_INVALID_KERNEL;
156 
157    if ((!dev && kern->prog.binaries().size() != 1) ||
158        (dev && !kern->prog.binaries().count(dev)))
159       return CL_INVALID_DEVICE;
160 
161    switch (param) {
162    case CL_KERNEL_WORK_GROUP_SIZE:
163       return scalar_property<size_t>(buf, size, size_ret,
164                                      kern->max_block_size());
165 
166    case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
167       return vector_property<size_t>(buf, size, size_ret,
168                                      kern->block_size());
169 
170    case CL_KERNEL_LOCAL_MEM_SIZE:
171       return scalar_property<cl_ulong>(buf, size, size_ret,
172                                        kern->mem_local());
173 
174    case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
175       return scalar_property<size_t>(buf, size, size_ret, 1);
176 
177    case CL_KERNEL_PRIVATE_MEM_SIZE:
178       return scalar_property<cl_ulong>(buf, size, size_ret,
179                                        kern->mem_private());
180 
181    default:
182       return CL_INVALID_VALUE;
183    }
184 }
185 
186 namespace {
187    ///
188    /// Common argument checking shared by kernel invocation commands.
189    ///
190    void
kernel_validate(cl_command_queue q,cl_kernel kern,cl_uint dims,const size_t * grid_offset,const size_t * grid_size,const size_t * block_size,cl_uint num_deps,const cl_event * deps,cl_event * ev)191    kernel_validate(cl_command_queue q, cl_kernel kern,
192                    cl_uint dims, const size_t *grid_offset,
193                    const size_t *grid_size, const size_t *block_size,
194                    cl_uint num_deps, const cl_event *deps,
195                    cl_event *ev) {
196       if (!q)
197          throw error(CL_INVALID_COMMAND_QUEUE);
198 
199       if (!kern)
200          throw error(CL_INVALID_KERNEL);
201 
202       if (&kern->prog.ctx != &q->ctx ||
203           any_of([&](const cl_event ev) {
204                 return &ev->ctx != &q->ctx;
205              }, deps, deps + num_deps))
206          throw error(CL_INVALID_CONTEXT);
207 
208       if (bool(num_deps) != bool(deps) ||
209           any_of(is_zero<cl_event>(), deps, deps + num_deps))
210          throw error(CL_INVALID_EVENT_WAIT_LIST);
211 
212       if (any_of([](std::unique_ptr<kernel::argument> &arg) {
213                return !arg->set();
214             }, kern->args.begin(), kern->args.end()))
215          throw error(CL_INVALID_KERNEL_ARGS);
216 
217       if (!kern->prog.binaries().count(&q->dev))
218          throw error(CL_INVALID_PROGRAM_EXECUTABLE);
219 
220       if (dims < 1 || dims > q->dev.max_block_size().size())
221          throw error(CL_INVALID_WORK_DIMENSION);
222 
223       if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims))
224          throw error(CL_INVALID_GLOBAL_WORK_SIZE);
225 
226       if (block_size) {
227          if (any_of([](size_t b, size_t max) {
228                   return b == 0 || b > max;
229                }, block_size, block_size + dims,
230                q->dev.max_block_size().begin()))
231             throw error(CL_INVALID_WORK_ITEM_SIZE);
232 
233          if (any_of([](size_t b, size_t g) {
234                   return g % b;
235                }, block_size, block_size + dims, grid_size))
236             throw error(CL_INVALID_WORK_GROUP_SIZE);
237 
238          if (fold(std::multiplies<size_t>(), 1u,
239                   block_size, block_size + dims) >
240              q->dev.max_threads_per_block())
241             throw error(CL_INVALID_WORK_GROUP_SIZE);
242       }
243    }
244 
245    ///
246    /// Common event action shared by kernel invocation commands.
247    ///
248    std::function<void (event &)>
kernel_op(cl_command_queue q,cl_kernel kern,const std::vector<size_t> & grid_offset,const std::vector<size_t> & grid_size,const std::vector<size_t> & block_size)249    kernel_op(cl_command_queue q, cl_kernel kern,
250              const std::vector<size_t> &grid_offset,
251              const std::vector<size_t> &grid_size,
252              const std::vector<size_t> &block_size) {
253       const std::vector<size_t> reduced_grid_size = map(
254          std::divides<size_t>(), grid_size.begin(), grid_size.end(),
255          block_size.begin());
256 
257       return [=](event &) {
258          kern->launch(*q, grid_offset, reduced_grid_size, block_size);
259       };
260    }
261 
262    template<typename T, typename S>
263    std::vector<T>
opt_vector(const T * p,S n)264    opt_vector(const T *p, S n) {
265       if (p)
266          return { p, p + n };
267       else
268          return { n };
269    }
270 }
271 
272 PUBLIC cl_int
clEnqueueNDRangeKernel(cl_command_queue q,cl_kernel kern,cl_uint dims,const size_t * pgrid_offset,const size_t * pgrid_size,const size_t * pblock_size,cl_uint num_deps,const cl_event * deps,cl_event * ev)273 clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern,
274                        cl_uint dims, const size_t *pgrid_offset,
275                        const size_t *pgrid_size, const size_t *pblock_size,
276                        cl_uint num_deps, const cl_event *deps,
277                        cl_event *ev) try {
278    const std::vector<size_t> grid_offset = opt_vector(pgrid_offset, dims);
279    const std::vector<size_t> grid_size = opt_vector(pgrid_size, dims);
280    const std::vector<size_t> block_size = opt_vector(pblock_size, dims);
281 
282    kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size,
283                    num_deps, deps, ev);
284 
285    hard_event *hev = new hard_event(
286       *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps },
287       kernel_op(q, kern, grid_offset, grid_size, block_size));
288 
289    ret_object(ev, hev);
290    return CL_SUCCESS;
291 
292 } catch(error &e) {
293    return e.get();
294 }
295 
296 PUBLIC cl_int
clEnqueueTask(cl_command_queue q,cl_kernel kern,cl_uint num_deps,const cl_event * deps,cl_event * ev)297 clEnqueueTask(cl_command_queue q, cl_kernel kern,
298               cl_uint num_deps, const cl_event *deps,
299               cl_event *ev) try {
300    const std::vector<size_t> grid_offset = { 0 };
301    const std::vector<size_t> grid_size = { 1 };
302    const std::vector<size_t> block_size = { 1 };
303 
304    kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(),
305                    block_size.data(), num_deps, deps, ev);
306 
307    hard_event *hev = new hard_event(
308       *q, CL_COMMAND_TASK, { deps, deps + num_deps },
309       kernel_op(q, kern, grid_offset, grid_size, block_size));
310 
311    ret_object(ev, hev);
312    return CL_SUCCESS;
313 
314 } catch(error &e) {
315    return e.get();
316 }
317 
318 PUBLIC cl_int
clEnqueueNativeKernel(cl_command_queue q,void (* func)(void *),void * args,size_t args_size,cl_uint obj_count,const cl_mem * obj_list,const void ** obj_args,cl_uint num_deps,const cl_event * deps,cl_event * ev)319 clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *),
320                       void *args, size_t args_size,
321                       cl_uint obj_count, const cl_mem *obj_list,
322                       const void **obj_args, cl_uint num_deps,
323                       const cl_event *deps, cl_event *ev) {
324    return CL_INVALID_OPERATION;
325 }
326