1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 // OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 // SOFTWARE.
21 //
22
23 #include <cstring>
24
25 #include "api/util.hpp"
26 #include "core/event.hpp"
27 #include "core/resource.hpp"
28
29 using namespace clover;
30
31 namespace {
32 typedef resource::point point;
33
34 ///
35 /// Common argument checking shared by memory transfer commands.
36 ///
37 void
validate_base(cl_command_queue q,cl_uint num_deps,const cl_event * deps)38 validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) {
39 if (!q)
40 throw error(CL_INVALID_COMMAND_QUEUE);
41
42 if (bool(num_deps) != bool(deps) ||
43 any_of(is_zero<cl_event>(), deps, deps + num_deps))
44 throw error(CL_INVALID_EVENT_WAIT_LIST);
45
46 if (any_of([&](const cl_event ev) {
47 return &ev->ctx != &q->ctx;
48 }, deps, deps + num_deps))
49 throw error(CL_INVALID_CONTEXT);
50 }
51
52 ///
53 /// Memory object-specific argument checking shared by most memory
54 /// transfer commands.
55 ///
56 void
validate_obj(cl_command_queue q,cl_mem obj)57 validate_obj(cl_command_queue q, cl_mem obj) {
58 if (!obj)
59 throw error(CL_INVALID_MEM_OBJECT);
60
61 if (&obj->ctx != &q->ctx)
62 throw error(CL_INVALID_CONTEXT);
63 }
64
65 ///
66 /// Class that encapsulates the task of mapping an object of type
67 /// \a T. The return value of get() should be implicitly
68 /// convertible to \a void *.
69 ///
70 template<typename T> struct __map;
71
72 template<> struct __map<void *> {
73 static void *
get__anonbf9428360111::__map74 get(cl_command_queue q, void *obj, cl_map_flags flags,
75 size_t offset, size_t size) {
76 return (char *)obj + offset;
77 }
78 };
79
80 template<> struct __map<const void *> {
81 static const void *
get__anonbf9428360111::__map82 get(cl_command_queue q, const void *obj, cl_map_flags flags,
83 size_t offset, size_t size) {
84 return (const char *)obj + offset;
85 }
86 };
87
88 template<> struct __map<memory_obj *> {
89 static mapping
get__anonbf9428360111::__map90 get(cl_command_queue q, memory_obj *obj, cl_map_flags flags,
91 size_t offset, size_t size) {
92 return { *q, obj->resource(q), flags, true, { offset }, { size }};
93 }
94 };
95
96 ///
97 /// Software copy from \a src_obj to \a dst_obj. They can be
98 /// either pointers or memory objects.
99 ///
100 template<typename T, typename S>
101 std::function<void (event &)>
soft_copy_op(cl_command_queue q,T dst_obj,const point & dst_orig,const point & dst_pitch,S src_obj,const point & src_orig,const point & src_pitch,const point & region)102 soft_copy_op(cl_command_queue q,
103 T dst_obj, const point &dst_orig, const point &dst_pitch,
104 S src_obj, const point &src_orig, const point &src_pitch,
105 const point ®ion) {
106 return [=](event &) {
107 auto dst = __map<T>::get(q, dst_obj, CL_MAP_WRITE,
108 dst_pitch(dst_orig), dst_pitch(region));
109 auto src = __map<S>::get(q, src_obj, CL_MAP_READ,
110 src_pitch(src_orig), src_pitch(region));
111 point p;
112
113 for (p[2] = 0; p[2] < region[2]; ++p[2]) {
114 for (p[1] = 0; p[1] < region[1]; ++p[1]) {
115 std::memcpy(static_cast<char *>(dst) + dst_pitch(p),
116 static_cast<const char *>(src) + src_pitch(p),
117 src_pitch[0] * region[0]);
118 }
119 }
120 };
121 }
122
123 ///
124 /// Hardware copy from \a src_obj to \a dst_obj.
125 ///
126 template<typename T, typename S>
127 std::function<void (event &)>
hard_copy_op(cl_command_queue q,T dst_obj,const point & dst_orig,S src_obj,const point & src_orig,const point & region)128 hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig,
129 S src_obj, const point &src_orig, const point ®ion) {
130 return [=](event &) {
131 dst_obj->resource(q).copy(*q, dst_orig, region,
132 src_obj->resource(q), src_orig);
133 };
134 }
135 }
136
137 PUBLIC cl_int
clEnqueueReadBuffer(cl_command_queue q,cl_mem obj,cl_bool blocking,size_t offset,size_t size,void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)138 clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
139 size_t offset, size_t size, void *ptr,
140 cl_uint num_deps, const cl_event *deps,
141 cl_event *ev) try {
142 validate_base(q, num_deps, deps);
143 validate_obj(q, obj);
144
145 if (!ptr || offset > obj->size() || offset + size > obj->size())
146 throw error(CL_INVALID_VALUE);
147
148 hard_event *hev = new hard_event(
149 *q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps },
150 soft_copy_op(q,
151 ptr, { 0 }, { 1 },
152 obj, { offset }, { 1 },
153 { size, 1, 1 }));
154
155 ret_object(ev, hev);
156 return CL_SUCCESS;
157
158 } catch (error &e) {
159 return e.get();
160 }
161
162 PUBLIC cl_int
clEnqueueWriteBuffer(cl_command_queue q,cl_mem obj,cl_bool blocking,size_t offset,size_t size,const void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)163 clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
164 size_t offset, size_t size, const void *ptr,
165 cl_uint num_deps, const cl_event *deps,
166 cl_event *ev) try {
167 validate_base(q, num_deps, deps);
168 validate_obj(q, obj);
169
170 if (!ptr || offset > obj->size() || offset + size > obj->size())
171 throw error(CL_INVALID_VALUE);
172
173 hard_event *hev = new hard_event(
174 *q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps },
175 soft_copy_op(q,
176 obj, { offset }, { 1 },
177 ptr, { 0 }, { 1 },
178 { size, 1, 1 }));
179
180 ret_object(ev, hev);
181 return CL_SUCCESS;
182
183 } catch (error &e) {
184 return e.get();
185 }
186
187 PUBLIC cl_int
clEnqueueReadBufferRect(cl_command_queue q,cl_mem obj,cl_bool blocking,const size_t * obj_origin,const size_t * host_origin,const size_t * region,size_t obj_row_pitch,size_t obj_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)188 clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking,
189 const size_t *obj_origin, const size_t *host_origin,
190 const size_t *region,
191 size_t obj_row_pitch, size_t obj_slice_pitch,
192 size_t host_row_pitch, size_t host_slice_pitch,
193 void *ptr,
194 cl_uint num_deps, const cl_event *deps,
195 cl_event *ev) try {
196 validate_base(q, num_deps, deps);
197 validate_obj(q, obj);
198
199 if (!ptr)
200 throw error(CL_INVALID_VALUE);
201
202 hard_event *hev = new hard_event(
203 *q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps },
204 soft_copy_op(q,
205 ptr, host_origin,
206 { 1, host_row_pitch, host_slice_pitch },
207 obj, obj_origin,
208 { 1, obj_row_pitch, obj_slice_pitch },
209 region));
210
211 ret_object(ev, hev);
212 return CL_SUCCESS;
213
214 } catch (error &e) {
215 return e.get();
216 }
217
218 PUBLIC cl_int
clEnqueueWriteBufferRect(cl_command_queue q,cl_mem obj,cl_bool blocking,const size_t * obj_origin,const size_t * host_origin,const size_t * region,size_t obj_row_pitch,size_t obj_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,const void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)219 clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking,
220 const size_t *obj_origin, const size_t *host_origin,
221 const size_t *region,
222 size_t obj_row_pitch, size_t obj_slice_pitch,
223 size_t host_row_pitch, size_t host_slice_pitch,
224 const void *ptr,
225 cl_uint num_deps, const cl_event *deps,
226 cl_event *ev) try {
227 validate_base(q, num_deps, deps);
228 validate_obj(q, obj);
229
230 if (!ptr)
231 throw error(CL_INVALID_VALUE);
232
233 hard_event *hev = new hard_event(
234 *q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps },
235 soft_copy_op(q,
236 obj, obj_origin,
237 { 1, obj_row_pitch, obj_slice_pitch },
238 ptr, host_origin,
239 { 1, host_row_pitch, host_slice_pitch },
240 region));
241
242 ret_object(ev, hev);
243 return CL_SUCCESS;
244
245 } catch (error &e) {
246 return e.get();
247 }
248
249 PUBLIC cl_int
clEnqueueCopyBuffer(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,size_t src_offset,size_t dst_offset,size_t size,cl_uint num_deps,const cl_event * deps,cl_event * ev)250 clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
251 size_t src_offset, size_t dst_offset, size_t size,
252 cl_uint num_deps, const cl_event *deps,
253 cl_event *ev) try {
254 validate_base(q, num_deps, deps);
255 validate_obj(q, src_obj);
256 validate_obj(q, dst_obj);
257
258 hard_event *hev = new hard_event(
259 *q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps },
260 hard_copy_op(q, dst_obj, { dst_offset },
261 src_obj, { src_offset },
262 { size, 1, 1 }));
263
264 ret_object(ev, hev);
265 return CL_SUCCESS;
266
267 } catch (error &e) {
268 return e.get();
269 }
270
271 PUBLIC cl_int
clEnqueueCopyBufferRect(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,const size_t * src_origin,const size_t * dst_origin,const size_t * region,size_t src_row_pitch,size_t src_slice_pitch,size_t dst_row_pitch,size_t dst_slice_pitch,cl_uint num_deps,const cl_event * deps,cl_event * ev)272 clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
273 const size_t *src_origin, const size_t *dst_origin,
274 const size_t *region,
275 size_t src_row_pitch, size_t src_slice_pitch,
276 size_t dst_row_pitch, size_t dst_slice_pitch,
277 cl_uint num_deps, const cl_event *deps,
278 cl_event *ev) try {
279 validate_base(q, num_deps, deps);
280 validate_obj(q, src_obj);
281 validate_obj(q, dst_obj);
282
283 hard_event *hev = new hard_event(
284 *q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps },
285 soft_copy_op(q,
286 dst_obj, dst_origin,
287 { 1, dst_row_pitch, dst_slice_pitch },
288 src_obj, src_origin,
289 { 1, src_row_pitch, src_slice_pitch },
290 region));
291
292 ret_object(ev, hev);
293 return CL_SUCCESS;
294
295 } catch (error &e) {
296 return e.get();
297 }
298
299 PUBLIC cl_int
clEnqueueReadImage(cl_command_queue q,cl_mem obj,cl_bool blocking,const size_t * origin,const size_t * region,size_t row_pitch,size_t slice_pitch,void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)300 clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
301 const size_t *origin, const size_t *region,
302 size_t row_pitch, size_t slice_pitch, void *ptr,
303 cl_uint num_deps, const cl_event *deps,
304 cl_event *ev) try {
305 image *img = dynamic_cast<image *>(obj);
306
307 validate_base(q, num_deps, deps);
308 validate_obj(q, img);
309
310 if (!ptr)
311 throw error(CL_INVALID_VALUE);
312
313 hard_event *hev = new hard_event(
314 *q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps },
315 soft_copy_op(q,
316 ptr, {},
317 { 1, row_pitch, slice_pitch },
318 obj, origin,
319 { 1, img->row_pitch(), img->slice_pitch() },
320 region));
321
322 ret_object(ev, hev);
323 return CL_SUCCESS;
324
325 } catch (error &e) {
326 return e.get();
327 }
328
329 PUBLIC cl_int
clEnqueueWriteImage(cl_command_queue q,cl_mem obj,cl_bool blocking,const size_t * origin,const size_t * region,size_t row_pitch,size_t slice_pitch,const void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)330 clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
331 const size_t *origin, const size_t *region,
332 size_t row_pitch, size_t slice_pitch, const void *ptr,
333 cl_uint num_deps, const cl_event *deps,
334 cl_event *ev) try {
335 image *img = dynamic_cast<image *>(obj);
336
337 validate_base(q, num_deps, deps);
338 validate_obj(q, img);
339
340 if (!ptr)
341 throw error(CL_INVALID_VALUE);
342
343 hard_event *hev = new hard_event(
344 *q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps },
345 soft_copy_op(q,
346 obj, origin,
347 { 1, img->row_pitch(), img->slice_pitch() },
348 ptr, {},
349 { 1, row_pitch, slice_pitch },
350 region));
351
352 ret_object(ev, hev);
353 return CL_SUCCESS;
354
355 } catch (error &e) {
356 return e.get();
357 }
358
359 PUBLIC cl_int
clEnqueueCopyImage(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,const size_t * src_origin,const size_t * dst_origin,const size_t * region,cl_uint num_deps,const cl_event * deps,cl_event * ev)360 clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
361 const size_t *src_origin, const size_t *dst_origin,
362 const size_t *region,
363 cl_uint num_deps, const cl_event *deps,
364 cl_event *ev) try {
365 image *src_img = dynamic_cast<image *>(src_obj);
366 image *dst_img = dynamic_cast<image *>(dst_obj);
367
368 validate_base(q, num_deps, deps);
369 validate_obj(q, src_img);
370 validate_obj(q, dst_img);
371
372 hard_event *hev = new hard_event(
373 *q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps },
374 hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region));
375
376 ret_object(ev, hev);
377 return CL_SUCCESS;
378
379 } catch (error &e) {
380 return e.get();
381 }
382
383 PUBLIC cl_int
clEnqueueCopyImageToBuffer(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,const size_t * src_origin,const size_t * region,size_t dst_offset,cl_uint num_deps,const cl_event * deps,cl_event * ev)384 clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
385 const size_t *src_origin, const size_t *region,
386 size_t dst_offset,
387 cl_uint num_deps, const cl_event *deps,
388 cl_event *ev) try {
389 image *src_img = dynamic_cast<image *>(src_obj);
390
391 validate_base(q, num_deps, deps);
392 validate_obj(q, src_img);
393 validate_obj(q, dst_obj);
394
395 hard_event *hev = new hard_event(
396 *q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps },
397 soft_copy_op(q,
398 dst_obj, { dst_offset },
399 { 0, 0, 0 },
400 src_obj, src_origin,
401 { 1, src_img->row_pitch(), src_img->slice_pitch() },
402 region));
403
404 ret_object(ev, hev);
405 return CL_SUCCESS;
406
407 } catch (error &e) {
408 return e.get();
409 }
410
411 PUBLIC cl_int
clEnqueueCopyBufferToImage(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,size_t src_offset,const size_t * dst_origin,const size_t * region,cl_uint num_deps,const cl_event * deps,cl_event * ev)412 clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
413 size_t src_offset,
414 const size_t *dst_origin, const size_t *region,
415 cl_uint num_deps, const cl_event *deps,
416 cl_event *ev) try {
417 image *dst_img = dynamic_cast<image *>(src_obj);
418
419 validate_base(q, num_deps, deps);
420 validate_obj(q, src_obj);
421 validate_obj(q, dst_img);
422
423 hard_event *hev = new hard_event(
424 *q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps },
425 soft_copy_op(q,
426 dst_obj, dst_origin,
427 { 1, dst_img->row_pitch(), dst_img->slice_pitch() },
428 src_obj, { src_offset },
429 { 0, 0, 0 },
430 region));
431
432 ret_object(ev, hev);
433 return CL_SUCCESS;
434
435 } catch (error &e) {
436 return e.get();
437 }
438
439 PUBLIC void *
clEnqueueMapBuffer(cl_command_queue q,cl_mem obj,cl_bool blocking,cl_map_flags flags,size_t offset,size_t size,cl_uint num_deps,const cl_event * deps,cl_event * ev,cl_int * errcode_ret)440 clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
441 cl_map_flags flags, size_t offset, size_t size,
442 cl_uint num_deps, const cl_event *deps,
443 cl_event *ev, cl_int *errcode_ret) try {
444 validate_base(q, num_deps, deps);
445 validate_obj(q, obj);
446
447 if (offset > obj->size() || offset + size > obj->size())
448 throw error(CL_INVALID_VALUE);
449
450 void *map = obj->resource(q).add_map(
451 *q, flags, blocking, { offset }, { size });
452
453 ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER,
454 { deps, deps + num_deps }));
455 ret_error(errcode_ret, CL_SUCCESS);
456 return map;
457
458 } catch (error &e) {
459 ret_error(errcode_ret, e);
460 return NULL;
461 }
462
463 PUBLIC void *
clEnqueueMapImage(cl_command_queue q,cl_mem obj,cl_bool blocking,cl_map_flags flags,const size_t * origin,const size_t * region,size_t * row_pitch,size_t * slice_pitch,cl_uint num_deps,const cl_event * deps,cl_event * ev,cl_int * errcode_ret)464 clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
465 cl_map_flags flags,
466 const size_t *origin, const size_t *region,
467 size_t *row_pitch, size_t *slice_pitch,
468 cl_uint num_deps, const cl_event *deps,
469 cl_event *ev, cl_int *errcode_ret) try {
470 image *img = dynamic_cast<image *>(obj);
471
472 validate_base(q, num_deps, deps);
473 validate_obj(q, img);
474
475 void *map = obj->resource(q).add_map(
476 *q, flags, blocking, origin, region);
477
478 ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE,
479 { deps, deps + num_deps }));
480 ret_error(errcode_ret, CL_SUCCESS);
481 return map;
482
483 } catch (error &e) {
484 ret_error(errcode_ret, e);
485 return NULL;
486 }
487
488 PUBLIC cl_int
clEnqueueUnmapMemObject(cl_command_queue q,cl_mem obj,void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)489 clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr,
490 cl_uint num_deps, const cl_event *deps,
491 cl_event *ev) try {
492 validate_base(q, num_deps, deps);
493 validate_obj(q, obj);
494
495 hard_event *hev = new hard_event(
496 *q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps },
497 [=](event &) {
498 obj->resource(q).del_map(ptr);
499 });
500
501 ret_object(ev, hev);
502 return CL_SUCCESS;
503
504 } catch (error &e) {
505 return e.get();
506 }
507