1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include <cstring>
24
25 #include "api/util.hpp"
26 #include "core/event.hpp"
27 #include "core/memory.hpp"
28
29 using namespace clover;
30
31 namespace {
32 typedef resource::vector vector_t;
33
34 vector_t
vector(const size_t * p)35 vector(const size_t *p) {
36 return range(p, 3);
37 }
38
39 vector_t
pitch(const vector_t & region,vector_t pitch)40 pitch(const vector_t ®ion, vector_t pitch) {
41 for (auto x : zip(tail(pitch),
42 map(multiplies(), region, pitch))) {
43 // The spec defines a value of zero as the natural pitch,
44 // i.e. the unaligned size of the previous dimension.
45 if (std::get<0>(x) == 0)
46 std::get<0>(x) = std::get<1>(x);
47 }
48
49 return pitch;
50 }
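
   // Illustrative example (not in the original source): for a region of
   // { 16, 4, 2 } one-byte elements with pitch {{ 1, 0, 0 }}, the zero
   // row and slice pitches are replaced by their natural values, giving
   // { 1, 16, 64 }: a row spans 16 bytes and a slice spans 4 such rows.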

   ///
   /// Size of a region in bytes.
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }
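
   // Worked example (added for clarity): with pitch = { 1, 16, 64 } and
   // region = { 16, 4, 2 }, this yields 16*1 + 3*16 + 1*64 = 128 bytes,
   // i.e. the distance from the region origin to one past the last byte
   // touched, which is what gets compared against the object size below.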

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }
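
   // Commentary on the pitch check above (added): it compares
   // { pitch[0]*region[0], pitch[1]*region[1] } element-wise against
   // { pitch[1], pitch[2] }, so a row of the region may not exceed the
   // row pitch and a slice may not exceed the slice pitch.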

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      vector_t size = { img.width(), img.height(), img.depth() };

      if (!q.device().image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }
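
   // For illustration (added): copying 64 bytes within the same buffer
   // from offset 0 to offset 32 gives the byte intervals [0, 64) and
   // [32, 96), which overlap and are rejected with CL_MEM_COPY_OVERLAP;
   // disjoint offsets such as 0 and 64 would pass.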

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }
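
   // Example of the bit test above (added): a read from a buffer created
   // with CL_MEM_HOST_WRITE_ONLY passes flags = CL_MEM_HOST_READ_ONLY
   // here, so mem.flags() & ~flags leaves the write-only bit set and
   // CL_INVALID_OPERATION is thrown.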

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }
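
   // For instance (added): CL_MAP_READ | CL_MAP_WRITE_INVALIDATE_REGION
   // is rejected outright, since invalidating the mapped region
   // contradicts reading back its previous contents.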

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  The return value of get() should be implicitly
   /// convertible to \a void *.
   ///
   template<typename T>
   struct _map {
      static mapping
      get(command_queue &q, T obj, cl_map_flags flags,
          size_t offset, size_t size) {
         return { q, obj->resource(q), flags, true,
                  {{ offset }}, {{ size, 1, 1 }} };
      }
   };

   template<>
   struct _map<void *> {
      static void *
      get(command_queue &q, void *obj, cl_map_flags flags,
          size_t offset, size_t size) {
         return (char *)obj + offset;
      }
   };

   template<>
   struct _map<const void *> {
      static const void *
      get(command_queue &q, const void *obj, cl_map_flags flags,
          size_t offset, size_t size) {
         return (const char *)obj + offset;
      }
   };
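
   // Net effect (added commentary): for memory objects the primary
   // template returns a mapping of the device resource, while the
   // void * / const void * specializations simply offset a host pointer,
   // so soft_copy_op() below can treat both kinds of argument uniformly.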

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         auto dst = _map<T>::get(q, dst_obj, CL_MAP_WRITE,
                                 dot(dst_pitch, dst_orig),
                                 size(dst_pitch, region));
         auto src = _map<S>::get(q, src_obj, CL_MAP_READ,
                                 dot(src_pitch, src_orig),
                                 size(src_pitch, region));
         vector_t v = {};

         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst_pitch, v),
                  static_cast<const char *>(src) + dot(src_pitch, v),
                  src_pitch[0] * region[0]);
            }
         }
      };
   }
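
   // Addressing sketch (added): v[0] stays zero, so for each slice v[2]
   // and row v[1], dot(pitch, v) = v[1]*row_pitch + v[2]*slice_pitch is
   // the byte offset of that row, and one memcpy of region[0] elements
   // of src_pitch[0] bytes each transfers it.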

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource(q).copy(q, dst_orig, region,
                                   src_obj->resource(q), src_orig);
      };
   }
}

CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
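
// Client-side usage sketch (illustrative only, not part of this file): a
// blocking read of the first 256 bytes of a buffer would look like
//
//    char data[256];
//    cl_int r = clEnqueueReadBuffer(q, buf, CL_TRUE, 0, sizeof(data),
//                                   data, 0, NULL, NULL);
//
// The remaining entry points below follow the same validate / enqueue /
// optionally-block pattern.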

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}
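
// Maps are released with clEnqueueUnmapMemObject() below; an illustrative
// round trip (assuming err, q and buf are set up elsewhere):
//
//    void *p = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_WRITE,
//                                 0, 256, 0, NULL, NULL, &err);
//    /* ... fill the mapped region through p ... */
//    clEnqueueUnmapMemObject(q, buf, p, 0, NULL, NULL);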

CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   void *map = img.resource(q).add_map(q, flags, blocking, origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue command_queue,
                           cl_uint num_mem_objects,
                           const cl_mem *mem_objects,
                           cl_mem_migration_flags flags,
                           cl_uint num_events_in_wait_list,
                           const cl_event *event_wait_list,
                           cl_event *event) {
   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
   return CL_INVALID_VALUE;
}