1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include <cstring>
24
25 #include "api/util.hpp"
26 #include "core/event.hpp"
27 #include "core/memory.hpp"
28
29 using namespace clover;
30
31 namespace {
32 typedef resource::vector vector_t;
33
34 vector_t
vector(const size_t * p)35 vector(const size_t *p) {
36 return range(p, 3);
37 }
38
39 vector_t
pitch(const vector_t & region,vector_t pitch)40 pitch(const vector_t ®ion, vector_t pitch) {
41 for (auto x : zip(tail(pitch),
42 map(multiplies(), region, pitch))) {
43 // The spec defines a value of zero as the natural pitch,
44 // i.e. the unaligned size of the previous dimension.
45 if (std::get<0>(x) == 0)
46 std::get<0>(x) = std::get<1>(x);
47 }
48
49 return pitch;
50 }
51
52 ///
53 /// Size of a region in bytes.
54 ///
55 size_t
size(const vector_t & pitch,const vector_t & region)56 size(const vector_t &pitch, const vector_t ®ion) {
57 if (any_of(is_zero(), region))
58 return 0;
59 else
60 return dot(pitch, region - vector_t{ 0, 1, 1 });
61 }
62
63 ///
64 /// Common argument checking shared by memory transfer commands.
65 ///
66 void
validate_common(command_queue & q,const ref_vector<event> & deps)67 validate_common(command_queue &q,
68 const ref_vector<event> &deps) {
69 if (any_of([&](const event &ev) {
70 return ev.context() != q.context();
71 }, deps))
72 throw error(CL_INVALID_CONTEXT);
73 }
74
75 ///
76 /// Common error checking for a buffer object argument.
77 ///
78 void
validate_object(command_queue & q,buffer & mem,const vector_t & origin,const vector_t & pitch,const vector_t & region)79 validate_object(command_queue &q, buffer &mem, const vector_t &origin,
80 const vector_t &pitch, const vector_t ®ion) {
81 if (mem.context() != q.context())
82 throw error(CL_INVALID_CONTEXT);
83
84 // The region must fit within the specified pitch,
85 if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
86 throw error(CL_INVALID_VALUE);
87
88 // ...and within the specified object.
89 if (dot(pitch, origin) + size(pitch, region) > mem.size())
90 throw error(CL_INVALID_VALUE);
91
92 if (any_of(is_zero(), region))
93 throw error(CL_INVALID_VALUE);
94 }
95
96 ///
97 /// Common error checking for an image argument.
98 ///
99 void
validate_object(command_queue & q,image & img,const vector_t & orig,const vector_t & region)100 validate_object(command_queue &q, image &img,
101 const vector_t &orig, const vector_t ®ion) {
102 vector_t size = { img.width(), img.height(), img.depth() };
103
104 if (!q.device().image_support())
105 throw error(CL_INVALID_OPERATION);
106
107 if (img.context() != q.context())
108 throw error(CL_INVALID_CONTEXT);
109
110 if (any_of(greater(), orig + region, size))
111 throw error(CL_INVALID_VALUE);
112
113 if (any_of(is_zero(), region))
114 throw error(CL_INVALID_VALUE);
115 }
116
117 ///
118 /// Common error checking for a host pointer argument.
119 ///
120 void
validate_object(command_queue & q,const void * ptr,const vector_t & orig,const vector_t & pitch,const vector_t & region)121 validate_object(command_queue &q, const void *ptr, const vector_t &orig,
122 const vector_t &pitch, const vector_t ®ion) {
123 if (!ptr)
124 throw error(CL_INVALID_VALUE);
125
126 // The region must fit within the specified pitch.
127 if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
128 throw error(CL_INVALID_VALUE);
129 }
130
131 ///
132 /// Common argument checking for a copy between two buffer objects.
133 ///
134 void
validate_copy(command_queue & q,buffer & dst_mem,const vector_t & dst_orig,const vector_t & dst_pitch,buffer & src_mem,const vector_t & src_orig,const vector_t & src_pitch,const vector_t & region)135 validate_copy(command_queue &q, buffer &dst_mem,
136 const vector_t &dst_orig, const vector_t &dst_pitch,
137 buffer &src_mem,
138 const vector_t &src_orig, const vector_t &src_pitch,
139 const vector_t ®ion) {
140 if (dst_mem == src_mem) {
141 auto dst_offset = dot(dst_pitch, dst_orig);
142 auto src_offset = dot(src_pitch, src_orig);
143
144 if (interval_overlaps()(
145 dst_offset, dst_offset + size(dst_pitch, region),
146 src_offset, src_offset + size(src_pitch, region)))
147 throw error(CL_MEM_COPY_OVERLAP);
148 }
149 }
150
151 ///
152 /// Common argument checking for a copy between two image objects.
153 ///
154 void
validate_copy(command_queue & q,image & dst_img,const vector_t & dst_orig,image & src_img,const vector_t & src_orig,const vector_t & region)155 validate_copy(command_queue &q,
156 image &dst_img, const vector_t &dst_orig,
157 image &src_img, const vector_t &src_orig,
158 const vector_t ®ion) {
159 if (dst_img.format() != src_img.format())
160 throw error(CL_IMAGE_FORMAT_MISMATCH);
161
162 if (dst_img == src_img) {
163 if (all_of(interval_overlaps(),
164 dst_orig, dst_orig + region,
165 src_orig, src_orig + region))
166 throw error(CL_MEM_COPY_OVERLAP);
167 }
168 }
169
170 ///
171 /// Checks that the host access flags of the memory object are
172 /// within the allowed set \a flags.
173 ///
174 void
validate_object_access(const memory_obj & mem,const cl_mem_flags flags)175 validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
176 if (mem.flags() & ~flags &
177 (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
178 CL_MEM_HOST_NO_ACCESS))
179 throw error(CL_INVALID_OPERATION);
180 }
181
182 ///
183 /// Checks that the mapping flags are correct.
184 ///
185 void
validate_map_flags(const memory_obj & mem,const cl_map_flags flags)186 validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
187 if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
188 (flags & CL_MAP_WRITE_INVALIDATE_REGION))
189 throw error(CL_INVALID_VALUE);
190
191 if (flags & CL_MAP_READ)
192 validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
193
194 if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
195 validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
196 }
197
198 ///
199 /// Class that encapsulates the task of mapping an object of type
200 /// \a T. The return value of get() should be implicitly
201 /// convertible to \a void *.
202 ///
203 template<typename T>
204 struct _map {
205 static mapping
get__anon85ed13720111::_map206 get(command_queue &q, T obj, cl_map_flags flags,
207 size_t offset, size_t size) {
208 return { q, obj->resource(q), flags, true,
209 {{ offset }}, {{ size, 1, 1 }} };
210 }
211 };
212
213 template<>
214 struct _map<void *> {
215 static void *
get__anon85ed13720111::_map216 get(command_queue &q, void *obj, cl_map_flags flags,
217 size_t offset, size_t size) {
218 return (char *)obj + offset;
219 }
220 };
221
222 template<>
223 struct _map<const void *> {
224 static const void *
get__anon85ed13720111::_map225 get(command_queue &q, const void *obj, cl_map_flags flags,
226 size_t offset, size_t size) {
227 return (const char *)obj + offset;
228 }
229 };
230
231 ///
232 /// Software copy from \a src_obj to \a dst_obj. They can be
233 /// either pointers or memory objects.
234 ///
235 template<typename T, typename S>
236 std::function<void (event &)>
soft_copy_op(command_queue & q,T dst_obj,const vector_t & dst_orig,const vector_t & dst_pitch,S src_obj,const vector_t & src_orig,const vector_t & src_pitch,const vector_t & region)237 soft_copy_op(command_queue &q,
238 T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
239 S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
240 const vector_t ®ion) {
241 return [=, &q](event &) {
242 auto dst = _map<T>::get(q, dst_obj, CL_MAP_WRITE,
243 dot(dst_pitch, dst_orig),
244 size(dst_pitch, region));
245 auto src = _map<S>::get(q, src_obj, CL_MAP_READ,
246 dot(src_pitch, src_orig),
247 size(src_pitch, region));
248 vector_t v = {};
249
250 for (v[2] = 0; v[2] < region[2]; ++v[2]) {
251 for (v[1] = 0; v[1] < region[1]; ++v[1]) {
252 std::memcpy(
253 static_cast<char *>(dst) + dot(dst_pitch, v),
254 static_cast<const char *>(src) + dot(src_pitch, v),
255 src_pitch[0] * region[0]);
256 }
257 }
258 };
259 }
260
261 ///
262 /// Hardware copy from \a src_obj to \a dst_obj.
263 ///
264 template<typename T, typename S>
265 std::function<void (event &)>
hard_copy_op(command_queue & q,T dst_obj,const vector_t & dst_orig,S src_obj,const vector_t & src_orig,const vector_t & region)266 hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
267 S src_obj, const vector_t &src_orig, const vector_t ®ion) {
268 return [=, &q](event &) {
269 dst_obj->resource(q).copy(q, dst_orig, region,
270 src_obj->resource(q), src_orig);
271 };
272 }
273 }
274
275 CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,size_t offset,size_t size,void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)276 clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
277 size_t offset, size_t size, void *ptr,
278 cl_uint num_deps, const cl_event *d_deps,
279 cl_event *rd_ev) try {
280 auto &q = obj(d_q);
281 auto &mem = obj<buffer>(d_mem);
282 auto deps = objs<wait_list_tag>(d_deps, num_deps);
283 vector_t region = { size, 1, 1 };
284 vector_t obj_origin = { offset };
285 auto obj_pitch = pitch(region, {{ 1 }});
286
287 validate_common(q, deps);
288 validate_object(q, ptr, {}, obj_pitch, region);
289 validate_object(q, mem, obj_origin, obj_pitch, region);
290 validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
291
292 auto hev = create<hard_event>(
293 q, CL_COMMAND_READ_BUFFER, deps,
294 soft_copy_op(q, ptr, {}, obj_pitch,
295 &mem, obj_origin, obj_pitch,
296 region));
297
298 ret_object(rd_ev, hev);
299 return CL_SUCCESS;
300
301 } catch (error &e) {
302 return e.get();
303 }
304
305 CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,size_t offset,size_t size,const void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)306 clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
307 size_t offset, size_t size, const void *ptr,
308 cl_uint num_deps, const cl_event *d_deps,
309 cl_event *rd_ev) try {
310 auto &q = obj(d_q);
311 auto &mem = obj<buffer>(d_mem);
312 auto deps = objs<wait_list_tag>(d_deps, num_deps);
313 vector_t region = { size, 1, 1 };
314 vector_t obj_origin = { offset };
315 auto obj_pitch = pitch(region, {{ 1 }});
316
317 validate_common(q, deps);
318 validate_object(q, mem, obj_origin, obj_pitch, region);
319 validate_object(q, ptr, {}, obj_pitch, region);
320 validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
321
322 auto hev = create<hard_event>(
323 q, CL_COMMAND_WRITE_BUFFER, deps,
324 soft_copy_op(q, &mem, obj_origin, obj_pitch,
325 ptr, {}, obj_pitch,
326 region));
327
328 ret_object(rd_ev, hev);
329 return CL_SUCCESS;
330
331 } catch (error &e) {
332 return e.get();
333 }
334
335 CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,const size_t * p_obj_origin,const size_t * p_host_origin,const size_t * p_region,size_t obj_row_pitch,size_t obj_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)336 clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
337 const size_t *p_obj_origin,
338 const size_t *p_host_origin,
339 const size_t *p_region,
340 size_t obj_row_pitch, size_t obj_slice_pitch,
341 size_t host_row_pitch, size_t host_slice_pitch,
342 void *ptr,
343 cl_uint num_deps, const cl_event *d_deps,
344 cl_event *rd_ev) try {
345 auto &q = obj(d_q);
346 auto &mem = obj<buffer>(d_mem);
347 auto deps = objs<wait_list_tag>(d_deps, num_deps);
348 auto region = vector(p_region);
349 auto obj_origin = vector(p_obj_origin);
350 auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
351 auto host_origin = vector(p_host_origin);
352 auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
353
354 validate_common(q, deps);
355 validate_object(q, ptr, host_origin, host_pitch, region);
356 validate_object(q, mem, obj_origin, obj_pitch, region);
357 validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
358
359 auto hev = create<hard_event>(
360 q, CL_COMMAND_READ_BUFFER_RECT, deps,
361 soft_copy_op(q, ptr, host_origin, host_pitch,
362 &mem, obj_origin, obj_pitch,
363 region));
364
365 ret_object(rd_ev, hev);
366 return CL_SUCCESS;
367
368 } catch (error &e) {
369 return e.get();
370 }
371
372 CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,const size_t * p_obj_origin,const size_t * p_host_origin,const size_t * p_region,size_t obj_row_pitch,size_t obj_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,const void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)373 clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
374 const size_t *p_obj_origin,
375 const size_t *p_host_origin,
376 const size_t *p_region,
377 size_t obj_row_pitch, size_t obj_slice_pitch,
378 size_t host_row_pitch, size_t host_slice_pitch,
379 const void *ptr,
380 cl_uint num_deps, const cl_event *d_deps,
381 cl_event *rd_ev) try {
382 auto &q = obj(d_q);
383 auto &mem = obj<buffer>(d_mem);
384 auto deps = objs<wait_list_tag>(d_deps, num_deps);
385 auto region = vector(p_region);
386 auto obj_origin = vector(p_obj_origin);
387 auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
388 auto host_origin = vector(p_host_origin);
389 auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
390
391 validate_common(q, deps);
392 validate_object(q, mem, obj_origin, obj_pitch, region);
393 validate_object(q, ptr, host_origin, host_pitch, region);
394 validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
395
396 auto hev = create<hard_event>(
397 q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
398 soft_copy_op(q, &mem, obj_origin, obj_pitch,
399 ptr, host_origin, host_pitch,
400 region));
401
402 ret_object(rd_ev, hev);
403 return CL_SUCCESS;
404
405 } catch (error &e) {
406 return e.get();
407 }
408
409 CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,size_t src_offset,size_t dst_offset,size_t size,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)410 clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
411 size_t src_offset, size_t dst_offset, size_t size,
412 cl_uint num_deps, const cl_event *d_deps,
413 cl_event *rd_ev) try {
414 auto &q = obj(d_q);
415 auto &src_mem = obj<buffer>(d_src_mem);
416 auto &dst_mem = obj<buffer>(d_dst_mem);
417 auto deps = objs<wait_list_tag>(d_deps, num_deps);
418 vector_t region = { size, 1, 1 };
419 vector_t dst_origin = { dst_offset };
420 auto dst_pitch = pitch(region, {{ 1 }});
421 vector_t src_origin = { src_offset };
422 auto src_pitch = pitch(region, {{ 1 }});
423
424 validate_common(q, deps);
425 validate_object(q, dst_mem, dst_origin, dst_pitch, region);
426 validate_object(q, src_mem, src_origin, src_pitch, region);
427 validate_copy(q, dst_mem, dst_origin, dst_pitch,
428 src_mem, src_origin, src_pitch, region);
429
430 auto hev = create<hard_event>(
431 q, CL_COMMAND_COPY_BUFFER, deps,
432 hard_copy_op(q, &dst_mem, dst_origin,
433 &src_mem, src_origin, region));
434
435 ret_object(rd_ev, hev);
436 return CL_SUCCESS;
437
438 } catch (error &e) {
439 return e.get();
440 }
441
442 CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,const size_t * p_src_origin,const size_t * p_dst_origin,const size_t * p_region,size_t src_row_pitch,size_t src_slice_pitch,size_t dst_row_pitch,size_t dst_slice_pitch,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)443 clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
444 cl_mem d_dst_mem,
445 const size_t *p_src_origin, const size_t *p_dst_origin,
446 const size_t *p_region,
447 size_t src_row_pitch, size_t src_slice_pitch,
448 size_t dst_row_pitch, size_t dst_slice_pitch,
449 cl_uint num_deps, const cl_event *d_deps,
450 cl_event *rd_ev) try {
451 auto &q = obj(d_q);
452 auto &src_mem = obj<buffer>(d_src_mem);
453 auto &dst_mem = obj<buffer>(d_dst_mem);
454 auto deps = objs<wait_list_tag>(d_deps, num_deps);
455 auto region = vector(p_region);
456 auto dst_origin = vector(p_dst_origin);
457 auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
458 auto src_origin = vector(p_src_origin);
459 auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});
460
461 validate_common(q, deps);
462 validate_object(q, dst_mem, dst_origin, dst_pitch, region);
463 validate_object(q, src_mem, src_origin, src_pitch, region);
464 validate_copy(q, dst_mem, dst_origin, dst_pitch,
465 src_mem, src_origin, src_pitch, region);
466
467 auto hev = create<hard_event>(
468 q, CL_COMMAND_COPY_BUFFER_RECT, deps,
469 soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
470 &src_mem, src_origin, src_pitch,
471 region));
472
473 ret_object(rd_ev, hev);
474 return CL_SUCCESS;
475
476 } catch (error &e) {
477 return e.get();
478 }
479
480 CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,const size_t * p_origin,const size_t * p_region,size_t row_pitch,size_t slice_pitch,void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)481 clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
482 const size_t *p_origin, const size_t *p_region,
483 size_t row_pitch, size_t slice_pitch, void *ptr,
484 cl_uint num_deps, const cl_event *d_deps,
485 cl_event *rd_ev) try {
486 auto &q = obj(d_q);
487 auto &img = obj<image>(d_mem);
488 auto deps = objs<wait_list_tag>(d_deps, num_deps);
489 auto region = vector(p_region);
490 auto dst_pitch = pitch(region, {{ img.pixel_size(),
491 row_pitch, slice_pitch }});
492 auto src_origin = vector(p_origin);
493 auto src_pitch = pitch(region, {{ img.pixel_size(),
494 img.row_pitch(), img.slice_pitch() }});
495
496 validate_common(q, deps);
497 validate_object(q, ptr, {}, dst_pitch, region);
498 validate_object(q, img, src_origin, region);
499 validate_object_access(img, CL_MEM_HOST_READ_ONLY);
500
501 auto hev = create<hard_event>(
502 q, CL_COMMAND_READ_IMAGE, deps,
503 soft_copy_op(q, ptr, {}, dst_pitch,
504 &img, src_origin, src_pitch,
505 region));
506
507 ret_object(rd_ev, hev);
508 return CL_SUCCESS;
509
510 } catch (error &e) {
511 return e.get();
512 }
513
514 CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,const size_t * p_origin,const size_t * p_region,size_t row_pitch,size_t slice_pitch,const void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)515 clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
516 const size_t *p_origin, const size_t *p_region,
517 size_t row_pitch, size_t slice_pitch, const void *ptr,
518 cl_uint num_deps, const cl_event *d_deps,
519 cl_event *rd_ev) try {
520 auto &q = obj(d_q);
521 auto &img = obj<image>(d_mem);
522 auto deps = objs<wait_list_tag>(d_deps, num_deps);
523 auto region = vector(p_region);
524 auto dst_origin = vector(p_origin);
525 auto dst_pitch = pitch(region, {{ img.pixel_size(),
526 img.row_pitch(), img.slice_pitch() }});
527 auto src_pitch = pitch(region, {{ img.pixel_size(),
528 row_pitch, slice_pitch }});
529
530 validate_common(q, deps);
531 validate_object(q, img, dst_origin, region);
532 validate_object(q, ptr, {}, src_pitch, region);
533 validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);
534
535 auto hev = create<hard_event>(
536 q, CL_COMMAND_WRITE_IMAGE, deps,
537 soft_copy_op(q, &img, dst_origin, dst_pitch,
538 ptr, {}, src_pitch,
539 region));
540
541 ret_object(rd_ev, hev);
542 return CL_SUCCESS;
543
544 } catch (error &e) {
545 return e.get();
546 }
547
548 CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,const size_t * p_src_origin,const size_t * p_dst_origin,const size_t * p_region,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)549 clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
550 const size_t *p_src_origin, const size_t *p_dst_origin,
551 const size_t *p_region,
552 cl_uint num_deps, const cl_event *d_deps,
553 cl_event *rd_ev) try {
554 auto &q = obj(d_q);
555 auto &src_img = obj<image>(d_src_mem);
556 auto &dst_img = obj<image>(d_dst_mem);
557 auto deps = objs<wait_list_tag>(d_deps, num_deps);
558 auto region = vector(p_region);
559 auto dst_origin = vector(p_dst_origin);
560 auto src_origin = vector(p_src_origin);
561
562 validate_common(q, deps);
563 validate_object(q, dst_img, dst_origin, region);
564 validate_object(q, src_img, src_origin, region);
565 validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);
566
567 auto hev = create<hard_event>(
568 q, CL_COMMAND_COPY_IMAGE, deps,
569 hard_copy_op(q, &dst_img, dst_origin,
570 &src_img, src_origin,
571 region));
572
573 ret_object(rd_ev, hev);
574 return CL_SUCCESS;
575
576 } catch (error &e) {
577 return e.get();
578 }
579
580 CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,const size_t * p_src_origin,const size_t * p_region,size_t dst_offset,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)581 clEnqueueCopyImageToBuffer(cl_command_queue d_q,
582 cl_mem d_src_mem, cl_mem d_dst_mem,
583 const size_t *p_src_origin, const size_t *p_region,
584 size_t dst_offset,
585 cl_uint num_deps, const cl_event *d_deps,
586 cl_event *rd_ev) try {
587 auto &q = obj(d_q);
588 auto &src_img = obj<image>(d_src_mem);
589 auto &dst_mem = obj<buffer>(d_dst_mem);
590 auto deps = objs<wait_list_tag>(d_deps, num_deps);
591 auto region = vector(p_region);
592 vector_t dst_origin = { dst_offset };
593 auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
594 auto src_origin = vector(p_src_origin);
595 auto src_pitch = pitch(region, {{ src_img.pixel_size(),
596 src_img.row_pitch(),
597 src_img.slice_pitch() }});
598
599 validate_common(q, deps);
600 validate_object(q, dst_mem, dst_origin, dst_pitch, region);
601 validate_object(q, src_img, src_origin, region);
602
603 auto hev = create<hard_event>(
604 q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
605 soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
606 &src_img, src_origin, src_pitch,
607 region));
608
609 ret_object(rd_ev, hev);
610 return CL_SUCCESS;
611
612 } catch (error &e) {
613 return e.get();
614 }
615
616 CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,size_t src_offset,const size_t * p_dst_origin,const size_t * p_region,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)617 clEnqueueCopyBufferToImage(cl_command_queue d_q,
618 cl_mem d_src_mem, cl_mem d_dst_mem,
619 size_t src_offset,
620 const size_t *p_dst_origin, const size_t *p_region,
621 cl_uint num_deps, const cl_event *d_deps,
622 cl_event *rd_ev) try {
623 auto &q = obj(d_q);
624 auto &src_mem = obj<buffer>(d_src_mem);
625 auto &dst_img = obj<image>(d_dst_mem);
626 auto deps = objs<wait_list_tag>(d_deps, num_deps);
627 auto region = vector(p_region);
628 auto dst_origin = vector(p_dst_origin);
629 auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
630 dst_img.row_pitch(),
631 dst_img.slice_pitch() }});
632 vector_t src_origin = { src_offset };
633 auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});
634
635 validate_common(q, deps);
636 validate_object(q, dst_img, dst_origin, region);
637 validate_object(q, src_mem, src_origin, src_pitch, region);
638
639 auto hev = create<hard_event>(
640 q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
641 soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
642 &src_mem, src_origin, src_pitch,
643 region));
644
645 ret_object(rd_ev, hev);
646 return CL_SUCCESS;
647
648 } catch (error &e) {
649 return e.get();
650 }
651
652 CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,cl_map_flags flags,size_t offset,size_t size,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev,cl_int * r_errcode)653 clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
654 cl_map_flags flags, size_t offset, size_t size,
655 cl_uint num_deps, const cl_event *d_deps,
656 cl_event *rd_ev, cl_int *r_errcode) try {
657 auto &q = obj(d_q);
658 auto &mem = obj<buffer>(d_mem);
659 auto deps = objs<wait_list_tag>(d_deps, num_deps);
660 vector_t region = { size, 1, 1 };
661 vector_t obj_origin = { offset };
662 auto obj_pitch = pitch(region, {{ 1 }});
663
664 validate_common(q, deps);
665 validate_object(q, mem, obj_origin, obj_pitch, region);
666 validate_map_flags(mem, flags);
667
668 void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);
669
670 ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps));
671 ret_error(r_errcode, CL_SUCCESS);
672 return map;
673
674 } catch (error &e) {
675 ret_error(r_errcode, e);
676 return NULL;
677 }
678
679 CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,cl_map_flags flags,const size_t * p_origin,const size_t * p_region,size_t * row_pitch,size_t * slice_pitch,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev,cl_int * r_errcode)680 clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
681 cl_map_flags flags,
682 const size_t *p_origin, const size_t *p_region,
683 size_t *row_pitch, size_t *slice_pitch,
684 cl_uint num_deps, const cl_event *d_deps,
685 cl_event *rd_ev, cl_int *r_errcode) try {
686 auto &q = obj(d_q);
687 auto &img = obj<image>(d_mem);
688 auto deps = objs<wait_list_tag>(d_deps, num_deps);
689 auto region = vector(p_region);
690 auto origin = vector(p_origin);
691
692 validate_common(q, deps);
693 validate_object(q, img, origin, region);
694 validate_map_flags(img, flags);
695
696 void *map = img.resource(q).add_map(q, flags, blocking, origin, region);
697
698 ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps));
699 ret_error(r_errcode, CL_SUCCESS);
700 return map;
701
702 } catch (error &e) {
703 ret_error(r_errcode, e);
704 return NULL;
705 }
706
707 CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q,cl_mem d_mem,void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)708 clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
709 cl_uint num_deps, const cl_event *d_deps,
710 cl_event *rd_ev) try {
711 auto &q = obj(d_q);
712 auto &mem = obj(d_mem);
713 auto deps = objs<wait_list_tag>(d_deps, num_deps);
714
715 validate_common(q, deps);
716
717 auto hev = create<hard_event>(
718 q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
719 [=, &q, &mem](event &) {
720 mem.resource(q).del_map(ptr);
721 });
722
723 ret_object(rd_ev, hev);
724 return CL_SUCCESS;
725
726 } catch (error &e) {
727 return e.get();
728 }
729
730 CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue command_queue,cl_uint num_mem_objects,const cl_mem * mem_objects,cl_mem_migration_flags flags,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)731 clEnqueueMigrateMemObjects(cl_command_queue command_queue,
732 cl_uint num_mem_objects,
733 const cl_mem *mem_objects,
734 cl_mem_migration_flags flags,
735 cl_uint num_events_in_wait_list,
736 const cl_event *event_wait_list,
737 cl_event *event) {
738 CLOVER_NOT_SUPPORTED_UNTIL("1.2");
739 return CL_INVALID_VALUE;
740 }
741