//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "util/bitscan.h"

#include "api/dispatch.hpp"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;
namespace {
   typedef resource::vector vector_t;

   vector_t
   vector(const size_t *p) {
      return range(p, 3);
   }

   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }
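
   // Worked example (illustrative, not part of the original source):
   // for region = { 16, 4, 2 } and pitch = {{ 1 }} (1-byte elements,
   // zero row and slice pitches), the natural pitches cascade to
   // { 1, 16, 64 }: the row pitch becomes 16 * 1 bytes and -- because
   // zip()/map() yield lazy adaptor ranges -- the updated row pitch
   // then feeds the slice pitch, giving 4 * 16 bytes.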

   ///
   /// Size of a region in bytes.
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }
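
   // E.g. (illustrative): with pitch = { 1, 16, 64 } and
   // region = { 16, 4, 2 }, this yields dot({ 1, 16, 64 }, { 16, 3, 1 })
   // = 16 + 48 + 64 = 128 bytes -- the extent up to the end of the last
   // row of the last slice, which is why only the higher dimensions are
   // decremented.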

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }
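
   // E.g. (illustrative): with pitch = { 1, 16, 64 },
   // origin = { 4, 0, 0 } and region = { 16, 4, 2 }, the transfer
   // starts at byte offset dot(pitch, origin) = 4 and spans 128 bytes,
   // so mem.size() must be at least 132.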

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      vector_t size = { img.width(), img.height(), img.depth() };
      const auto &dev = q.device();

      if (!dev.image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);

      switch (img.type()) {
      case CL_MEM_OBJECT_IMAGE2D: {
         const size_t max = 1 << dev.max_image_levels_2d();
         if (img.width() > max || img.height() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE3D: {
         const size_t max = 1 << dev.max_image_levels_3d();
         if (img.width() > max || img.height() > max || img.depth() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      // XXX: Implement missing checks once Clover supports more image types.
      default:
         throw error(CL_INVALID_IMAGE_SIZE);
      }
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }
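
   // E.g. (illustrative): mapping a buffer created with
   // CL_MEM_HOST_WRITE_ONLY using CL_MAP_READ fails with
   // CL_INVALID_OPERATION, while combining CL_MAP_READ or CL_MAP_WRITE
   // with CL_MAP_WRITE_INVALIDATE_REGION fails with CL_INVALID_VALUE.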

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T. The return value of get() should be implicitly
   /// convertible to \a void *.
   ///
   template<typename T>
   struct _map;

   template<>
   struct _map<image*> {
      _map(command_queue &q, image *img, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, img->resource_in(q), flags, true, offset, region),
         pitch(map.pitch())
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<>
   struct _map<buffer*> {
      _map(command_queue &q, buffer *mem, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, mem->resource_in(q), flags, true,
             {{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
         pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<typename P>
   struct _map<P *> {
      _map(command_queue &q, P *ptr, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(ptr);
      }

      P *ptr;
      vector_t pitch;
   };
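
   // Note (illustrative): the image specialization maps the 3D region
   // directly and takes its pitch from the driver, the buffer
   // specialization flattens the region to the 1D byte range
   // [dot(pitch, offset), dot(pitch, offset) + size(pitch, region)),
   // and the generic host-pointer specialization just applies the byte
   // offset to the pointer.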

   ///
   /// Software copy from \a src_obj to \a dst_obj. They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         _map<T> dst = { q, dst_obj, CL_MAP_WRITE,
                         dst_orig, dst_pitch, region };
         _map<S> src = { q, src_obj, CL_MAP_READ,
                         src_orig, src_pitch, region };
         assert(src.pitch[0] == dst.pitch[0]);
         vector_t v = {};

         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst.pitch, v),
                  static_cast<const char *>(src) + dot(src.pitch, v),
                  src.pitch[0] * region[0]);
            }
         }
      };
   }
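
   // E.g. (illustrative): copying region = { 16, 4, 2 } performs
   // 4 * 2 = 8 memcpy() calls of 16 bytes each (for 1-byte elements).
   // The start of each row is recomputed from the per-side pitches, so
   // source and destination may use different row and slice pitches.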

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource_in(q).copy(q, dst_orig, region,
                                      src_obj->resource_in(q), src_orig);
      };
   }
}

CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
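
// E.g. (illustrative): for a rect transfer the first byte read from the
// buffer lives at dot(obj_pitch, obj_origin) = obj_origin[0] +
// obj_row_pitch * obj_origin[1] + obj_slice_pitch * obj_origin[2]
// (with zero pitches replaced by their natural values), and likewise
// on the host side with host_pitch and host_origin.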

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
                    const void *pattern, size_t pattern_size,
                    size_t offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t origin = { offset };
   auto dst_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, origin, dst_pitch, region);

   if (!pattern)
      return CL_INVALID_VALUE;

   if (!util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 || size % pattern_size ||
       offset % pattern_size) {
      return CL_INVALID_VALUE;
   }

   auto sub = dynamic_cast<sub_buffer *>(&mem);
   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
   }

   std::string data = std::string((char *)pattern, pattern_size);
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_BUFFER, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
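
// E.g. (illustrative): pattern_size must be a power of two no larger
// than 128 that divides both offset and size, so filling 64 bytes at
// offset 16 with a 4-byte pattern is valid, while a 3-byte pattern or
// an offset of 18 would return CL_INVALID_VALUE.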

CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
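
// E.g. (illustrative): copying 8 bytes within the same buffer from
// src_offset = 0 to dst_offset = 4 makes the byte intervals [0, 8) and
// [4, 12) overlap, so validate_copy() raises CL_MEM_COPY_OVERLAP.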

CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
                   const void *fill_color,
                   const size_t *p_origin, const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto origin = vector(p_origin);
   auto region = vector(p_region);

   validate_common(q, deps);
   validate_object(q, img, origin, region);

   if (!fill_color)
      return CL_INVALID_VALUE;

   std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_IMAGE, deps,
      [=, &q, &img](event &) {
         img.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}
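
// Usage sketch (illustrative): a caller might do
//    void *p = clEnqueueMapBuffer(q, buf, CL_TRUE, CL_MAP_WRITE,
//                                 0, n, 0, NULL, NULL, &err);
// write n bytes through p, and later release the mapping with
// clEnqueueUnmapMemObject(q, buf, p, 0, NULL, NULL).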

CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   if (!row_pitch)
      throw error(CL_INVALID_VALUE);

   if (img.slice_pitch() && !slice_pitch)
      throw error(CL_INVALID_VALUE);

   auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
   *row_pitch = map->pitch()[1];
   if (slice_pitch)
      *slice_pitch = map->pitch()[2];

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue d_q,
                           cl_uint num_mems,
                           const cl_mem *d_mems,
                           cl_mem_migration_flags flags,
                           cl_uint num_deps,
                           const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto mems = objs<memory_obj>(d_mems, num_mems);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   if (any_of([&](const memory_obj &m) {
            return m.context() != q.context();
         }, mems))
      throw error(CL_INVALID_CONTEXT);

   if (flags & ~(CL_MIGRATE_MEM_OBJECT_HOST |
                 CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
      throw error(CL_INVALID_VALUE);

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         for (auto &mem: mems) {
            if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
               if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
                  mem.resource_out(q);

               // For flags == CL_MIGRATE_MEM_OBJECT_HOST alone to be
               // efficient, cl*ReadBuffer* would need to implement
               // reading directly from host memory.

            } else {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_undef(q);
               else
                  mem.resource_in(q);
            }
         }
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                          cl_command_queue queue, cl_uint num_svm_pointers,
                          void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
      pfn_free_func = [](cl_command_queue, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         for (void *p : range(svm_pointers, num_svm_pointers))
            free(p);
      };
   }

   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
                 cl_uint num_svm_pointers,
                 void *svm_pointers[],
                 void (CL_CALLBACK *pfn_free_func) (
                    cl_command_queue queue, cl_uint num_svm_pointers,
                    void *svm_pointers[], void *user_data),
                 void *user_data,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
                 cl_event *event) {

   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
                         pfn_free_func, user_data, num_events_in_wait_list,
                         event_wait_list, event, CL_COMMAND_SVM_FREE);
}

cl_int
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
                         cl_bool blocking_copy,
                         void *dst_ptr,
                         const void *src_ptr,
                         size_t size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_event *event,
                         cl_int cmd) try {

   if (dst_ptr == nullptr || src_ptr == nullptr)
      return CL_INVALID_VALUE;

   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
      return CL_MEM_COPY_OVERLAP;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            memcpy(dst_ptr, src_ptr, size);
         });

      if (blocking_copy)
         hev().wait();
      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}
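
// E.g. (illustrative): the overlap check compares the absolute pointer
// distance against size, so copying 64 bytes between regions whose
// start addresses are only 32 bytes apart returns CL_MEM_COPY_OVERLAP.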

CLOVER_API cl_int
clEnqueueSVMMemcpy(cl_command_queue d_q,
                   cl_bool blocking_copy,
                   void *dst_ptr,
                   const void *src_ptr,
                   size_t size,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
                   cl_event *event) {

   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
                           size, num_events_in_wait_list, event_wait_list,
                           event, CL_COMMAND_SVM_MEMCPY);
}

cl_int
clover::EnqueueSVMMemFill(cl_command_queue d_q,
                          void *svm_ptr,
                          const void *pattern,
                          size_t pattern_size,
                          size_t size,
                          cl_uint num_events_in_wait_list,
                          const cl_event *event_wait_list,
                          cl_event *event,
                          cl_int cmd) try {

   if (svm_ptr == nullptr || pattern == nullptr ||
       !util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 ||
       !ptr_is_aligned(svm_ptr, pattern_size) ||
       size % pattern_size)
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            void *ptr = svm_ptr;
            for (size_t s = size; s; s -= pattern_size) {
               memcpy(ptr, pattern, pattern_size);
               ptr = static_cast<uint8_t*>(ptr) + pattern_size;
            }
         });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}
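
// E.g. (illustrative): filling 16 bytes with a 4-byte pattern performs
// four memcpy() calls; the loop relies on the earlier check that
// pattern_size divides size exactly, since s -= pattern_size would
// otherwise wrap around past zero.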

CLOVER_API cl_int
clEnqueueSVMMemFill(cl_command_queue d_q,
                    void *svm_ptr,
                    const void *pattern,
                    size_t pattern_size,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event) {

   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
                            size, num_events_in_wait_list, event_wait_list,
                            event, CL_COMMAND_SVM_MEMFILL);
}

cl_int
clover::EnqueueSVMMap(cl_command_queue d_q,
                      cl_bool blocking_map,
                      cl_map_flags map_flags,
                      void *svm_ptr,
                      size_t size,
                      cl_uint num_events_in_wait_list,
                      const cl_event *event_wait_list,
                      cl_event *event,
                      cl_int cmd) try {

   if (svm_ptr == nullptr || size == 0)
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMap(cl_command_queue d_q,
                cl_bool blocking_map,
                cl_map_flags map_flags,
                void *svm_ptr,
                size_t size,
                cl_uint num_events_in_wait_list,
                const cl_event *event_wait_list,
                cl_event *event) {

   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event,
                        CL_COMMAND_SVM_MAP);
}

cl_int
clover::EnqueueSVMUnmap(cl_command_queue d_q,
                        void *svm_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event *event_wait_list,
                        cl_event *event,
                        cl_int cmd) try {

   if (svm_ptr == nullptr)
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMUnmap(cl_command_queue d_q,
                  void *svm_ptr,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
                  cl_event *event) {

   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
}

CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event) {
   CLOVER_NOT_SUPPORTED_UNTIL("2.1");
   return CL_INVALID_VALUE;
}