//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "util/bitscan.h"

#include "api/dispatch.hpp"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;

namespace {
   typedef resource::vector vector_t;

   vector_t
   vector(const size_t *p) {
      if (!p)
         throw error(CL_INVALID_VALUE);
      return range(p, 3);
   }

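   ///
   /// Pitch vector for a region, with zero entries replaced by the
   /// natural (tightly packed) pitch: e.g. a region of { 16, 4, 2 }
   /// elements with 4-byte elements and input pitch { 4, 0, 0 }
   /// yields { 4, 64, 256 }.
   ///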
   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }

   ///
   /// Size of a region in bytes.
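   /// Equals (region[2] - 1) full slices plus (region[1] - 1) full rows
   /// plus one unpadded row of region[0] elements, i.e.
   /// dot(pitch, region - { 0, 1, 1 }).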
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      size_t height = img.type() == CL_MEM_OBJECT_IMAGE1D_ARRAY ?
                      img.array_size() : img.height();
      size_t depth = img.type() == CL_MEM_OBJECT_IMAGE2D_ARRAY ?
                     img.array_size() : img.depth();
      vector_t size = { img.width(), height, depth };
      const auto &dev = q.device();

      if (!dev.image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);

      switch (img.type()) {
      case CL_MEM_OBJECT_IMAGE1D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE1D_ARRAY: {
         const size_t max_size = dev.max_image_size();
         const size_t max_array = dev.max_image_array_number();
         if (img.width() > max_size || img.array_size() > max_array)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE2D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max || img.height() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE2D_ARRAY: {
         const size_t max_size = dev.max_image_size();
         const size_t max_array = dev.max_image_array_number();
         if (img.width() > max_size || img.height() > max_size ||
             img.array_size() > max_array)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE3D: {
         const size_t max = dev.max_image_size_3d();
         if (img.width() > max || img.height() > max || img.depth() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      // XXX: Implement missing checks once Clover supports more image types.
      default:
         throw error(CL_INVALID_IMAGE_SIZE);
      }
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

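         // Reduce both regions to byte intervals within the buffer and
         // reject the copy if the intervals intersect.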
         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
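         // Three-dimensional boxes intersect iff they overlap along
         // every axis.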
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }

   ///
   /// Checks that the memory migration flags are correct.
   ///
   void
   validate_mem_migration_flags(const cl_mem_migration_flags flags) {
      const cl_mem_migration_flags valid =
         CL_MIGRATE_MEM_OBJECT_HOST |
         CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;

      if (flags & ~valid)
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  An instance is implicitly convertible to \a void * or any
   /// other pointer type.
   ///
   template<typename T>
   struct _map;

   template<>
   struct _map<image*> {
      _map(command_queue &q, image *img, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, img->resource_in(q), flags, true, offset, region),
         pitch(map.pitch())
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<>
   struct _map<buffer*> {
      _map(command_queue &q, buffer *mem, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, mem->resource_in(q), flags, true,
             {{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
         pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<typename P>
   struct _map<P *> {
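      // Mapping a plain host pointer requires no GPU resources: just
      // offset the pointer according to the given origin and pitch.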
      _map(command_queue &q, P *ptr, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(ptr);
      }

      P *ptr;
      vector_t pitch;
   };

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         _map<T> dst = { q, dst_obj, CL_MAP_WRITE,
                         dst_orig, dst_pitch, region };
         _map<S> src = { q, src_obj, CL_MAP_READ,
                         src_orig, src_pitch, region };
         assert(src.pitch[0] == dst.pitch[0]);
         vector_t v = {};

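         // Walk the region slice by slice and row by row; rows are
         // contiguous, so each one can be transferred with a single
         // memcpy().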
         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst.pitch, v),
                  static_cast<const char *>(src) + dot(src.pitch, v),
                  src.pitch[0] * region[0]);
            }
         }
      };
   }

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource_in(q).copy(q, dst_orig, region,
                                      src_obj->resource_in(q), src_orig);
      };
   }
}

CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
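   // Express the linear transfer as a degenerate three-dimensional
   // copy: a single row of `size' one-byte elements.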
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
                    const void *pattern, size_t pattern_size,
                    size_t offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t origin = { offset };
   auto dst_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, origin, dst_pitch, region);

   if (!pattern)
      return CL_INVALID_VALUE;

   if (!util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 || size % pattern_size ||
       offset % pattern_size) {
      return CL_INVALID_VALUE;
   }

   auto sub = dynamic_cast<sub_buffer *>(&mem);
   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
   }

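   // Copy the pattern bytes up front: the fill may execute
   // asynchronously, after the caller's pattern buffer is gone.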
   std::string data = std::string((char *)pattern, pattern_size);
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_BUFFER, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
                   const void *fill_color,
                   const size_t *p_origin, const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto origin = vector(p_origin);
   auto region = vector(p_region);

   validate_common(q, deps);
   validate_object(q, img, origin, region);

   if (!fill_color)
      return CL_INVALID_VALUE;

   std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_IMAGE, deps,
      [=, &q, &img](event &) {
         img.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

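   // The mapping is created synchronously at enqueue time; the returned
   // event only takes part in dependency ordering.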
   auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin,
                                          region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   if (!row_pitch)
      throw error(CL_INVALID_VALUE);

   if ((img.slice_pitch() || img.array_size()) && !slice_pitch)
      throw error(CL_INVALID_VALUE);

   auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
   *row_pitch = map->pitch()[1];
   if (slice_pitch)
      *slice_pitch = map->pitch()[2];

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue d_q,
                           cl_uint num_mems,
                           const cl_mem *d_mems,
                           cl_mem_migration_flags flags,
                           cl_uint num_deps,
                           const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto mems = objs<memory_obj>(d_mems, num_mems);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (any_of([&](const memory_obj &m) {
            return m.context() != q.context();
         }, mems))
      throw error(CL_INVALID_CONTEXT);

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         for (auto &mem: mems) {
            if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_out(q);

               // For flags == CL_MIGRATE_MEM_OBJECT_HOST only, to be
               // efficient we would need cl*ReadBuffer* to implement
               // reading from host memory.

            } else {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_undef(q);
               else
                  mem.resource_in(q);
            }
         }
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                          cl_command_queue queue, cl_uint num_svm_pointers,
                          void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
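      // With system SVM the pointers were allocated from the host heap,
      // so a default free function can simply untrack each allocation
      // and release it with free().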
      pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         clover::context &ctx = obj(d_q).context();
         for (void *p : range(svm_pointers, num_svm_pointers)) {
            ctx.remove_svm_allocation(p);
            free(p);
         }
      };
   }

   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
                 cl_uint num_svm_pointers,
                 void *svm_pointers[],
                 void (CL_CALLBACK *pfn_free_func) (
                    cl_command_queue queue, cl_uint num_svm_pointers,
                    void *svm_pointers[], void *user_data),
                 void *user_data,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
                 cl_event *event) {

   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
                         pfn_free_func, user_data, num_events_in_wait_list,
                         event_wait_list, event, CL_COMMAND_SVM_FREE);
}

cl_int
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
                         cl_bool blocking_copy,
                         void *dst_ptr,
                         const void *src_ptr,
                         size_t size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_event *event,
                         cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (dst_ptr == nullptr || src_ptr == nullptr)
      return CL_INVALID_VALUE;

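   // The source and destination ranges overlap iff the distance between
   // the two pointers is smaller than the number of bytes copied.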
   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
      return CL_MEM_COPY_OVERLAP;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            memcpy(dst_ptr, src_ptr, size);
         });

      if (blocking_copy)
         hev().wait();
      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemcpy(cl_command_queue d_q,
                   cl_bool blocking_copy,
                   void *dst_ptr,
                   const void *src_ptr,
                   size_t size,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
                   cl_event *event) {

   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
                           size, num_events_in_wait_list, event_wait_list,
                           event, CL_COMMAND_SVM_MEMCPY);
}

cl_int
clover::EnqueueSVMMemFill(cl_command_queue d_q,
                          void *svm_ptr,
                          const void *pattern,
                          size_t pattern_size,
                          size_t size,
                          cl_uint num_events_in_wait_list,
                          const cl_event *event_wait_list,
                          cl_event *event,
                          cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || pattern == nullptr ||
       !util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 ||
       !ptr_is_aligned(svm_ptr, pattern_size) ||
       size % pattern_size)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
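      // With system SVM the fill can be performed directly with host
      // stores; size was validated above to be a multiple of
      // pattern_size.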
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            void *ptr = svm_ptr;
            for (size_t s = size; s; s -= pattern_size) {
               memcpy(ptr, pattern, pattern_size);
               ptr = static_cast<uint8_t *>(ptr) + pattern_size;
            }
         });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemFill(cl_command_queue d_q,
                    void *svm_ptr,
                    const void *pattern,
                    size_t pattern_size,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event) {

   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
                            size, num_events_in_wait_list, event_wait_list,
                            event, CL_COMMAND_SVM_MEMFILL);
}

cl_int
clover::EnqueueSVMMap(cl_command_queue d_q,
                      cl_bool blocking_map,
                      cl_map_flags map_flags,
                      void *svm_ptr,
                      size_t size,
                      cl_uint num_events_in_wait_list,
                      const cl_event *event_wait_list,
                      cl_event *event,
                      cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || size == 0)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
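      // With system SVM the host can access the memory directly, so
      // mapping is a no-op and the event exists only for ordering.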
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMap(cl_command_queue d_q,
                cl_bool blocking_map,
                cl_map_flags map_flags,
                void *svm_ptr,
                size_t size,
                cl_uint num_events_in_wait_list,
                const cl_event *event_wait_list,
                cl_event *event) {

   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event,
                        CL_COMMAND_SVM_MAP);
}

cl_int
clover::EnqueueSVMUnmap(cl_command_queue d_q,
                        void *svm_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event *event_wait_list,
                        cl_event *event,
                        cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMUnmap(cl_command_queue d_q,
                  void *svm_ptr,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
                  cl_event *event) {

   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
}

CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint num_deps,
                       const cl_event *d_deps,
                       cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (!num_svm_pointers || !svm_pointers)
      return CL_INVALID_VALUE;

   std::vector<size_t> sizes_copy(num_svm_pointers);
   std::vector<const void *> ptrs(num_svm_pointers);

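   // Validate each pointer against the context's SVM allocations and
   // substitute default sizes: an omitted size means the whole
   // allocation starting at its base address.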
   for (unsigned i = 0; i < num_svm_pointers; ++i) {
      const void *ptr = svm_pointers[i];
      size_t size = sizes ? sizes[i] : 0;
      if (!ptr)
         return CL_INVALID_VALUE;

      auto p = q.context().find_svm_allocation(ptr);
      if (!p.first)
         return CL_INVALID_VALUE;

      std::ptrdiff_t pdiff = (uint8_t *)ptr - (uint8_t *)p.first;
      if (size && size + pdiff > p.second)
         return CL_INVALID_VALUE;

      sizes_copy[i] = size ? size : p.second;
      ptrs[i] = size ? svm_pointers[i] : p.first;
   }

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         q.svm_migrate(ptrs, sizes_copy, flags);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}