//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "util/bitscan.h"

#include "api/dispatch.hpp"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;

namespace {
   typedef resource::vector vector_t;

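   ///
   /// Convert a size_t[3] array argument from the API into a vector_t,
   /// rejecting the NULL origin/region pointers the spec disallows.
   ///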
   vector_t
   vector(const size_t *p) {
      if (!p)
         throw error(CL_INVALID_VALUE);
      return range(p, 3);
   }

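   ///
   /// Fill in any zero pitch components with the natural pitch, i.e.
   /// the unaligned size of the previous dimension, as the spec
   /// defines for the rectangular and image transfer entry points.
   ///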
   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }

   ///
   /// Size of a region in bytes.
   ///
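   /// Only the last row and slice contribute their unaligned extent,
   /// hence the dot product with (region - { 0, 1, 1 }).
   ///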
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      vector_t size = { img.width(), img.height(), img.depth() };
      const auto &dev = q.device();

      if (!dev.image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);

      switch (img.type()) {
      case CL_MEM_OBJECT_IMAGE1D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE2D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max || img.height() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE3D: {
         const size_t max = dev.max_image_size_3d();
         if (img.width() > max || img.height() > max || img.depth() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      // XXX: Implement missing checks once Clover supports more image types.
      default:
         throw error(CL_INVALID_IMAGE_SIZE);
      }
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
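         // Axis-aligned boxes collide only if their intervals
         // overlap along every dimension.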
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }

   ///
   /// Checks that the memory migration flags are correct.
   ///
   void
   validate_mem_migration_flags(const cl_mem_migration_flags flags) {
      const cl_mem_migration_flags valid =
         CL_MIGRATE_MEM_OBJECT_HOST |
         CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;

      if (flags & ~valid)
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  Instances are implicitly convertible to \a void *.
   ///
   template<typename T>
   struct _map;

   template<>
   struct _map<image*> {
      _map(command_queue &q, image *img, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, img->resource_in(q), flags, true, offset, region),
         pitch(map.pitch())
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<>
   struct _map<buffer*> {
      _map(command_queue &q, buffer *mem, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, mem->resource_in(q), flags, true,
             {{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
         pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

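   ///
   /// Mapping a plain host pointer needs no driver involvement: just
   /// offset the base address by dot(pitch, offset) bytes.
   ///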
   template<typename P>
   struct _map<P *> {
      _map(command_queue &q, P *ptr, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(ptr);
      }

      P *ptr;
      vector_t pitch;
   };

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         _map<T> dst = { q, dst_obj, CL_MAP_WRITE,
                         dst_orig, dst_pitch, region };
         _map<S> src = { q, src_obj, CL_MAP_READ,
                         src_orig, src_pitch, region };
         assert(src.pitch[0] == dst.pitch[0]);
         vector_t v = {};

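         // Copy row by row: elements within a row are contiguous in
         // both objects (the element pitches match, as asserted
         // above), so each row of region[0] elements can be
         // transferred with a single memcpy.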
         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst.pitch, v),
                  static_cast<const char *>(src) + dot(src.pitch, v),
                  src.pitch[0] * region[0]);
            }
         }
      };
   }

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource_in(q).copy(q, dst_orig, region,
                                      src_obj->resource_in(q), src_orig);
      };
   }
}

CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
                    const void *pattern, size_t pattern_size,
                    size_t offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t origin = { offset };
   auto dst_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, origin, dst_pitch, region);

   if (!pattern)
      return CL_INVALID_VALUE;

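   // The spec requires the fill pattern to be a power of two in size,
   // no larger than the largest supported vector type (128 bytes),
   // with both offset and size being multiples of the pattern size.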
   if (!util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 || size % pattern_size ||
       offset % pattern_size) {
      return CL_INVALID_VALUE;
   }

   auto sub = dynamic_cast<sub_buffer *>(&mem);
   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
   }

   std::string data = std::string((char *)pattern, pattern_size);
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_BUFFER, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
                   const void *fill_color,
                   const size_t *p_origin, const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto origin = vector(p_origin);
   auto region = vector(p_region);

   validate_common(q, deps);
   validate_object(q, img, origin, region);

   if (!fill_color)
      return CL_INVALID_VALUE;

   std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_IMAGE, deps,
      [=, &q, &img](event &) {
         img.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   if (!row_pitch)
      throw error(CL_INVALID_VALUE);

   if (img.slice_pitch() && !slice_pitch)
      throw error(CL_INVALID_VALUE);

   auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
   *row_pitch = map->pitch()[1];
   if (slice_pitch)
      *slice_pitch = map->pitch()[2];

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue d_q,
                           cl_uint num_mems,
                           const cl_mem *d_mems,
                           cl_mem_migration_flags flags,
                           cl_uint num_deps,
                           const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto mems = objs<memory_obj>(d_mems, num_mems);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (any_of([&](const memory_obj &m) {
            return m.context() != q.context();
         }, mems))
      throw error(CL_INVALID_CONTEXT);

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         for (auto &mem: mems) {
            if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
               if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
                  mem.resource_out(q);

               // For flags == CL_MIGRATE_MEM_OBJECT_HOST alone to be
               // efficient, cl*ReadBuffer* would need to implement
               // reading directly from host memory.

            } else {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_undef(q);
               else
                  mem.resource_in(q);
            }
         }
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                          cl_command_queue queue, cl_uint num_svm_pointers,
                          void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
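      // Default free callback: drop each pointer from the context's
      // SVM allocation tracking and release it with free().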
      pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         clover::context &ctx = obj(d_q).context();
         for (void *p : range(svm_pointers, num_svm_pointers)) {
            ctx.remove_svm_allocation(p);
            free(p);
         }
      };
   }

   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
                 cl_uint num_svm_pointers,
                 void *svm_pointers[],
                 void (CL_CALLBACK *pfn_free_func) (
                    cl_command_queue queue, cl_uint num_svm_pointers,
                    void *svm_pointers[], void *user_data),
                 void *user_data,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
                 cl_event *event) {

   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
                         pfn_free_func, user_data, num_events_in_wait_list,
                         event_wait_list, event, CL_COMMAND_SVM_FREE);
}

cl_int
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
                         cl_bool blocking_copy,
                         void *dst_ptr,
                         const void *src_ptr,
                         size_t size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_event *event,
                         cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (dst_ptr == nullptr || src_ptr == nullptr)
      return CL_INVALID_VALUE;

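   // Reject overlapping copies: if the distance between the two
   // pointers is less than size, the ranges necessarily intersect.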
   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
      return CL_MEM_COPY_OVERLAP;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            memcpy(dst_ptr, src_ptr, size);
         });

      if (blocking_copy)
         hev().wait();
      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemcpy(cl_command_queue d_q,
                   cl_bool blocking_copy,
                   void *dst_ptr,
                   const void *src_ptr,
                   size_t size,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
                   cl_event *event) {

   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
                           size, num_events_in_wait_list, event_wait_list,
                           event, CL_COMMAND_SVM_MEMCPY);
}

cl_int
clover::EnqueueSVMMemFill(cl_command_queue d_q,
                          void *svm_ptr,
                          const void *pattern,
                          size_t pattern_size,
                          size_t size,
                          cl_uint num_events_in_wait_list,
                          const cl_event *event_wait_list,
                          cl_event *event,
                          cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || pattern == nullptr ||
       !util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 ||
       !ptr_is_aligned(svm_ptr, pattern_size) ||
       size % pattern_size)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
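            // size is a multiple of pattern_size (checked above), so
            // the destination can be tiled with whole pattern copies.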
            void *ptr = svm_ptr;
            for (size_t s = size; s; s -= pattern_size) {
               memcpy(ptr, pattern, pattern_size);
               ptr = static_cast<uint8_t*>(ptr) + pattern_size;
            }
         });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemFill(cl_command_queue d_q,
                    void *svm_ptr,
                    const void *pattern,
                    size_t pattern_size,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event) {

   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
                            size, num_events_in_wait_list, event_wait_list,
                            event, CL_COMMAND_SVM_MEMFILL);
}

cl_int
clover::EnqueueSVMMap(cl_command_queue d_q,
                      cl_bool blocking_map,
                      cl_map_flags map_flags,
                      void *svm_ptr,
                      size_t size,
                      cl_uint num_events_in_wait_list,
                      const cl_event *event_wait_list,
                      cl_event *event,
                      cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || size == 0)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

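   // With system SVM the host already has direct access to the
   // memory, so mapping degenerates to an empty event.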
   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
                                    [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMap(cl_command_queue d_q,
                cl_bool blocking_map,
                cl_map_flags map_flags,
                void *svm_ptr,
                size_t size,
                cl_uint num_events_in_wait_list,
                const cl_event *event_wait_list,
                cl_event *event) {

   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event,
                        CL_COMMAND_SVM_MAP);
}

cl_int
clover::EnqueueSVMUnmap(cl_command_queue d_q,
                        void *svm_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event *event_wait_list,
                        cl_event *event,
                        cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
                                    [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMUnmap(cl_command_queue d_q,
                  void *svm_ptr,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
                  cl_event *event) {

   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
}

CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint num_deps,
                       const cl_event *d_deps,
                       cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (!num_svm_pointers || !svm_pointers)
      return CL_INVALID_VALUE;

   std::vector<size_t> sizes_copy(num_svm_pointers);
   std::vector<const void*> ptrs(num_svm_pointers);

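   // Validate each pointer against the context's SVM allocations.  A
   // size of zero selects the whole allocation containing the pointer,
   // in which case migration starts at the allocation's base address.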
   for (unsigned i = 0; i < num_svm_pointers; ++i) {
      const void *ptr = svm_pointers[i];
      size_t size = sizes ? sizes[i] : 0;
      if (!ptr)
         return CL_INVALID_VALUE;

      auto p = q.context().find_svm_allocation(ptr);
      if (!p.first)
         return CL_INVALID_VALUE;

      std::ptrdiff_t pdiff = (uint8_t*)ptr - (uint8_t*)p.first;
      if (size && size + pdiff > p.second)
         return CL_INVALID_VALUE;

      sizes_copy[i] = size ? size : p.second;
      ptrs[i] = size ? svm_pointers[i] : p.first;
   }

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         q.svm_migrate(ptrs, sizes_copy, flags);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}