//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "util/bitscan.h"

#include "api/dispatch.hpp"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;

namespace {
   typedef resource::vector vector_t;

   vector_t
   vector(const size_t *p) {
      return range(p, 3);
   }

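   ///
   /// Replace the zero components of \a pitch with the natural pitch,
   /// i.e. the unaligned size of the previous dimension.  As a sketch
   /// of the expected behaviour: for a region of { 16, 4, 2 } elements
   /// with an element size of 4 bytes and zero row and slice pitches,
   /// the row pitch becomes 16 * 4 = 64 bytes and the slice pitch
   /// 4 * 64 = 256 bytes.
   ///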
   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }

   ///
   /// Size of a region in bytes.
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }
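
   // The { 0, 1, 1 } correction accounts for the last row of the last
   // slice contributing only the bytes it actually covers: e.g., for
   // pitch { 1, 64, 256 } and region { 16, 4, 2 } this gives
   // dot({ 1, 64, 256 }, { 16, 3, 1 }) = 16 + 192 + 256 = 464 bytes
   // rather than the 512 bytes of two full slices.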

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      vector_t size = { img.width(), img.height(), img.depth() };
      const auto &dev = q.device();

      if (!dev.image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);

      switch (img.type()) {
      case CL_MEM_OBJECT_IMAGE2D: {
         const size_t max = 1 << dev.max_image_levels_2d();
         if (img.width() > max || img.height() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE3D: {
         const size_t max = 1 << dev.max_image_levels_3d();
         if (img.width() > max || img.height() > max || img.depth() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      // XXX: Implement missing checks once Clover supports more image types.
      default:
         throw error(CL_INVALID_IMAGE_SIZE);
      }
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }
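
   // Note that validate_copy() compares linearized byte intervals, so
   // for rectangular copies within the same buffer it may report
   // CL_MEM_COPY_OVERLAP even when the rows of the two regions
   // interleave without sharing any bytes -- a conservative
   // approximation.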

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }
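
   // E.g., requesting CL_MAP_READ on a buffer created with
   // CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS throws
   // CL_INVALID_OPERATION, since CL_MEM_HOST_READ_ONLY is the only
   // host-access flag in the allowed set for read mappings.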

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  The return value of get() should be implicitly
   /// convertible to \a void *.
   ///
   template<typename T>
   struct _map;

   template<>
   struct _map<image*> {
      _map(command_queue &q, image *img, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, img->resource_in(q), flags, true, offset, region),
         pitch(map.pitch())
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<>
   struct _map<buffer*> {
      _map(command_queue &q, buffer *mem, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, mem->resource_in(q), flags, true,
             {{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
         pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<typename P>
   struct _map<P *> {
      _map(command_queue &q, P *ptr, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(ptr);
      }

      P *ptr;
      vector_t pitch;
   };
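
   // Of the three specializations above, images go through the
   // resource mapping machinery with the pitch the mapping reports,
   // buffers are mapped as a flat byte range at the linearized offset,
   // and plain host pointers are merely offset -- no mapping needed.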

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         _map<T> dst = { q, dst_obj, CL_MAP_WRITE,
                         dst_orig, dst_pitch, region };
         _map<S> src = { q, src_obj, CL_MAP_READ,
                         src_orig, src_pitch, region };
         assert(src.pitch[0] == dst.pitch[0]);
         vector_t v = {};

         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst.pitch, v),
                  static_cast<const char *>(src) + dot(src.pitch, v),
                  src.pitch[0] * region[0]);
            }
         }
      };
   }
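
   // The copy walks the region slice by slice and row by row; each
   // memcpy() transfers one row of region[0] elements, so source and
   // destination may use different row and slice pitches as long as
   // the element size (pitch[0]) matches on both sides.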

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource_in(q).copy(q, dst_orig, region,
                                      src_obj->resource_in(q), src_orig);
      };
   }
}

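//
// Hypothetical host-side usage sketch (not part of this file): read
// the first 256 bytes of a buffer synchronously.
//
//    char data[256];
//    cl_int err = clEnqueueReadBuffer(queue, buf, CL_TRUE /* blocking */,
//                                     0 /* offset */, sizeof(data), data,
//                                     0, NULL, NULL);
//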
CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

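//
// Pattern constraints below: e.g. filling 1024 bytes at offset 64 with
// a 16-byte pattern is valid, while a 24-byte pattern (not a power of
// two), a pattern larger than 128 bytes, or an offset that is not a
// multiple of the pattern size all yield CL_INVALID_VALUE.
//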
CLOVER_API cl_int
clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
                    const void *pattern, size_t pattern_size,
                    size_t offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t origin = { offset };
   auto dst_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, origin, dst_pitch, region);

   if (!pattern)
      return CL_INVALID_VALUE;

   if (!util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 || size % pattern_size ||
       offset % pattern_size) {
      return CL_INVALID_VALUE;
   }

   auto sub = dynamic_cast<sub_buffer *>(&mem);
   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
   }

   std::string data = std::string((char *)pattern, pattern_size);
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_BUFFER, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

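//
// Unlike clEnqueueCopyBuffer above, which runs entirely on the device
// via hard_copy_op(), the rectangular copy below uses soft_copy_op(),
// mapping both buffers and copying row by row on the host.
//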
CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
                   const void *fill_color,
                   const size_t *p_origin, const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto origin = vector(p_origin);
   auto region = vector(p_region);

   validate_common(q, deps);
   validate_object(q, img, origin, region);

   if (!fill_color)
      return CL_INVALID_VALUE;

   std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_IMAGE, deps,
      [=, &q, &img](event &) {
         img.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   if (!row_pitch)
      throw error(CL_INVALID_VALUE);

   if (img.slice_pitch() && !slice_pitch)
      throw error(CL_INVALID_VALUE);

   auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
   *row_pitch = map->pitch()[1];
   if (slice_pitch)
      *slice_pitch = map->pitch()[2];

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue d_q,
                           cl_uint num_mems,
                           const cl_mem *d_mems,
                           cl_mem_migration_flags flags,
                           cl_uint num_deps,
                           const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto mems = objs<memory_obj>(d_mems, num_mems);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   if (any_of([&](const memory_obj &m) {
         return m.context() != q.context();
         }, mems))
      throw error(CL_INVALID_CONTEXT);

   if (flags & ~(CL_MIGRATE_MEM_OBJECT_HOST |
                 CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
      throw error(CL_INVALID_VALUE);

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         for (auto &mem: mems) {
            if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
               if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
                  mem.resource_out(q);

               // For flags == CL_MIGRATE_MEM_OBJECT_HOST only, to be
               // efficient we would need cl*ReadBuffer* to implement
               // reading from host memory.

            } else {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_undef(q);
               else
                  mem.resource_in(q);
            }
         }
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                           cl_command_queue queue, cl_uint num_svm_pointers,
                           void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
      pfn_free_func = [](cl_command_queue, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         for (void *p : range(svm_pointers, num_svm_pointers))
            free(p);
      };
   }

   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
                 cl_uint num_svm_pointers,
                 void *svm_pointers[],
                 void (CL_CALLBACK *pfn_free_func) (
                    cl_command_queue queue, cl_uint num_svm_pointers,
                    void *svm_pointers[], void *user_data),
                 void *user_data,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
                 cl_event *event) {

   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
                         pfn_free_func, user_data, num_events_in_wait_list,
                         event_wait_list, event, CL_COMMAND_SVM_FREE);
}

cl_int
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
                         cl_bool blocking_copy,
                         void *dst_ptr,
                         const void *src_ptr,
                         size_t size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_event *event,
                         cl_int cmd) try {

   if (dst_ptr == nullptr || src_ptr == nullptr)
      return CL_INVALID_VALUE;

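   // The two regions are both size bytes long, so they overlap exactly
   // when the pointers are less than size bytes apart in either
   // direction.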
   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
      return CL_MEM_COPY_OVERLAP;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            memcpy(dst_ptr, src_ptr, size);
         });

      if (blocking_copy)
         hev().wait();
      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemcpy(cl_command_queue d_q,
                   cl_bool blocking_copy,
                   void *dst_ptr,
                   const void *src_ptr,
                   size_t size,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
                   cl_event *event) {

   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
                           size, num_events_in_wait_list, event_wait_list,
                           event, CL_COMMAND_SVM_MEMCPY);
}

cl_int
clover::EnqueueSVMMemFill(cl_command_queue d_q,
                          void *svm_ptr,
                          const void *pattern,
                          size_t pattern_size,
                          size_t size,
                          cl_uint num_events_in_wait_list,
                          const cl_event *event_wait_list,
                          cl_event *event,
                          cl_int cmd) try {

   if (svm_ptr == nullptr || pattern == nullptr ||
       !util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 ||
       !ptr_is_aligned(svm_ptr, pattern_size) ||
       size % pattern_size)
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            void *ptr = svm_ptr;
            for (size_t s = size; s; s -= pattern_size) {
               memcpy(ptr, pattern, pattern_size);
               ptr = static_cast<uint8_t*>(ptr) + pattern_size;
            }
         });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemFill(cl_command_queue d_q,
                    void *svm_ptr,
                    const void *pattern,
                    size_t pattern_size,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event) {

   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
                            size, num_events_in_wait_list, event_wait_list,
                            event, CL_COMMAND_SVM_MEMFILL);
}

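//
// With system SVM the host pointer is directly usable by both host and
// device, so the map and unmap operations below only need to
// participate in event ordering; hence the no-op callbacks.
//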
cl_int
clover::EnqueueSVMMap(cl_command_queue d_q,
                      cl_bool blocking_map,
                      cl_map_flags map_flags,
                      void *svm_ptr,
                      size_t size,
                      cl_uint num_events_in_wait_list,
                      const cl_event *event_wait_list,
                      cl_event *event,
                      cl_int cmd) try {

   if (svm_ptr == nullptr || size == 0)
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMap(cl_command_queue d_q,
                cl_bool blocking_map,
                cl_map_flags map_flags,
                void *svm_ptr,
                size_t size,
                cl_uint num_events_in_wait_list,
                const cl_event *event_wait_list,
                cl_event *event) {

   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event,
                        CL_COMMAND_SVM_MAP);
}

cl_int
clover::EnqueueSVMUnmap(cl_command_queue d_q,
                        void *svm_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event *event_wait_list,
                        cl_event *event,
                        cl_int cmd) try {

   if (svm_ptr == nullptr)
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMUnmap(cl_command_queue d_q,
                  void *svm_ptr,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
                  cl_event *event) {

   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
}

CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event) {
   CLOVER_NOT_SUPPORTED_UNTIL("2.1");
   return CL_INVALID_VALUE;
}