//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "util/bitscan.h"

#include "api/dispatch.hpp"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;

namespace {
   typedef resource::vector vector_t;

   vector_t
   vector(const size_t *p) {
      if (!p)
         throw error(CL_INVALID_VALUE);
      return range(p, 3);
   }

   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }
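
   // For illustration: assuming clover's map()/zip() adaptors
   // evaluate lazily, pitch({ 16, 4, 2 }, {{ 1 }}) yields
   // { 1, 16, 64 }: the zero row pitch becomes 16 * 1 bytes and the
   // zero slice pitch 4 * 16 bytes, the unaligned size of the
   // previous dimension in each case.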

   ///
   /// Size of a region in bytes.
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }
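
   // E.g. size({ 1, 16, 64 }, { 16, 4, 2 }) computes
   // dot({ 1, 16, 64 }, { 16, 3, 1 }) = 16 + 48 + 64 = 128 bytes,
   // the tight size of two 64-byte slices.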

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      size_t height = img.type() == CL_MEM_OBJECT_IMAGE1D_ARRAY ? img.array_size() : img.height();
      size_t depth = img.type() == CL_MEM_OBJECT_IMAGE2D_ARRAY ? img.array_size() : img.depth();
      vector_t size = { img.width(), height, depth };
      const auto &dev = q.device();

      if (!dev.image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);

      switch (img.type()) {
      case CL_MEM_OBJECT_IMAGE1D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE1D_ARRAY: {
         const size_t max_size = dev.max_image_size();
         const size_t max_array = dev.max_image_array_number();
         if (img.width() > max_size || img.array_size() > max_array)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE2D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max || img.height() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE2D_ARRAY: {
         const size_t max_size = dev.max_image_size();
         const size_t max_array = dev.max_image_array_number();
         if (img.width() > max_size || img.height() > max_size || img.array_size() > max_array)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE3D: {
         const size_t max = dev.max_image_size_3d();
         if (img.width() > max || img.height() > max || img.depth() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      // XXX: Implement missing checks once Clover supports more image types.
      default:
         throw error(CL_INVALID_IMAGE_SIZE);
      }
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }
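
   // For instance, a hypothetical copy of 128 bytes within a single
   // buffer from offset 0 to offset 64 covers the byte intervals
   // [64, 192) and [0, 128), which overlap, so it is rejected with
   // CL_MEM_COPY_OVERLAP.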

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }
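
   // E.g. a request for CL_MAP_READ | CL_MAP_WRITE_INVALIDATE_REGION
   // is rejected with CL_INVALID_VALUE above, since invalidating the
   // mapped region is mutually exclusive with reading it back.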

   ///
   /// Checks that the memory migration flags are correct.
   ///
   void
   validate_mem_migration_flags(const cl_mem_migration_flags flags) {
      const cl_mem_migration_flags valid =
         CL_MIGRATE_MEM_OBJECT_HOST |
         CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;

      if (flags & ~valid)
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  Instances are implicitly convertible to \a void *.
   ///
   template<typename T>
   struct _map;

   template<>
   struct _map<image*> {
      _map(command_queue &q, image *img, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, img->resource_in(q), flags, true, offset, region),
         pitch(map.pitch())
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<>
   struct _map<buffer*> {
      _map(command_queue &q, buffer *mem, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, mem->resource_in(q), flags, true,
             {{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
         pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<typename P>
   struct _map<P *> {
      _map(command_queue &q, P *ptr, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(ptr);
      }

      P *ptr;
      vector_t pitch;
   };
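
   // soft_copy_op() below instantiates one of the specializations
   // above for each endpoint of a transfer, along the lines of
   //
   //    _map<buffer *> dst = { q, &mem, CL_MAP_WRITE,
   //                           orig, pitch, region };
   //
   // so buffers, images and plain host pointers can all be addressed
   // uniformly through the implicit conversion to a raw pointer.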

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         _map<T> dst = { q, dst_obj, CL_MAP_WRITE,
                         dst_orig, dst_pitch, region };
         _map<S> src = { q, src_obj, CL_MAP_READ,
                         src_orig, src_pitch, region };
         assert(src.pitch[0] == dst.pitch[0]);
         vector_t v = {};

         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst.pitch, v),
                  static_cast<const char *>(src) + dot(src.pitch, v),
                  src.pitch[0] * region[0]);
            }
         }
      };
   }

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource_in(q).copy(q, dst_orig, region,
                                      src_obj->resource_in(q), src_orig);
      };
   }
}

CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
                    const void *pattern, size_t pattern_size,
                    size_t offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t origin = { offset };
   auto dst_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, origin, dst_pitch, region);

   if (!pattern)
      return CL_INVALID_VALUE;

   if (!util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 || size % pattern_size ||
       offset % pattern_size) {
      return CL_INVALID_VALUE;
   }

   auto sub = dynamic_cast<sub_buffer *>(&mem);
   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
   }

   std::string data = std::string((char *)pattern, pattern_size);
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_BUFFER, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
                   const void *fill_color,
                   const size_t *p_origin, const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto origin = vector(p_origin);
   auto region = vector(p_region);

   validate_common(q, deps);
   validate_object(q, img, origin, region);

   if (!fill_color)
      return CL_INVALID_VALUE;

   std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_IMAGE, deps,
      [=, &q, &img](event &) {
         img.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   if (!row_pitch)
      throw error(CL_INVALID_VALUE);

   if ((img.slice_pitch() || img.array_size()) && !slice_pitch)
      throw error(CL_INVALID_VALUE);

   auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
   *row_pitch = map->pitch()[1];
   if (slice_pitch)
      *slice_pitch = map->pitch()[2];

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue d_q,
                           cl_uint num_mems,
                           const cl_mem *d_mems,
                           cl_mem_migration_flags flags,
                           cl_uint num_deps,
                           const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto mems = objs<memory_obj>(d_mems, num_mems);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (any_of([&](const memory_obj &m) {
         return m.context() != q.context();
         }, mems))
      throw error(CL_INVALID_CONTEXT);

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         for (auto &mem: mems) {
            if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
               if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
                  mem.resource_out(q);

               // For flags == CL_MIGRATE_MEM_OBJECT_HOST alone to be
               // efficient, we would need cl*ReadBuffer* to implement
               // reading from host memory.

            } else {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_undef(q);
               else
                  mem.resource_in(q);
            }
         }
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                           cl_command_queue queue, cl_uint num_svm_pointers,
                           void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
      pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         clover::context &ctx = obj(d_q).context();
         for (void *p : range(svm_pointers, num_svm_pointers)) {
            ctx.remove_svm_allocation(p);
            free(p);
         }
      };
   }

   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
                 cl_uint num_svm_pointers,
                 void *svm_pointers[],
                 void (CL_CALLBACK *pfn_free_func) (
                    cl_command_queue queue, cl_uint num_svm_pointers,
                    void *svm_pointers[], void *user_data),
                 void *user_data,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
                 cl_event *event) {

   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
                         pfn_free_func, user_data, num_events_in_wait_list,
                         event_wait_list, event, CL_COMMAND_SVM_FREE);
}

cl_int
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
                         cl_bool blocking_copy,
                         void *dst_ptr,
                         const void *src_ptr,
                         size_t size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_event *event,
                         cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (dst_ptr == nullptr || src_ptr == nullptr)
      return CL_INVALID_VALUE;

   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
      return CL_MEM_COPY_OVERLAP;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            memcpy(dst_ptr, src_ptr, size);
         });

      if (blocking_copy)
         hev().wait();
      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemcpy(cl_command_queue d_q,
                   cl_bool blocking_copy,
                   void *dst_ptr,
                   const void *src_ptr,
                   size_t size,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
                   cl_event *event) {

   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
                           size, num_events_in_wait_list, event_wait_list,
                           event, CL_COMMAND_SVM_MEMCPY);
}

cl_int
clover::EnqueueSVMMemFill(cl_command_queue d_q,
                          void *svm_ptr,
                          const void *pattern,
                          size_t pattern_size,
                          size_t size,
                          cl_uint num_events_in_wait_list,
                          const cl_event *event_wait_list,
                          cl_event *event,
                          cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || pattern == nullptr ||
       !util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 ||
       !ptr_is_aligned(svm_ptr, pattern_size) ||
       size % pattern_size)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            void *ptr = svm_ptr;
            for (size_t s = size; s; s -= pattern_size) {
               memcpy(ptr, pattern, pattern_size);
               ptr = static_cast<uint8_t *>(ptr) + pattern_size;
            }
         });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemFill(cl_command_queue d_q,
                    void *svm_ptr,
                    const void *pattern,
                    size_t pattern_size,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event) {

   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
                            size, num_events_in_wait_list, event_wait_list,
                            event, CL_COMMAND_SVM_MEMFILL);
}

cl_int
clover::EnqueueSVMMap(cl_command_queue d_q,
                      cl_bool blocking_map,
                      cl_map_flags map_flags,
                      void *svm_ptr,
                      size_t size,
                      cl_uint num_events_in_wait_list,
                      const cl_event *event_wait_list,
                      cl_event *event,
                      cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || size == 0)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMap(cl_command_queue d_q,
                cl_bool blocking_map,
                cl_map_flags map_flags,
                void *svm_ptr,
                size_t size,
                cl_uint num_events_in_wait_list,
                const cl_event *event_wait_list,
                cl_event *event) {

   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event,
                        CL_COMMAND_SVM_MAP);
}

cl_int
clover::EnqueueSVMUnmap(cl_command_queue d_q,
                        void *svm_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event *event_wait_list,
                        cl_event *event,
                        cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMUnmap(cl_command_queue d_q,
                  void *svm_ptr,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
                  cl_event *event) {

   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
}

CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint num_deps,
                       const cl_event *d_deps,
                       cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (!num_svm_pointers || !svm_pointers)
      return CL_INVALID_VALUE;

   std::vector<size_t> sizes_copy(num_svm_pointers);
   std::vector<const void *> ptrs(num_svm_pointers);

   for (unsigned i = 0; i < num_svm_pointers; ++i) {
      const void *ptr = svm_pointers[i];
      size_t size = sizes ? sizes[i] : 0;
      if (!ptr)
         return CL_INVALID_VALUE;

      auto p = q.context().find_svm_allocation(ptr);
      if (!p.first)
         return CL_INVALID_VALUE;

      std::ptrdiff_t pdiff = (uint8_t *)ptr - (uint8_t *)p.first;
      if (size && size + pdiff > p.second)
         return CL_INVALID_VALUE;

      sizes_copy[i] = size ? size : p.second;
      ptrs[i] = size ? svm_pointers[i] : p.first;
   }

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         q.svm_migrate(ptrs, sizes_copy, flags);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}