//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "util/bitscan.h"

#include "api/dispatch.hpp"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;

namespace {
   typedef resource::vector vector_t;

   vector_t
   vector(const size_t *p) {
      if (!p)
         throw error(CL_INVALID_VALUE);
      return range(p, 3);
   }

   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }
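
   //
   // A sketch of the behavior above (hypothetical extents w, h, d):
   // pitch({ w, h, d }, {{ 1 }}) fills in the natural pitches
   // { 1, w, w * h }, while caller-supplied pitches are left alone:
   //
   //    pitch({ w, h, d }, {{ 1, row, 0 }}) == { 1, row, row * h }
   //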

   ///
   /// Size of a region in bytes.
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }
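
   //
   // I.e. for a non-empty region { w, h, d } with pitches
   // { b, row, slice } (hypothetical values) this evaluates to
   // b * w + row * (h - 1) + slice * (d - 1): the span in bytes from
   // the region's origin to one past its last byte.
   //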

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }
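
   //
   // E.g., with hypothetical values, a region { w, h, d } with pitches
   // { b, row, slice } passes the pitch check above iff b * w <= row
   // and row * h <= slice, i.e. each row fits within the row pitch and
   // each slice within the slice pitch.
   //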

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      vector_t size = { img.width(), img.height(), img.depth() };
      const auto &dev = q.device();

      if (!dev.image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);

      switch (img.type()) {
      case CL_MEM_OBJECT_IMAGE1D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE2D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max || img.height() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE3D: {
         const size_t max = dev.max_image_size_3d();
         if (img.width() > max || img.height() > max || img.depth() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      // XXX: Implement missing checks once Clover supports more image types.
      default:
         throw error(CL_INVALID_IMAGE_SIZE);
      }
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }
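
   //
   // The check compares the flattened byte ranges, so e.g. copying
   // n bytes within the same buffer from offset 0 to any destination
   // offset smaller than n makes the two intervals intersect and is
   // rejected with CL_MEM_COPY_OVERLAP (hypothetical offsets).
   //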

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }
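
   //
   // E.g. (CL_MAP_READ | CL_MAP_WRITE_INVALIDATE_REGION) is rejected
   // with CL_INVALID_VALUE, and mapping with CL_MAP_READ an object
   // created with CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS
   // fails with CL_INVALID_OPERATION.
   //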

   ///
   /// Checks that the memory migration flags are correct.
   ///
   void
   validate_mem_migration_flags(const cl_mem_migration_flags flags) {
      const cl_mem_migration_flags valid =
         CL_MIGRATE_MEM_OBJECT_HOST |
         CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;

      if (flags & ~valid)
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  The return value of get() should be implicitly
   /// convertible to \a void *.
   ///
   template<typename T>
   struct _map;

   template<>
   struct _map<image*> {
      _map(command_queue &q, image *img, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, img->resource_in(q), flags, true, offset, region),
         pitch(map.pitch())
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<>
   struct _map<buffer*> {
      _map(command_queue &q, buffer *mem, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, mem->resource_in(q), flags, true,
             {{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
         pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<typename P>
   struct _map<P *> {
      _map(command_queue &q, P *ptr, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(ptr);
      }

      P *ptr;
      vector_t pitch;
   };
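
   //
   // Usage sketch (hypothetical arguments): all three specializations
   // share the same shape, so generic code like soft_copy_op() below
   // can write
   //
   //    _map<buffer *> dst = { q, &mem, CL_MAP_WRITE,
   //                           origin, pitch, region };
   //    char *p = dst;   // implicit conversion to a host pointer
   //
   // whether the mapped object is an image, a buffer or plain host
   // memory.
   //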

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         _map<T> dst = { q, dst_obj, CL_MAP_WRITE,
                         dst_orig, dst_pitch, region };
         _map<S> src = { q, src_obj, CL_MAP_READ,
                         src_orig, src_pitch, region };
         assert(src.pitch[0] == dst.pitch[0]);
         vector_t v = {};

         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst.pitch, v),
                  static_cast<const char *>(src) + dot(src.pitch, v),
                  src.pitch[0] * region[0]);
            }
         }
      };
   }
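
   //
   // I.e. the copy walks the region slice by slice and row by row,
   // issuing one memcpy of pitch[0] * region[0] bytes per row; the
   // per-object pitches reconcile differently laid out source and
   // destination, so the same loop serves buffers, images and host
   // pointers alike.
   //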

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource_in(q).copy(q, dst_orig, region,
                                      src_obj->resource_in(q), src_orig);
      };
   }
}

CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
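
//
// Client-side sketch (hypothetical handles and sizes): a blocking read
// that copies the first n bytes of a buffer into host memory, returning
// only once the data has landed:
//
//    cl_int err = clEnqueueReadBuffer(queue, buf, CL_TRUE, 0, n,
//                                     host_ptr, 0, NULL, NULL);
//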

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
                    const void *pattern, size_t pattern_size,
                    size_t offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t origin = { offset };
   auto dst_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, origin, dst_pitch, region);

   if (!pattern)
      return CL_INVALID_VALUE;

   if (!util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 || size % pattern_size ||
       offset % pattern_size) {
      return CL_INVALID_VALUE;
   }

   auto sub = dynamic_cast<sub_buffer *>(&mem);
   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
   }

   std::string data = std::string((char *)pattern, pattern_size);
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_BUFFER, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
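
//
// E.g. (hypothetical values): filling a buffer of n floats with 1.0f
// uses pattern_size == sizeof(float); pattern_size must be a non-zero
// power of two no larger than 128 bytes, and offset and size must both
// be multiples of it:
//
//    float one = 1.0f;
//    clEnqueueFillBuffer(queue, buf, &one, sizeof(one), 0,
//                        n * sizeof(one), 0, NULL, NULL);
//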

CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
                   const void *fill_color,
                   const size_t *p_origin, const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto origin = vector(p_origin);
   auto region = vector(p_region);

   validate_common(q, deps);
   validate_object(q, img, origin, region);

   if (!fill_color)
      return CL_INVALID_VALUE;

   std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_IMAGE, deps,
      [=, &q, &img](event &) {
         img.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   if (!row_pitch)
      throw error(CL_INVALID_VALUE);

   if (img.slice_pitch() && !slice_pitch)
      throw error(CL_INVALID_VALUE);

   auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
   *row_pitch = map->pitch()[1];
   if (slice_pitch)
      *slice_pitch = map->pitch()[2];

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}
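
//
// Host-side sketch (hypothetical handles, 2D image): the pitches
// returned through row_pitch/slice_pitch are those of the mapping,
// not necessarily the image's own, so the mapped region must be
// addressed with them:
//
//    size_t row;
//    cl_int err;
//    char *p = (char *)clEnqueueMapImage(q, img, CL_TRUE, CL_MAP_READ,
//                                        origin, region, &row, NULL,
//                                        0, NULL, NULL, &err);
//    // pixel (x, y) of the region lives at p + y * row + x * pixel_size
//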

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue d_q,
                           cl_uint num_mems,
                           const cl_mem *d_mems,
                           cl_mem_migration_flags flags,
                           cl_uint num_deps,
                           const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto mems = objs<memory_obj>(d_mems, num_mems);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (any_of([&](const memory_obj &m) {
         return m.context() != q.context();
         }, mems))
      throw error(CL_INVALID_CONTEXT);

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         for (auto &mem: mems) {
            if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
               if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
                  mem.resource_out(q);
               // For a migration with flags == CL_MIGRATE_MEM_OBJECT_HOST
               // alone to be efficient, cl*ReadBuffer* would need to
               // implement reading directly from host memory.

            } else {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_undef(q);
               else
                  mem.resource_in(q);
            }
         }
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                           cl_command_queue queue, cl_uint num_svm_pointers,
                           void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
      pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         clover::context &ctx = obj(d_q).context();
         for (void *p : range(svm_pointers, num_svm_pointers)) {
            ctx.remove_svm_allocation(p);
            free(p);
         }
      };
   }

   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
                 cl_uint num_svm_pointers,
                 void *svm_pointers[],
                 void (CL_CALLBACK *pfn_free_func) (
                    cl_command_queue queue, cl_uint num_svm_pointers,
                    void *svm_pointers[], void *user_data),
                 void *user_data,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
                 cl_event *event) {

   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
                         pfn_free_func, user_data, num_events_in_wait_list,
                         event_wait_list, event, CL_COMMAND_SVM_FREE);
}

cl_int
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
                         cl_bool blocking_copy,
                         void *dst_ptr,
                         const void *src_ptr,
                         size_t size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_event *event,
                         cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (dst_ptr == nullptr || src_ptr == nullptr)
      return CL_INVALID_VALUE;

   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
      return CL_MEM_COPY_OVERLAP;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            memcpy(dst_ptr, src_ptr, size);
         });

      if (blocking_copy)
         hev().wait();
      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemcpy(cl_command_queue d_q,
                   cl_bool blocking_copy,
                   void *dst_ptr,
                   const void *src_ptr,
                   size_t size,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
                   cl_event *event) {

   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
                           size, num_events_in_wait_list, event_wait_list,
                           event, CL_COMMAND_SVM_MEMCPY);
}

cl_int
clover::EnqueueSVMMemFill(cl_command_queue d_q,
                          void *svm_ptr,
                          const void *pattern,
                          size_t pattern_size,
                          size_t size,
                          cl_uint num_events_in_wait_list,
                          const cl_event *event_wait_list,
                          cl_event *event,
                          cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || pattern == nullptr ||
       !util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 ||
       !ptr_is_aligned(svm_ptr, pattern_size) ||
       size % pattern_size)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            void *ptr = svm_ptr;
            for (size_t s = size; s; s -= pattern_size) {
               memcpy(ptr, pattern, pattern_size);
               ptr = static_cast<uint8_t*>(ptr) + pattern_size;
            }
         });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemFill(cl_command_queue d_q,
                    void *svm_ptr,
                    const void *pattern,
                    size_t pattern_size,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event) {

   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
                            size, num_events_in_wait_list, event_wait_list,
                            event, CL_COMMAND_SVM_MEMFILL);
}

cl_int
clover::EnqueueSVMMap(cl_command_queue d_q,
                      cl_bool blocking_map,
                      cl_map_flags map_flags,
                      void *svm_ptr,
                      size_t size,
                      cl_uint num_events_in_wait_list,
                      const cl_event *event_wait_list,
                      cl_event *event,
                      cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || size == 0)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMap(cl_command_queue d_q,
                cl_bool blocking_map,
                cl_map_flags map_flags,
                void *svm_ptr,
                size_t size,
                cl_uint num_events_in_wait_list,
                const cl_event *event_wait_list,
                cl_event *event) {

   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event,
                        CL_COMMAND_SVM_MAP);
}

cl_int
clover::EnqueueSVMUnmap(cl_command_queue d_q,
                        void *svm_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event *event_wait_list,
                        cl_event *event,
                        cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMUnmap(cl_command_queue d_q,
                  void *svm_ptr,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
                  cl_event *event) {

   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
}

CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint num_deps,
                       const cl_event *d_deps,
                       cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (!num_svm_pointers || !svm_pointers)
      return CL_INVALID_VALUE;

   std::vector<size_t> sizes_copy(num_svm_pointers);
   std::vector<const void*> ptrs(num_svm_pointers);

   for (unsigned i = 0; i < num_svm_pointers; ++i) {
      const void *ptr = svm_pointers[i];
      size_t size = sizes ? sizes[i] : 0;
      if (!ptr)
         return CL_INVALID_VALUE;

      auto p = q.context().find_svm_allocation(ptr);
      if (!p.first)
         return CL_INVALID_VALUE;

      std::ptrdiff_t pdiff = (uint8_t*)ptr - (uint8_t*)p.first;
      if (size && size + pdiff > p.second)
         return CL_INVALID_VALUE;

      sizes_copy[i] = size ? size : p.second;
      ptrs[i] = size ? svm_pointers[i] : p.first;
   }

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         q.svm_migrate(ptrs, sizes_copy, flags);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}