//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;

namespace {
   typedef resource::vector vector_t;

   vector_t
   vector(const size_t *p) {
      return range(p, 3);
   }

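   ///
   /// Return \a pitch with any zero components replaced by the natural
   /// pitch of the corresponding dimension, i.e. the unaligned size of
   /// the previous dimension.  Roughly, pitch({ w, h, d }, {{ s, 0, 0 }})
   /// is expected to yield { s, s * w, s * w * h }, i.e. tightly packed
   /// rows and slices of s-byte elements.
   ///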
   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }

   ///
   /// Size of a region in bytes.
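   ///
   /// This just restates the expression below:
   ///
   ///    pitch[0] * region[0]
   ///       + pitch[1] * (region[1] - 1)
   ///       + pitch[2] * (region[2] - 1)
   ///
   /// i.e. every row and slice but the last is counted with its full
   /// (possibly padded) pitch, and the last row contributes only its
   /// unpadded length.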
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      vector_t size = { img.width(), img.height(), img.depth() };

      if (!q.device().image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
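   /// As written it only rejects copies between overlapping byte ranges
   /// of the same buffer (CL_MEM_COPY_OVERLAP); size and context checks
   /// are left to validate_object().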
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  The return value of get() should be implicitly
   /// convertible to \a void *.
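   ///
   /// The primary template below handles clover memory objects by
   /// building a mapping of the object's resource on \a q, while the
   /// void * and const void * specializations handle raw host pointers,
   /// which are "mapped" by plain pointer arithmetic.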
   ///
   template<typename T>
   struct _map {
      static mapping
      get(command_queue &q, T obj, cl_map_flags flags,
          size_t offset, size_t size) {
         return { q, obj->resource(q), flags, true,
                  {{ offset }}, {{ size, 1, 1 }} };
      }
   };

   template<>
   struct _map<void *> {
      static void *
      get(command_queue &q, void *obj, cl_map_flags flags,
          size_t offset, size_t size) {
         return (char *)obj + offset;
      }
   };

   template<>
   struct _map<const void *> {
      static const void *
      get(command_queue &q, const void *obj, cl_map_flags flags,
          size_t offset, size_t size) {
         return (const char *)obj + offset;
      }
   };

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
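   ///
   /// For instance, clEnqueueReadBuffer() below uses
   /// soft_copy_op(q, ptr, {}, obj_pitch, &mem, obj_origin, obj_pitch,
   /// region) to map both sides through _map<>::get() and copy the
   /// region row by row with std::memcpy().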
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         auto dst = _map<T>::get(q, dst_obj, CL_MAP_WRITE,
                                 dot(dst_pitch, dst_orig),
                                 size(dst_pitch, region));
         auto src = _map<S>::get(q, src_obj, CL_MAP_READ,
                                 dot(src_pitch, src_orig),
                                 size(src_pitch, region));
         vector_t v = {};

         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst_pitch, v),
                  static_cast<const char *>(src) + dot(src_pitch, v),
                  src_pitch[0] * region[0]);
            }
         }
      };
   }

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
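   ///
   /// Unlike soft_copy_op(), nothing is mapped into host memory; the
   /// copy is delegated to resource::copy() so it can be carried out on
   /// the device side.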
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource(q).copy(q, dst_orig, region,
                                   src_obj->resource(q), src_orig);
      };
   }
}

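//
// Illustrative only: a client typically reaches the entry point below
// with a call along the lines of
//
//    cl_int r = clEnqueueReadBuffer(queue, buf, CL_TRUE /* blocking */,
//                                   0 /* offset */, bytes, host_ptr,
//                                   0, NULL, NULL);
//
// where queue, buf, bytes and host_ptr are placeholders for the
// caller's own objects.
//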
CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

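//
// Illustrative only: the rectangular transfers below take
// three-component origin and region arrays plus row and slice pitches,
// where a pitch of zero stands for the natural, tightly packed pitch
// (cf. the pitch() helper above).  A hypothetical call might look like
//
//    const size_t origin[3] = { x, y, z };
//    const size_t host_origin[3] = { 0, 0, 0 };
//    const size_t region[3] = { w, h, d };
//    clEnqueueReadBufferRect(queue, buf, CL_TRUE, origin, host_origin,
//                            region, 0, 0, 0, 0, host_ptr, 0, NULL, NULL);
//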
CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

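//
// Illustrative only: a pointer returned by the map entry points below
// remains valid until it is released with clEnqueueUnmapMemObject(),
// e.g.
//
//    cl_int err;
//    void *p = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ,
//                                 0, bytes, 0, NULL, NULL, &err);
//    /* ... use p ... */
//    clEnqueueUnmapMemObject(queue, buf, p, 0, NULL, NULL);
//
// with queue, buf and bytes standing in for the caller's own objects.
//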
CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   void *map = img.resource(q).add_map(q, flags, blocking, origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue command_queue,
                           cl_uint num_mem_objects,
                           const cl_mem *mem_objects,
                           cl_mem_migration_flags flags,
                           cl_uint num_events_in_wait_list,
                           const cl_event *event_wait_list,
                           cl_event *event) {
   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
   return CL_INVALID_VALUE;
}