1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22
23 #include "util/format/u_format.h"
24 #include "util/u_math.h"
25 #include "api/util.hpp"
26 #include "core/memory.hpp"
27 #include "core/format.hpp"
28
29 using namespace clover;
30
31 namespace {
32 cl_mem_flags
validate_flags(cl_mem d_parent,cl_mem_flags d_flags,bool svm)33 validate_flags(cl_mem d_parent, cl_mem_flags d_flags, bool svm) {
34 const cl_mem_flags dev_access_flags =
35 CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
36 const cl_mem_flags host_ptr_flags =
37 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
38 const cl_mem_flags host_access_flags =
39 CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
40 const cl_mem_flags svm_flags =
41 CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS;
42
43 const cl_mem_flags valid_flags =
44 dev_access_flags
45 | (svm || d_parent ? 0 : host_ptr_flags)
46 | (svm ? svm_flags : host_access_flags);
47
48 if ((d_flags & ~valid_flags) ||
49 util_bitcount(d_flags & dev_access_flags) > 1 ||
50 util_bitcount(d_flags & host_access_flags) > 1)
51 throw error(CL_INVALID_VALUE);
52
53 if ((d_flags & CL_MEM_USE_HOST_PTR) &&
54 (d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
55 throw error(CL_INVALID_VALUE);
56
57 if ((d_flags & CL_MEM_SVM_ATOMICS) &&
58 !(d_flags & CL_MEM_SVM_FINE_GRAIN_BUFFER))
59 throw error(CL_INVALID_VALUE);
60
61 if (d_parent) {
62 const auto &parent = obj(d_parent);
63 const cl_mem_flags flags = (d_flags |
64 (d_flags & dev_access_flags ? 0 :
65 parent.flags() & dev_access_flags) |
66 (d_flags & host_access_flags ? 0 :
67 parent.flags() & host_access_flags) |
68 (parent.flags() & host_ptr_flags));
69
70 if (~flags & parent.flags() & (dev_access_flags & ~CL_MEM_READ_WRITE))
71 throw error(CL_INVALID_VALUE);
72
73 // Check if new host access flags cause a mismatch between
74 // host-read/write-only.
75 if (!(flags & CL_MEM_HOST_NO_ACCESS) &&
76 (~flags & parent.flags() & host_access_flags))
77 throw error(CL_INVALID_VALUE);
78
79 return flags;
80
81 } else {
82 return d_flags | (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE);
83 }
84 }
85
86 std::vector<cl_mem_properties>
fill_properties(const cl_mem_properties * d_properties)87 fill_properties(const cl_mem_properties *d_properties) {
88 std::vector<cl_mem_properties> properties;
89 if (d_properties) {
90 while (*d_properties) {
91 if (*d_properties != 0)
92 throw error(CL_INVALID_PROPERTY);
93
94 properties.push_back(*d_properties);
95 d_properties++;
96 };
97 properties.push_back(0);
98 }
99 return properties;
100 }
101 }
102
103 CLOVER_API cl_mem
clCreateBufferWithProperties(cl_context d_ctx,const cl_mem_properties * d_properties,cl_mem_flags d_flags,size_t size,void * host_ptr,cl_int * r_errcode)104 clCreateBufferWithProperties(cl_context d_ctx,
105 const cl_mem_properties *d_properties,
106 cl_mem_flags d_flags, size_t size,
107 void *host_ptr, cl_int *r_errcode) try {
108
109 auto &ctx = obj(d_ctx);
110 const cl_mem_flags flags = validate_flags(NULL, d_flags, false);
111 std::vector<cl_mem_properties> properties = fill_properties(d_properties);
112
113 if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
114 CL_MEM_COPY_HOST_PTR)))
115 throw error(CL_INVALID_HOST_PTR);
116
117 if (!size ||
118 size > fold(maximum(), cl_ulong(0),
119 map(std::mem_fn(&device::max_mem_alloc_size), ctx.devices())
120 ))
121 throw error(CL_INVALID_BUFFER_SIZE);
122
123 ret_error(r_errcode, CL_SUCCESS);
124 return new root_buffer(ctx, properties, flags, size, host_ptr);
125 } catch (error &e) {
126 ret_error(r_errcode, e);
127 return NULL;
128 }
129
130
131 CLOVER_API cl_mem
clCreateBuffer(cl_context d_ctx,cl_mem_flags d_flags,size_t size,void * host_ptr,cl_int * r_errcode)132 clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size,
133 void *host_ptr, cl_int *r_errcode) {
134 return clCreateBufferWithProperties(d_ctx, NULL, d_flags, size,
135 host_ptr, r_errcode);
136 }
137
138 CLOVER_API cl_mem
clCreateSubBuffer(cl_mem d_mem,cl_mem_flags d_flags,cl_buffer_create_type op,const void * op_info,cl_int * r_errcode)139 clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags,
140 cl_buffer_create_type op,
141 const void *op_info, cl_int *r_errcode) try {
142 auto &parent = obj<root_buffer>(d_mem);
143 const cl_mem_flags flags = validate_flags(d_mem, d_flags, false);
144
145 if (op == CL_BUFFER_CREATE_TYPE_REGION) {
146 auto reg = reinterpret_cast<const cl_buffer_region *>(op_info);
147
148 if (!reg ||
149 reg->origin > parent.size() ||
150 reg->origin + reg->size > parent.size())
151 throw error(CL_INVALID_VALUE);
152
153 if (!reg->size)
154 throw error(CL_INVALID_BUFFER_SIZE);
155
156 ret_error(r_errcode, CL_SUCCESS);
157 return new sub_buffer(parent, flags, reg->origin, reg->size);
158
159 } else {
160 throw error(CL_INVALID_VALUE);
161 }
162
163 } catch (error &e) {
164 ret_error(r_errcode, e);
165 return NULL;
166 }
167
168 CLOVER_API cl_mem
clCreateImageWithProperties(cl_context d_ctx,const cl_mem_properties * d_properties,cl_mem_flags d_flags,const cl_image_format * format,const cl_image_desc * desc,void * host_ptr,cl_int * r_errcode)169 clCreateImageWithProperties(cl_context d_ctx,
170 const cl_mem_properties *d_properties,
171 cl_mem_flags d_flags,
172 const cl_image_format *format,
173 const cl_image_desc *desc,
174 void *host_ptr, cl_int *r_errcode) try {
175 auto &ctx = obj(d_ctx);
176
177 if (!any_of(std::mem_fn(&device::image_support), ctx.devices()))
178 throw error(CL_INVALID_OPERATION);
179
180 if (!format)
181 throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
182
183 if (!desc)
184 throw error(CL_INVALID_IMAGE_DESCRIPTOR);
185
186 if (desc->image_array_size == 0 &&
187 (desc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
188 desc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY))
189 throw error(CL_INVALID_IMAGE_DESCRIPTOR);
190
191 if (!host_ptr &&
192 (desc->image_row_pitch || desc->image_slice_pitch))
193 throw error(CL_INVALID_IMAGE_DESCRIPTOR);
194
195 if (desc->num_mip_levels || desc->num_samples)
196 throw error(CL_INVALID_IMAGE_DESCRIPTOR);
197
198 if (bool(desc->buffer) != (desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER))
199 throw error(CL_INVALID_IMAGE_DESCRIPTOR);
200
201 if (bool(host_ptr) != bool(d_flags & (CL_MEM_USE_HOST_PTR |
202 CL_MEM_COPY_HOST_PTR)))
203 throw error(CL_INVALID_HOST_PTR);
204
205 const cl_mem_flags flags = validate_flags(desc->buffer, d_flags, false);
206
207 if (!supported_formats(ctx, desc->image_type).count(*format))
208 throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
209
210 std::vector<cl_mem_properties> properties = fill_properties(d_properties);
211 ret_error(r_errcode, CL_SUCCESS);
212
213 const size_t row_pitch = desc->image_row_pitch ? desc->image_row_pitch :
214 util_format_get_blocksize(translate_format(*format)) * desc->image_width;
215
216 switch (desc->image_type) {
217 case CL_MEM_OBJECT_IMAGE1D:
218 if (!desc->image_width)
219 throw error(CL_INVALID_IMAGE_SIZE);
220
221 if (all_of([=](const device &dev) {
222 const size_t max = dev.max_image_size();
223 return (desc->image_width > max);
224 }, ctx.devices()))
225 throw error(CL_INVALID_IMAGE_SIZE);
226
227 return new image1d(ctx, properties, flags, format,
228 desc->image_width,
229 row_pitch, host_ptr);
230
231 case CL_MEM_OBJECT_IMAGE2D:
232 if (!desc->image_width || !desc->image_height)
233 throw error(CL_INVALID_IMAGE_SIZE);
234
235 if (all_of([=](const device &dev) {
236 const size_t max = dev.max_image_size();
237 return (desc->image_width > max ||
238 desc->image_height > max);
239 }, ctx.devices()))
240 throw error(CL_INVALID_IMAGE_SIZE);
241
242 return new image2d(ctx, properties, flags, format,
243 desc->image_width, desc->image_height,
244 row_pitch, host_ptr);
245
246 case CL_MEM_OBJECT_IMAGE3D: {
247 if (!desc->image_width || !desc->image_height || !desc->image_depth)
248 throw error(CL_INVALID_IMAGE_SIZE);
249
250 if (all_of([=](const device &dev) {
251 const size_t max = dev.max_image_size_3d();
252 return (desc->image_width > max ||
253 desc->image_height > max ||
254 desc->image_depth > max);
255 }, ctx.devices()))
256 throw error(CL_INVALID_IMAGE_SIZE);
257
258 const size_t slice_pitch = desc->image_slice_pitch ?
259 desc->image_slice_pitch : row_pitch * desc->image_height;
260
261 return new image3d(ctx, properties, flags, format,
262 desc->image_width, desc->image_height,
263 desc->image_depth, row_pitch,
264 slice_pitch, host_ptr);
265 }
266
267 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
268 case CL_MEM_OBJECT_IMAGE1D_BUFFER:
269 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
270 // XXX - Not implemented.
271 throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
272
273 default:
274 throw error(CL_INVALID_IMAGE_DESCRIPTOR);
275 }
276
277 } catch (error &e) {
278 ret_error(r_errcode, e);
279 return NULL;
280 }
281
282 CLOVER_API cl_mem
clCreateImage(cl_context d_ctx,cl_mem_flags d_flags,const cl_image_format * format,const cl_image_desc * desc,void * host_ptr,cl_int * r_errcode)283 clCreateImage(cl_context d_ctx,
284 cl_mem_flags d_flags,
285 const cl_image_format *format,
286 const cl_image_desc *desc,
287 void *host_ptr, cl_int *r_errcode) {
288 return clCreateImageWithProperties(d_ctx, NULL, d_flags, format, desc, host_ptr, r_errcode);
289 }
290
291
292 CLOVER_API cl_mem
clCreateImage2D(cl_context d_ctx,cl_mem_flags d_flags,const cl_image_format * format,size_t width,size_t height,size_t row_pitch,void * host_ptr,cl_int * r_errcode)293 clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags,
294 const cl_image_format *format,
295 size_t width, size_t height, size_t row_pitch,
296 void *host_ptr, cl_int *r_errcode) {
297 const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0,
298 row_pitch, 0, 0, 0, NULL };
299
300 return clCreateImageWithProperties(d_ctx, NULL, d_flags, format, &desc, host_ptr, r_errcode);
301 }
302
303 CLOVER_API cl_mem
clCreateImage3D(cl_context d_ctx,cl_mem_flags d_flags,const cl_image_format * format,size_t width,size_t height,size_t depth,size_t row_pitch,size_t slice_pitch,void * host_ptr,cl_int * r_errcode)304 clCreateImage3D(cl_context d_ctx, cl_mem_flags d_flags,
305 const cl_image_format *format,
306 size_t width, size_t height, size_t depth,
307 size_t row_pitch, size_t slice_pitch,
308 void *host_ptr, cl_int *r_errcode) {
309 const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE3D, width, height, depth, 0,
310 row_pitch, slice_pitch, 0, 0, NULL };
311
312 return clCreateImageWithProperties(d_ctx, NULL, d_flags, format, &desc, host_ptr, r_errcode);
313 }
314
315 CLOVER_API cl_int
clGetSupportedImageFormats(cl_context d_ctx,cl_mem_flags flags,cl_mem_object_type type,cl_uint count,cl_image_format * r_buf,cl_uint * r_count)316 clGetSupportedImageFormats(cl_context d_ctx, cl_mem_flags flags,
317 cl_mem_object_type type, cl_uint count,
318 cl_image_format *r_buf, cl_uint *r_count) try {
319 auto &ctx = obj(d_ctx);
320 auto formats = supported_formats(ctx, type);
321
322 if (flags & CL_MEM_KERNEL_READ_AND_WRITE) {
323 if (r_count)
324 *r_count = 0;
325 return CL_SUCCESS;
326 }
327
328 if (flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE) &&
329 type == CL_MEM_OBJECT_IMAGE3D) {
330 if (r_count)
331 *r_count = 0;
332 return CL_SUCCESS;
333 }
334
335 validate_flags(NULL, flags, false);
336
337 if (r_buf && !count)
338 throw error(CL_INVALID_VALUE);
339
340 if (r_buf)
341 std::copy_n(formats.begin(),
342 std::min((cl_uint)formats.size(), count),
343 r_buf);
344
345 if (r_count)
346 *r_count = formats.size();
347
348 return CL_SUCCESS;
349
350 } catch (error &e) {
351 return e.get();
352 }
353
354 CLOVER_API cl_int
clGetMemObjectInfo(cl_mem d_mem,cl_mem_info param,size_t size,void * r_buf,size_t * r_size)355 clGetMemObjectInfo(cl_mem d_mem, cl_mem_info param,
356 size_t size, void *r_buf, size_t *r_size) try {
357 property_buffer buf { r_buf, size, r_size };
358 auto &mem = obj(d_mem);
359
360 switch (param) {
361 case CL_MEM_TYPE:
362 buf.as_scalar<cl_mem_object_type>() = mem.type();
363 break;
364
365 case CL_MEM_FLAGS:
366 buf.as_scalar<cl_mem_flags>() = mem.flags();
367 break;
368
369 case CL_MEM_SIZE:
370 buf.as_scalar<size_t>() = mem.size();
371 break;
372
373 case CL_MEM_HOST_PTR:
374 buf.as_scalar<void *>() = mem.host_ptr();
375 break;
376
377 case CL_MEM_MAP_COUNT:
378 buf.as_scalar<cl_uint>() = 0;
379 break;
380
381 case CL_MEM_REFERENCE_COUNT:
382 buf.as_scalar<cl_uint>() = mem.ref_count();
383 break;
384
385 case CL_MEM_CONTEXT:
386 buf.as_scalar<cl_context>() = desc(mem.context());
387 break;
388
389 case CL_MEM_ASSOCIATED_MEMOBJECT: {
390 sub_buffer *sub = dynamic_cast<sub_buffer *>(&mem);
391 buf.as_scalar<cl_mem>() = (sub ? desc(sub->parent()) : NULL);
392 break;
393 }
394 case CL_MEM_OFFSET: {
395 sub_buffer *sub = dynamic_cast<sub_buffer *>(&mem);
396 buf.as_scalar<size_t>() = (sub ? sub->offset() : 0);
397 break;
398 }
399 case CL_MEM_USES_SVM_POINTER:
400 case CL_MEM_USES_SVM_POINTER_ARM: {
401 // with system SVM all host ptrs are SVM pointers
402 // TODO: once we support devices with lower levels of SVM, we have to
403 // check the ptr in more detail
404 const bool system_svm = all_of(std::mem_fn(&device::has_system_svm),
405 mem.context().devices());
406 buf.as_scalar<cl_bool>() = mem.host_ptr() && system_svm;
407 break;
408 }
409 case CL_MEM_PROPERTIES:
410 buf.as_vector<cl_mem_properties>() = mem.properties();
411 break;
412 default:
413 throw error(CL_INVALID_VALUE);
414 }
415
416 return CL_SUCCESS;
417
418 } catch (error &e) {
419 return e.get();
420 }
421
422 CLOVER_API cl_int
clGetImageInfo(cl_mem d_mem,cl_image_info param,size_t size,void * r_buf,size_t * r_size)423 clGetImageInfo(cl_mem d_mem, cl_image_info param,
424 size_t size, void *r_buf, size_t *r_size) try {
425 property_buffer buf { r_buf, size, r_size };
426 auto &img = obj<image>(d_mem);
427
428 switch (param) {
429 case CL_IMAGE_FORMAT:
430 buf.as_scalar<cl_image_format>() = img.format();
431 break;
432
433 case CL_IMAGE_ELEMENT_SIZE:
434 buf.as_scalar<size_t>() = img.pixel_size();
435 break;
436
437 case CL_IMAGE_ROW_PITCH:
438 buf.as_scalar<size_t>() = img.row_pitch();
439 break;
440
441 case CL_IMAGE_SLICE_PITCH:
442 buf.as_scalar<size_t>() = img.slice_pitch();
443 break;
444
445 case CL_IMAGE_WIDTH:
446 buf.as_scalar<size_t>() = img.width();
447 break;
448
449 case CL_IMAGE_HEIGHT:
450 buf.as_scalar<size_t>() = img.height();
451 break;
452
453 case CL_IMAGE_DEPTH:
454 buf.as_scalar<size_t>() = img.depth();
455 break;
456
457 case CL_IMAGE_NUM_MIP_LEVELS:
458 buf.as_scalar<cl_uint>() = 0;
459 break;
460
461 case CL_IMAGE_NUM_SAMPLES:
462 buf.as_scalar<cl_uint>() = 0;
463 break;
464
465 default:
466 throw error(CL_INVALID_VALUE);
467 }
468
469 return CL_SUCCESS;
470
471 } catch (error &e) {
472 return e.get();
473 }
474
475 CLOVER_API cl_int
clRetainMemObject(cl_mem d_mem)476 clRetainMemObject(cl_mem d_mem) try {
477 obj(d_mem).retain();
478 return CL_SUCCESS;
479
480 } catch (error &e) {
481 return e.get();
482 }
483
484 CLOVER_API cl_int
clReleaseMemObject(cl_mem d_mem)485 clReleaseMemObject(cl_mem d_mem) try {
486 if (obj(d_mem).release())
487 delete pobj(d_mem);
488
489 return CL_SUCCESS;
490
491 } catch (error &e) {
492 return e.get();
493 }
494
495 CLOVER_API cl_int
clSetMemObjectDestructorCallback(cl_mem d_mem,void (CL_CALLBACK * pfn_notify)(cl_mem,void *),void * user_data)496 clSetMemObjectDestructorCallback(cl_mem d_mem,
497 void (CL_CALLBACK *pfn_notify)(cl_mem, void *),
498 void *user_data) try {
499 auto &mem = obj(d_mem);
500
501 if (!pfn_notify)
502 return CL_INVALID_VALUE;
503
504 mem.destroy_notify([=]{ pfn_notify(d_mem, user_data); });
505
506 return CL_SUCCESS;
507
508 } catch (error &e) {
509 return e.get();
510 }
511
512 CLOVER_API void *
clSVMAlloc(cl_context d_ctx,cl_svm_mem_flags flags,size_t size,unsigned int alignment)513 clSVMAlloc(cl_context d_ctx,
514 cl_svm_mem_flags flags,
515 size_t size,
516 unsigned int alignment) try {
517 auto &ctx = obj(d_ctx);
518
519 if (!any_of(std::mem_fn(&device::svm_support), ctx.devices()))
520 return NULL;
521
522 validate_flags(NULL, flags, true);
523
524 if (!size ||
525 size > fold(minimum(), cl_ulong(ULONG_MAX),
526 map(std::mem_fn(&device::max_mem_alloc_size), ctx.devices())))
527 return nullptr;
528
529 if (!util_is_power_of_two_or_zero(alignment))
530 return nullptr;
531
532 if (!alignment)
533 alignment = 0x80; // sizeof(long16)
534
535 #if HAVE_POSIX_MEMALIGN
536 bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
537 if (can_emulate) {
538 // we can ignore all the flags as it's not required to honor them.
539 void *ptr = nullptr;
540 if (alignment < sizeof(void*))
541 alignment = sizeof(void*);
542 posix_memalign(&ptr, alignment, size);
543
544 if (ptr)
545 ctx.add_svm_allocation(ptr, size);
546
547 return ptr;
548 }
549 #endif
550
551 CLOVER_NOT_SUPPORTED_UNTIL("2.0");
552 return nullptr;
553
554 } catch (error &) {
555 return nullptr;
556 }
557
558 CLOVER_API void
clSVMFree(cl_context d_ctx,void * svm_pointer)559 clSVMFree(cl_context d_ctx,
560 void *svm_pointer) try {
561 auto &ctx = obj(d_ctx);
562
563 if (!any_of(std::mem_fn(&device::svm_support), ctx.devices()))
564 return;
565
566 bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
567
568 if (can_emulate) {
569 ctx.remove_svm_allocation(svm_pointer);
570 return free(svm_pointer);
571 }
572
573 CLOVER_NOT_SUPPORTED_UNTIL("2.0");
574
575 } catch (error &) {
576 }
577