use crate::api::icd::*;
use crate::api::types::*;
use crate::api::util::*;
use crate::core::context::*;
use crate::core::device::*;
use crate::core::format::*;
use crate::core::gl::*;
use crate::core::queue::*;
use crate::core::util::*;
use crate::impl_cl_type_trait;
use crate::impl_cl_type_trait_base;

use mesa_rust::pipe::context::*;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::ResourceType;
use mesa_rust::pipe::transfer::*;
use mesa_rust_gen::*;
use mesa_rust_util::math::*;
use mesa_rust_util::properties::Properties;
use rusticl_opencl_gen::*;

use std::cmp;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::convert::TryInto;
use std::mem;
use std::mem::size_of;
use std::ops::Deref;
use std::os::raw::c_void;
use std::ptr;
use std::sync::Arc;
use std::sync::Mutex;

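/// Bookkeeping for a single device-side mapping transfer: the pipe transfer itself, an optional
/// staging shadow resource backing the mapped pointer, and a count of map operations that have
/// been created but not yet processed.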
struct MappingTransfer {
    tx: PipeTransfer,
    shadow: Option<PipeResource>,
    pending: u32,
}

impl MappingTransfer {
    fn new(tx: PipeTransfer, shadow: Option<PipeResource>) -> Self {
        MappingTransfer {
            tx: tx,
            shadow: shadow,
            pending: 1,
        }
    }
}

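/// Tracks all mappings of a memory object: `tx` holds the active transfer per device, while
/// `maps` counts how often each returned pointer is currently mapped.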
struct Mappings {
    tx: HashMap<&'static Device, MappingTransfer>,
    maps: HashMap<usize, u32>,
}

impl Mappings {
    fn new() -> Mutex<Self> {
        Mutex::new(Mappings {
            tx: HashMap::new(),
            maps: HashMap::new(),
        })
    }

    fn contains_ptr(&self, ptr: *mut c_void) -> bool {
        let ptr = ptr as usize;
        self.maps.contains_key(&ptr)
    }

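    // `pending` counts map operations that have been created but whose sync hasn't been processed
    // yet; `clean_up_tx` only destroys a transfer once it dropped back to 0.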
    fn mark_pending(&mut self, dev: &Device) {
        self.tx.get_mut(dev).unwrap().pending += 1;
    }

    fn unmark_pending(&mut self, dev: &Device) {
        if let Some(tx) = self.tx.get_mut(dev) {
            tx.pending -= 1;
        }
    }

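    // Returns true if this was the first active mapping of the object, i.e. the caller still has
    // to synchronize the content behind the mapped pointer.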
    fn increase_ref(&mut self, dev: &Device, ptr: *mut c_void) -> bool {
        let ptr = ptr as usize;
        let res = self.maps.is_empty();
        *self.maps.entry(ptr).or_default() += 1;
        self.unmark_pending(dev);
        res
    }

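    // Returns (true, shadow) once the last mapping of the object got removed, in which case the
    // caller has to write any shadowed content back to the real resource.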
    fn decrease_ref(&mut self, ptr: *mut c_void, dev: &Device) -> (bool, Option<&PipeResource>) {
        let ptr = ptr as usize;
        if let Some(r) = self.maps.get_mut(&ptr) {
            *r -= 1;

            if *r == 0 {
                self.maps.remove(&ptr);
            }

            if self.maps.is_empty() {
                let shadow = self.tx.get(dev).and_then(|tx| tx.shadow.as_ref());
                return (true, shadow);
            }
        }
        (false, None)
    }

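    // Destroys the transfer of `dev` once no mapping and no pending map operation references it
    // anymore.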
    fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
        if self.maps.is_empty() {
            if let Some(tx) = self.tx.get(&dev) {
                if tx.pending == 0 {
                    self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
                }
            }
        }
    }
}

#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct ConstMemoryPtr {
    ptr: *const c_void,
}
unsafe impl Send for ConstMemoryPtr {}
unsafe impl Sync for ConstMemoryPtr {}

impl ConstMemoryPtr {
    pub fn as_ptr(&self) -> *const c_void {
        self.ptr
    }

    /// # Safety
    ///
    /// Users need to ensure that `ptr` is only accessed in a thread-safe manner sufficient for
    /// [Send] and [Sync]
    pub unsafe fn from_ptr(ptr: *const c_void) -> Self {
        Self { ptr: ptr }
    }
}

#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct MutMemoryPtr {
    ptr: *mut c_void,
}
unsafe impl Send for MutMemoryPtr {}
unsafe impl Sync for MutMemoryPtr {}

impl MutMemoryPtr {
    pub fn as_ptr(&self) -> *mut c_void {
        self.ptr
    }

    /// # Safety
    ///
    /// Users need to ensure that `ptr` is only accessed in a thread-safe manner sufficient for
    /// [Send] and [Sync]
    pub unsafe fn from_ptr(ptr: *mut c_void) -> Self {
        Self { ptr: ptr }
    }
}

pub enum Mem {
    Buffer(Arc<Buffer>),
    Image(Arc<Image>),
}

impl Deref for Mem {
    type Target = MemBase;

    fn deref(&self) -> &Self::Target {
        match self {
            Self::Buffer(b) => &b.base,
            Self::Image(i) => &i.base,
        }
    }
}

impl Mem {
    pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        match self {
            Self::Buffer(b) => b.unmap(q, ctx, ptr),
            Self::Image(i) => i.unmap(q, ctx, ptr),
        }
    }
}

/// # Mapping memory
///
/// Maps the resource of the device associated with the queue.
///
/// Mapping resources would have been quite straightforward if OpenCL didn't allow so-called
/// non-blocking maps. Non-blocking maps shall return a valid pointer to the mapped region
/// immediately, but should not synchronize data (in case of shadow buffers) until after the map
/// event is reached in the queue. This makes it impossible to simply use pipe_transfers, as those
/// can't be explicitly synced by the frontend.
///
/// In order to have a compliant implementation of the mapping API we have to consider the following
/// cases:
/// 1. Mapping a cl_mem object with CL_MEM_USE_HOST_PTR: We simply return the host_ptr.
///    Synchronization of shadowed host ptrs is done in `sync_shadow` on demand.
/// 2. Mapping linear resources on UMA systems: We simply create the pipe_transfer with
///    `PIPE_MAP_DIRECTLY` and `PIPE_MAP_UNSYNCHRONIZED` and return the attached pointer.
/// 3. On non-UMA systems or when 2. fails (e.g. due to the resource being tiled) we
///    - create a shadow pipe_resource with `PIPE_USAGE_STAGING`,
///      `PIPE_RESOURCE_FLAG_MAP_PERSISTENT` and `PIPE_RESOURCE_FLAG_MAP_COHERENT`
///    - create a pipe_transfer with `PIPE_MAP_COHERENT`, `PIPE_MAP_PERSISTENT` and
///      `PIPE_MAP_UNSYNCHRONIZED`
///    - sync the shadow buffer like a host_ptr shadow buffer in 1.
///
/// Taking this approach we guarantee that we only copy when actually needed while making sure the
/// content behind the returned pointer is valid until unmapped.
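// A rough sketch of the expected call sequence for a non-blocking map, assuming the API layer
// drives it roughly like this (the exact call sites live in the api/event code, not here):
//
//     // clEnqueueMapBuffer(..., blocking_map = CL_FALSE, ...)
//     let ptr = buffer.map(dev, offset)?;   // returns a valid pointer immediately
//     // once the map event executes on the queue:
//     buffer.sync_shadow(q, ctx, ptr)?;     // syncs shadow/host_ptr content on demand
//     // once the corresponding unmap event executes:
//     buffer.unmap(q, ctx, ptr)?;           // writes back and releases the transfer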
pub struct MemBase {
    pub base: CLObjectBase<CL_INVALID_MEM_OBJECT>,
    pub context: Arc<Context>,
    pub parent: Option<Mem>,
    pub mem_type: cl_mem_object_type,
    pub flags: cl_mem_flags,
    pub size: usize,
    // it's a bit hacky, but storing the pointer as `usize` gives us `Send` and `Sync`. The
    // application is required to ensure no data races exist on the memory anyway.
    pub host_ptr: usize,
    pub props: Vec<cl_mem_properties>,
    pub cbs: Mutex<Vec<MemCB>>,
    pub gl_obj: Option<GLObject>,
    res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
    maps: Mutex<Mappings>,
}

pub struct Buffer {
    base: MemBase,
    pub offset: usize,
}

pub struct Image {
    base: MemBase,
    pub image_format: cl_image_format,
    pub pipe_format: pipe_format,
    pub image_desc: cl_image_desc,
    pub image_elem_size: u8,
}

impl Deref for Buffer {
    type Target = MemBase;

    fn deref(&self) -> &Self::Target {
        &self.base
    }
}

impl Deref for Image {
    type Target = MemBase;

    fn deref(&self) -> &Self::Target {
        &self.base
    }
}

impl_cl_type_trait_base!(cl_mem, MemBase, [Buffer, Image], CL_INVALID_MEM_OBJECT);
impl_cl_type_trait!(cl_mem, Buffer, CL_INVALID_MEM_OBJECT, base.base);
impl_cl_type_trait!(cl_mem, Image, CL_INVALID_MEM_OBJECT, base.base);

pub trait CLImageDescInfo {
    fn type_info(&self) -> (u8, bool);
    fn pixels(&self) -> usize;
    fn bx(&self) -> CLResult<pipe_box>;
    fn row_pitch(&self) -> CLResult<u32>;
    fn slice_pitch(&self) -> usize;
    fn width(&self) -> CLResult<u32>;
    fn height(&self) -> CLResult<u32>;
    fn size(&self) -> CLVec<usize>;

    fn dims(&self) -> u8 {
        self.type_info().0
    }

    fn dims_with_array(&self) -> u8 {
        let array: u8 = self.is_array().into();
        self.dims() + array
    }

    fn has_slice(&self) -> bool {
        self.dims() == 3 || self.is_array()
    }

    fn is_array(&self) -> bool {
        self.type_info().1
    }
}

impl CLImageDescInfo for cl_image_desc {
    fn type_info(&self) -> (u8, bool) {
        match self.image_type {
            CL_MEM_OBJECT_IMAGE1D | CL_MEM_OBJECT_IMAGE1D_BUFFER => (1, false),
            CL_MEM_OBJECT_IMAGE1D_ARRAY => (1, true),
            CL_MEM_OBJECT_IMAGE2D => (2, false),
            CL_MEM_OBJECT_IMAGE2D_ARRAY => (2, true),
            CL_MEM_OBJECT_IMAGE3D => (3, false),
            _ => panic!("unknown image_type {:x}", self.image_type),
        }
    }

    fn pixels(&self) -> usize {
        let mut res = self.image_width;
        let dims = self.dims();

        if dims > 1 {
            res *= self.image_height;
        }

        if dims > 2 {
            res *= self.image_depth;
        }

        if self.is_array() {
            res *= self.image_array_size;
        }

        res
    }

    fn size(&self) -> CLVec<usize> {
        let mut height = cmp::max(self.image_height, 1);
        let mut depth = cmp::max(self.image_depth, 1);

        match self.image_type {
            CL_MEM_OBJECT_IMAGE1D_ARRAY => height = self.image_array_size,
            CL_MEM_OBJECT_IMAGE2D_ARRAY => depth = self.image_array_size,
            _ => {}
        }

        CLVec::new([self.image_width, height, depth])
    }

    fn bx(&self) -> CLResult<pipe_box> {
        create_pipe_box(CLVec::default(), self.size(), self.image_type)
    }

    fn row_pitch(&self) -> CLResult<u32> {
        self.image_row_pitch
            .try_into()
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)
    }

    fn slice_pitch(&self) -> usize {
        self.image_slice_pitch
    }

    fn width(&self) -> CLResult<u32> {
        self.image_width
            .try_into()
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)
    }

    fn height(&self) -> CLResult<u32> {
        self.image_height
            .try_into()
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)
    }
}

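// Generic CPU copy of a 3D region between two linearly laid out allocations. For every (y, z)
// row it copies `region[0] * pixel_size` bytes, where the source and destination start addresses
// are computed as `base + (origin + [0, y, z]) · [pixel_size, row_pitch, slice_pitch]`.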
fn sw_copy(
    src: *const c_void,
    dst: *mut c_void,
    region: &CLVec<usize>,
    src_origin: &CLVec<usize>,
    src_row_pitch: usize,
    src_slice_pitch: usize,
    dst_origin: &CLVec<usize>,
    dst_row_pitch: usize,
    dst_slice_pitch: usize,
    pixel_size: u8,
) {
    for z in 0..region[2] {
        for y in 0..region[1] {
            unsafe {
                ptr::copy_nonoverlapping(
                    src.add(
                        (*src_origin + [0, y, z])
                            * [pixel_size as usize, src_row_pitch, src_slice_pitch],
                    ),
                    dst.add(
                        (*dst_origin + [0, y, z])
                            * [pixel_size as usize, dst_row_pitch, dst_slice_pitch],
                    ),
                    region[0] * pixel_size as usize,
                )
            };
        }
    }
}

/// helper function to determine if we can just map the resource in question or if we have to go
/// through a shadow buffer to let the CPU access the resource's memory
fn can_map_directly(dev: &Device, res: &PipeResource) -> bool {
    // there are two parts to this check:
    //   1. is the resource located in system RAM
    //   2. does the resource have a linear memory layout
    // we do not want to map memory over the PCIe bus as this generally leads to bad performance.
    (dev.unified_memory() || res.is_staging() || res.is_user)
        && (res.is_buffer() || res.is_linear())
}

impl MemBase {
    pub fn new_buffer(
        context: Arc<Context>,
        flags: cl_mem_flags,
        size: usize,
        host_ptr: *mut c_void,
        props: Vec<cl_mem_properties>,
    ) -> CLResult<Arc<Buffer>> {
        let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
            ResourceType::Staging
        } else {
            ResourceType::Normal
        };

        let buffer = context.create_buffer(
            size,
            host_ptr,
            bit_check(flags, CL_MEM_COPY_HOST_PTR),
            res_type,
        )?;

        let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
            host_ptr as usize
        } else {
            0
        };

        Ok(Arc::new(Buffer {
            base: Self {
                base: CLObjectBase::new(RusticlTypes::Buffer),
                context: context,
                parent: None,
                mem_type: CL_MEM_OBJECT_BUFFER,
                flags: flags,
                size: size,
                host_ptr: host_ptr,
                props: props,
                gl_obj: None,
                cbs: Mutex::new(Vec::new()),
                res: Some(buffer),
                maps: Mappings::new(),
            },
            offset: 0,
        }))
    }

    pub fn new_sub_buffer(
        parent: Arc<Buffer>,
        flags: cl_mem_flags,
        offset: usize,
        size: usize,
    ) -> Arc<Buffer> {
        let host_ptr = if parent.host_ptr().is_null() {
            0
        } else {
            unsafe { parent.host_ptr().add(offset) as usize }
        };

        Arc::new(Buffer {
            base: Self {
                base: CLObjectBase::new(RusticlTypes::Buffer),
                context: parent.context.clone(),
                parent: Some(Mem::Buffer(parent)),
                mem_type: CL_MEM_OBJECT_BUFFER,
                flags: flags,
                size: size,
                host_ptr: host_ptr,
                props: Vec::new(),
                gl_obj: None,
                cbs: Mutex::new(Vec::new()),
                res: None,
                maps: Mappings::new(),
            },
            offset: offset,
        })
    }

    pub fn new_image(
        context: Arc<Context>,
        parent: Option<Mem>,
        mem_type: cl_mem_object_type,
        flags: cl_mem_flags,
        image_format: &cl_image_format,
        mut image_desc: cl_image_desc,
        image_elem_size: u8,
        host_ptr: *mut c_void,
        props: Vec<cl_mem_properties>,
    ) -> CLResult<Arc<Image>> {
        // we have to sanitize the image_desc a little for internal use
        let api_image_desc = image_desc;
        let dims = image_desc.dims();
        let is_array = image_desc.is_array();
        if dims < 3 {
            image_desc.image_depth = 1;
        }
        if dims < 2 {
            image_desc.image_height = 1;
        }
        if !is_array {
            image_desc.image_array_size = 1;
        }

        let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
            ResourceType::Staging
        } else {
            ResourceType::Normal
        };

        let texture = if parent.is_none() {
            let mut texture = context.create_texture(
                &image_desc,
                image_format,
                host_ptr,
                bit_check(flags, CL_MEM_COPY_HOST_PTR),
                res_type,
            );

            // if we fail to allocate a Staging resource, just try again with Normal, as
            // `CL_MEM_ALLOC_HOST_PTR` is only a performance hint.
            if res_type == ResourceType::Staging && texture.is_err() {
                texture = context.create_texture(
                    &image_desc,
                    image_format,
                    host_ptr,
                    bit_check(flags, CL_MEM_COPY_HOST_PTR),
                    ResourceType::Normal,
                )
            }

            Some(texture?)
        } else {
            None
        };

        let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
            host_ptr as usize
        } else {
            0
        };

        let pipe_format = image_format.to_pipe_format().unwrap();
        Ok(Arc::new(Image {
            base: Self {
                base: CLObjectBase::new(RusticlTypes::Image),
                context: context,
                parent: parent,
                mem_type: mem_type,
                flags: flags,
                size: image_desc.pixels() * image_format.pixel_size().unwrap() as usize,
                host_ptr: host_ptr,
                props: props,
                gl_obj: None,
                cbs: Mutex::new(Vec::new()),
                res: texture,
                maps: Mappings::new(),
            },
            image_format: *image_format,
            pipe_format: pipe_format,
            image_desc: api_image_desc,
            image_elem_size: image_elem_size,
        }))
    }

    pub fn arc_from_raw(ptr: cl_mem) -> CLResult<Mem> {
        let mem = Self::ref_from_raw(ptr)?;
        match mem.base.get_type()? {
            RusticlTypes::Buffer => Ok(Mem::Buffer(Buffer::arc_from_raw(ptr)?)),
            RusticlTypes::Image => Ok(Mem::Image(Image::arc_from_raw(ptr)?)),
            _ => Err(CL_INVALID_MEM_OBJECT),
        }
    }

    pub fn arcs_from_arr(objs: *const cl_mem, count: u32) -> CLResult<Vec<Mem>> {
        let count = count as usize;
        let mut res = Vec::with_capacity(count);
        for i in 0..count {
            res.push(Self::arc_from_raw(unsafe { *objs.add(i) })?);
        }
        Ok(res)
    }

    pub fn from_gl(
        context: Arc<Context>,
        flags: cl_mem_flags,
        gl_export_manager: &GLExportManager,
    ) -> CLResult<cl_mem> {
        let export_in = &gl_export_manager.export_in;
        let export_out = &gl_export_manager.export_out;

        let (mem_type, gl_object_type) = target_from_gl(export_in.target)?;
        let gl_mem_props = gl_export_manager.get_gl_mem_props()?;

        // Handle Buffers
        let (image_format, pipe_format, rusticl_type) = if gl_export_manager.is_gl_buffer() {
            (
                cl_image_format::default(),
                pipe_format::PIPE_FORMAT_NONE,
                RusticlTypes::Buffer,
            )
        } else {
            let image_format =
                format_from_gl(export_out.internal_format).ok_or(CL_OUT_OF_HOST_MEMORY)?;
            (
                image_format,
                image_format.to_pipe_format().unwrap(),
                RusticlTypes::Image,
            )
        };

        let imported_gl_tex = context.import_gl_buffer(
            export_out.dmabuf_fd as u32,
            export_out.modifier,
            mem_type,
            export_in.target,
            pipe_format,
            gl_mem_props.clone(),
        )?;

        // Cube map faces are not linear in memory, so copy all contents
        // of the desired face into a 2D image and copy it back after GL release.
        let (shadow_map, texture) = if is_cube_map_face(export_in.target) {
            let shadow = create_shadow_slice(&imported_gl_tex, image_format)?;

            let mut res_map = HashMap::new();
            shadow
                .iter()
                .map(|(k, v)| {
                    let gl_res = imported_gl_tex.get(k).unwrap().clone();
                    res_map.insert(v.clone(), gl_res);
                })
                .for_each(drop);

            (Some(res_map), shadow)
        } else {
            (None, imported_gl_tex)
        };

        // it's kinda not supported, but we want to know if anything actually hits this as it's
        // certainly not tested by the CL CTS.
        if mem_type != CL_MEM_OBJECT_BUFFER {
            assert_eq!(gl_mem_props.offset, 0);
        }

        let base = Self {
            base: CLObjectBase::new(rusticl_type),
            context: context,
            parent: None,
            mem_type: mem_type,
            flags: flags,
            size: gl_mem_props.size(),
            host_ptr: 0,
            props: Vec::new(),
            gl_obj: Some(GLObject {
                gl_object_target: gl_export_manager.export_in.target,
                gl_object_type: gl_object_type,
                gl_object_name: export_in.obj,
                shadow_map: shadow_map,
            }),
            cbs: Mutex::new(Vec::new()),
            res: Some(texture),
            maps: Mappings::new(),
        };

        Ok(if rusticl_type == RusticlTypes::Buffer {
            Arc::new(Buffer {
                base: base,
                offset: gl_mem_props.offset as usize,
            })
            .into_cl()
        } else {
            Arc::new(Image {
                base: base,
                image_format: image_format,
                pipe_format: pipe_format,
                image_desc: cl_image_desc {
                    image_type: mem_type,
                    image_width: gl_mem_props.width as usize,
                    image_height: gl_mem_props.height as usize,
                    image_depth: gl_mem_props.depth as usize,
                    image_array_size: gl_mem_props.array_size as usize,
                    image_row_pitch: 0,
                    image_slice_pitch: 0,
                    num_mip_levels: 1,
                    num_samples: 1,
                    ..Default::default()
                },
                image_elem_size: gl_mem_props.pixel_size,
            })
            .into_cl()
        })
    }

    pub fn is_buffer(&self) -> bool {
        self.mem_type == CL_MEM_OBJECT_BUFFER
    }

    pub fn has_same_parent(&self, other: &Self) -> bool {
        ptr::eq(self.get_parent(), other.get_parent())
    }

    // this is kinda bogus, because that won't work with system SVM, but the spec wants us to
    // implement this.
    pub fn is_svm(&self) -> bool {
        let mem = self.get_parent();
        self.context.find_svm_alloc(mem.host_ptr).is_some()
            && bit_check(mem.flags, CL_MEM_USE_HOST_PTR)
    }

    pub fn get_res_of_dev(&self, dev: &Device) -> CLResult<&Arc<PipeResource>> {
        self.get_parent()
            .res
            .as_ref()
            .and_then(|resources| resources.get(dev))
            .ok_or(CL_OUT_OF_HOST_MEMORY)
    }

    fn get_parent(&self) -> &Self {
        if let Some(parent) = &self.parent {
            parent
        } else {
            self
        }
    }

    fn has_user_shadow_buffer(&self, d: &Device) -> CLResult<bool> {
        let r = self.get_res_of_dev(d)?;
        Ok(!r.is_user && bit_check(self.flags, CL_MEM_USE_HOST_PTR))
    }

    pub fn host_ptr(&self) -> *mut c_void {
        self.host_ptr as *mut c_void
    }

    pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
        self.maps.lock().unwrap().contains_ptr(ptr)
    }
}

impl Drop for MemBase {
    fn drop(&mut self) {
        let cbs = mem::take(self.cbs.get_mut().unwrap());
        for cb in cbs.into_iter().rev() {
            cb.call(self);
        }

        for (d, tx) in self.maps.get_mut().unwrap().tx.drain() {
            d.helper_ctx().unmap(tx.tx);
        }
    }
}

impl Buffer {
    fn apply_offset(&self, offset: usize) -> CLResult<usize> {
        self.offset.checked_add(offset).ok_or(CL_OUT_OF_HOST_MEMORY)
    }

    pub fn copy_rect(
        &self,
        dst: &Self,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        src_row_pitch: usize,
        src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,
    ) -> CLResult<()> {
        let (offset, size) =
            CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]);
        let tx_src = self.tx(q, ctx, offset, size, RWFlags::RD)?;

        let (offset, size) =
            CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]);
        let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;

        // TODO check to use hw accelerated paths (e.g. resource_copy_region or blits)
        sw_copy(
            tx_src.ptr(),
            tx_dst.ptr(),
            region,
            &CLVec::default(),
            src_row_pitch,
            src_slice_pitch,
            &CLVec::default(),
            dst_row_pitch,
            dst_slice_pitch,
            1,
        );

        Ok(())
    }

    pub fn copy_to_buffer(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        dst: &Buffer,
        src_offset: usize,
        dst_offset: usize,
        size: usize,
    ) -> CLResult<()> {
        let src_offset = self.apply_offset(src_offset)?;
        let dst_offset = dst.apply_offset(dst_offset)?;
        let src_res = self.get_res_of_dev(q.device)?;
        let dst_res = dst.get_res_of_dev(q.device)?;

        let bx = create_pipe_box(
            [src_offset, 0, 0].into(),
            [size, 1, 1].into(),
            CL_MEM_OBJECT_BUFFER,
        )?;
        let dst_origin: [u32; 3] = [
            dst_offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            0,
            0,
        ];

        ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
        Ok(())
    }

    pub fn copy_to_image(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        dst: &Image,
        src_offset: usize,
        dst_origin: CLVec<usize>,
        region: &CLVec<usize>,
    ) -> CLResult<()> {
        let src_offset = self.apply_offset(src_offset)?;
        let bpp = dst.image_format.pixel_size().unwrap().into();
        let src_pitch = [bpp, bpp * region[0], bpp * region[0] * region[1]];
        let size = CLVec::calc_size(region, src_pitch);
        let tx_src = self.tx(q, ctx, src_offset, size, RWFlags::RD)?;

        // If the image is created from a buffer, use the image's slice and row pitch instead
        let tx_dst;
        let dst_pitch;
        if let Some(Mem::Buffer(buffer)) = &dst.parent {
            dst_pitch = [
                bpp,
                dst.image_desc.row_pitch()? as usize,
                dst.image_desc.slice_pitch(),
            ];

            let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
            tx_dst = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;
        } else {
            tx_dst = dst.tx_image(
                q,
                ctx,
                &create_pipe_box(dst_origin, *region, dst.mem_type)?,
                RWFlags::WR,
            )?;

            dst_pitch = [1, tx_dst.row_pitch() as usize, tx_dst.slice_pitch()];
        }

        // These pitch values cannot have a 0 value in any of their coordinates
        debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
        debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);

        sw_copy(
            tx_src.ptr(),
            tx_dst.ptr(),
            region,
            &CLVec::default(),
            src_pitch[1],
            src_pitch[2],
            &CLVec::default(),
            dst_pitch[1],
            dst_pitch[2],
            bpp as u8,
        );
        Ok(())
    }

    pub fn fill(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        pattern: &[u8],
        offset: usize,
        size: usize,
    ) -> CLResult<()> {
        let offset = self.apply_offset(offset)?;
        let res = self.get_res_of_dev(q.device)?;
        ctx.clear_buffer(
            res,
            pattern,
            offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        );
        Ok(())
    }

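    // Returns a pointer the application can use right away (non-blocking map semantics): either
    // the user-provided host_ptr or the pointer of a (possibly shadow) transfer. The actual data
    // synchronization happens later in `sync_shadow`.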
    pub fn map(&self, dev: &'static Device, offset: usize) -> CLResult<MutMemoryPtr> {
        let ptr = if self.has_user_shadow_buffer(dev)? {
            self.host_ptr()
        } else {
            let mut lock = self.maps.lock().unwrap();

            if let Entry::Vacant(e) = lock.tx.entry(dev) {
                let (tx, res) = self.tx_raw_async(dev, RWFlags::RW)?;
                e.insert(MappingTransfer::new(tx, res));
            } else {
                lock.mark_pending(dev);
            }

            lock.tx.get(dev).unwrap().tx.ptr()
        };

        let ptr = unsafe { ptr.add(offset) };
        // SAFETY: it's required that applications do not cause data races
        Ok(unsafe { MutMemoryPtr::from_ptr(ptr) })
    }

    pub fn read(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        offset: usize,
        ptr: MutMemoryPtr,
        size: usize,
    ) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;

        unsafe {
            ptr::copy_nonoverlapping(tx.ptr(), ptr, size);
        }

        Ok(())
    }

    pub fn read_rect(
        &self,
        dst: MutMemoryPtr,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        src_row_pitch: usize,
        src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,
    ) -> CLResult<()> {
        let dst = dst.as_ptr();
        let (offset, size) =
            CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]);
        let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;

        sw_copy(
            tx.ptr(),
            dst,
            region,
            &CLVec::default(),
            src_row_pitch,
            src_slice_pitch,
            dst_origin,
            dst_row_pitch,
            dst_slice_pitch,
            1,
        );

        Ok(())
    }

    // TODO: only sync on map when the memory is not mapped with discard
    pub fn sync_shadow(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let mut lock = self.maps.lock().unwrap();
        if !lock.increase_ref(q.device, ptr) {
            return Ok(());
        }

        if self.has_user_shadow_buffer(q.device)? {
            self.read(
                q,
                ctx,
                0,
                // SAFETY: it's required that applications do not cause data races
                unsafe { MutMemoryPtr::from_ptr(self.host_ptr()) },
                self.size,
            )
        } else {
            if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
                let res = self.get_res_of_dev(q.device)?;
                let bx = create_pipe_box(
                    [self.offset, 0, 0].into(),
                    [self.size, 1, 1].into(),
                    CL_MEM_OBJECT_BUFFER,
                )?;
                ctx.resource_copy_region(res, shadow, &[0; 3], &bx);
            }
            Ok(())
        }
    }

    fn tx<'a>(
        &self,
        q: &Queue,
        ctx: &'a PipeContext,
        offset: usize,
        size: usize,
        rw: RWFlags,
    ) -> CLResult<GuardedPipeTransfer<'a>> {
        let offset = self.apply_offset(offset)?;
        let r = self.get_res_of_dev(q.device)?;

        Ok(ctx
            .buffer_map(
                r,
                offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
                size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
                rw,
                ResourceMapType::Normal,
            )
            .ok_or(CL_OUT_OF_RESOURCES)?
            .with_ctx(ctx))
    }

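    // Maps the resource via the device's helper context without synchronizing: either directly
    // (system RAM and linear layout) or through a freshly allocated coherent staging shadow
    // buffer that gets synced later.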
    fn tx_raw_async(
        &self,
        dev: &Device,
        rw: RWFlags,
    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
        let r = self.get_res_of_dev(dev)?;
        let offset = self.offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
        let size = self.size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
        let ctx = dev.helper_ctx();

        let tx = if can_map_directly(dev, r) {
            ctx.buffer_map_directly(r, offset, size, rw)
        } else {
            None
        };

        if let Some(tx) = tx {
            Ok((tx, None))
        } else {
            let shadow = dev
                .screen()
                .resource_create_buffer(size as u32, ResourceType::Staging, 0)
                .ok_or(CL_OUT_OF_RESOURCES)?;
            let tx = ctx
                .buffer_map_coherent(&shadow, 0, size, rw)
                .ok_or(CL_OUT_OF_RESOURCES)?;
            Ok((tx, Some(shadow)))
        }
    }

    // TODO: only sync on unmap when the memory is not mapped for writing
    pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let mut lock = self.maps.lock().unwrap();
        if !lock.contains_ptr(ptr) {
            return Ok(());
        }

        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
        if needs_sync {
            if let Some(shadow) = shadow {
                let res = self.get_res_of_dev(q.device)?;
                let offset = self.offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
                let bx = create_pipe_box(
                    CLVec::default(),
                    [self.size, 1, 1].into(),
                    CL_MEM_OBJECT_BUFFER,
                )?;

                ctx.resource_copy_region(shadow, res, &[offset, 0, 0], &bx);
            } else if self.has_user_shadow_buffer(q.device)? {
                self.write(
                    q,
                    ctx,
                    0,
                    // SAFETY: it's required that applications do not cause data races
                    unsafe { ConstMemoryPtr::from_ptr(self.host_ptr()) },
                    self.size,
                )?;
            }
        }

        lock.clean_up_tx(q.device, ctx);

        Ok(())
    }

    pub fn write(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        offset: usize,
        ptr: ConstMemoryPtr,
        size: usize,
    ) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let offset = self.apply_offset(offset)?;
        let r = self.get_res_of_dev(q.device)?;
        ctx.buffer_subdata(
            r,
            offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            ptr,
            size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        );
        Ok(())
    }

    pub fn write_rect(
        &self,
        src: ConstMemoryPtr,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        src_row_pitch: usize,
        src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,
    ) -> CLResult<()> {
        let src = src.as_ptr();
        let (offset, size) =
            CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]);
        let tx = self.tx(q, ctx, offset, size, RWFlags::WR)?;

        sw_copy(
            src,
            tx.ptr(),
            region,
            src_origin,
            src_row_pitch,
            src_slice_pitch,
            &CLVec::default(),
            dst_row_pitch,
            dst_slice_pitch,
            1,
        );

        Ok(())
    }
}

impl Image {
    pub fn copy_to_buffer(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        dst: &Buffer,
        src_origin: CLVec<usize>,
        dst_offset: usize,
        region: &CLVec<usize>,
    ) -> CLResult<()> {
        let dst_offset = dst.apply_offset(dst_offset)?;
        let bpp = self.image_format.pixel_size().unwrap().into();

        let src_pitch;
        let tx_src;
        if let Some(Mem::Buffer(buffer)) = &self.parent {
            // If the image is created from a buffer, use the image's slice and row pitch instead
            src_pitch = [
                bpp,
                self.image_desc.row_pitch()? as usize,
                self.image_desc.slice_pitch(),
            ];
            let (offset, size) = CLVec::calc_offset_size(src_origin, region, src_pitch);
            tx_src = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
        } else {
            tx_src = self.tx_image(
                q,
                ctx,
                &create_pipe_box(src_origin, *region, self.mem_type)?,
                RWFlags::RD,
            )?;
            src_pitch = [1, tx_src.row_pitch() as usize, tx_src.slice_pitch()];
        }

        // The destination is a plain buffer, so its pitches are tightly packed based on the region
        let dst_pitch = [bpp, bpp * region[0], bpp * region[0] * region[1]];

        let dst_origin: CLVec<usize> = [dst_offset, 0, 0].into();
        let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
        let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;

        // These pitch values cannot have a 0 value in any of their coordinates
        debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
        debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);

        sw_copy(
            tx_src.ptr(),
            tx_dst.ptr(),
            region,
            &CLVec::default(),
            src_pitch[1],
            src_pitch[2],
            &CLVec::default(),
            dst_pitch[1],
            dst_pitch[2],
            bpp as u8,
        );
        Ok(())
    }

    pub fn copy_to_image(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        dst: &Image,
        src_origin: CLVec<usize>,
        dst_origin: CLVec<usize>,
        region: &CLVec<usize>,
    ) -> CLResult<()> {
        let src_parent = self.get_parent();
        let dst_parent = dst.get_parent();
        let src_res = src_parent.get_res_of_dev(q.device)?;
        let dst_res = dst_parent.get_res_of_dev(q.device)?;

        // We just want to use sw_copy if mem objects have different types or if copy can have
        // custom strides (image2d from buff/images)
        if src_parent.is_buffer() || dst_parent.is_buffer() {
            let bpp = self.image_format.pixel_size().unwrap().into();

            let tx_src;
            let tx_dst;
            let dst_pitch;
            let src_pitch;
            if let Some(Mem::Buffer(buffer)) = &self.parent {
                src_pitch = [
                    bpp,
                    self.image_desc.row_pitch()? as usize,
                    self.image_desc.slice_pitch(),
                ];

                let (offset, size) = CLVec::calc_offset_size(src_origin, region, src_pitch);
                tx_src = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
            } else {
                tx_src = self.tx_image(
                    q,
                    ctx,
                    &create_pipe_box(src_origin, *region, src_parent.mem_type)?,
                    RWFlags::RD,
                )?;

                src_pitch = [1, tx_src.row_pitch() as usize, tx_src.slice_pitch()];
            }

            if let Some(Mem::Buffer(buffer)) = &dst.parent {
                // If the image is created from a buffer, use the image's slice and row pitch instead
                dst_pitch = [
                    bpp,
                    dst.image_desc.row_pitch()? as usize,
                    dst.image_desc.slice_pitch(),
                ];

                let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
                tx_dst = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;
            } else {
                tx_dst = dst.tx_image(
                    q,
                    ctx,
                    &create_pipe_box(dst_origin, *region, dst_parent.mem_type)?,
                    RWFlags::WR,
                )?;

                dst_pitch = [1, tx_dst.row_pitch() as usize, tx_dst.slice_pitch()];
            }

            // These pitch values cannot have a 0 value in any of their coordinates
            debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
            debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);

            sw_copy(
                tx_src.ptr(),
                tx_dst.ptr(),
                region,
                &CLVec::default(),
                src_pitch[1],
                src_pitch[2],
                &CLVec::default(),
                dst_pitch[1],
                dst_pitch[2],
                bpp as u8,
            )
        } else {
            let bx = create_pipe_box(src_origin, *region, src_parent.mem_type)?;
            let mut dst_origin: [u32; 3] = dst_origin.try_into()?;

            if src_parent.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
                (dst_origin[1], dst_origin[2]) = (dst_origin[2], dst_origin[1]);
            }

            ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
        }
        Ok(())
    }

    pub fn fill(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        pattern: &[u32],
        origin: &CLVec<usize>,
        region: &CLVec<usize>,
    ) -> CLResult<()> {
        let res = self.get_res_of_dev(q.device)?;

        // make sure we allocate multiples of 4 bytes so drivers don't read out of bounds or
        // unaligned.
        // TODO: use div_ceil once it's available
        let pixel_size = align(
            self.image_format.pixel_size().unwrap().into(),
            size_of::<u32>(),
        );
        let mut new_pattern: Vec<u32> = vec![0; pixel_size / size_of::<u32>()];

        // we don't support CL_DEPTH for now
        assert!(pattern.len() == 4);

        // SAFETY: pointers have to be valid for read/writes of exactly one pixel of their
        // respective format.
        // `new_pattern` has the correct size due to the `size` above.
        // `pattern` is validated through the CL API and allows undefined behavior if not followed
        // by CL API rules. It's expected to be a 4 component array of 32 bit values, except for
        // CL_DEPTH where it's just one value.
        unsafe {
            util_format_pack_rgba(
                self.pipe_format,
                new_pattern.as_mut_ptr().cast(),
                pattern.as_ptr().cast(),
                1,
            );
        }

        // If image is created from a buffer, use clear_image_buffer instead
        if self.is_parent_buffer() {
            let strides = (
                self.image_desc.row_pitch()? as usize,
                self.image_desc.slice_pitch(),
            );
            ctx.clear_image_buffer(res, &new_pattern, origin, region, strides, pixel_size);
        } else {
            let bx = create_pipe_box(*origin, *region, self.mem_type)?;
            ctx.clear_texture(res, &new_pattern, &bx);
        }

        Ok(())
    }

    pub fn is_parent_buffer(&self) -> bool {
        matches!(self.parent, Some(Mem::Buffer(_)))
    }

    pub fn map(
        &self,
        dev: &'static Device,
        origin: &CLVec<usize>,
        row_pitch: &mut usize,
        slice_pitch: &mut usize,
    ) -> CLResult<*mut c_void> {
        // we might have a host_ptr shadow buffer or an image created from a buffer
        let ptr = if self.has_user_shadow_buffer(dev)? {
            *row_pitch = self.image_desc.image_row_pitch;
            *slice_pitch = self.image_desc.image_slice_pitch;
            self.host_ptr()
        } else if let Some(Mem::Buffer(buffer)) = &self.parent {
            *row_pitch = self.image_desc.image_row_pitch;
            *slice_pitch = self.image_desc.image_slice_pitch;
            buffer.map(dev, 0)?.as_ptr()
        } else {
            let mut lock = self.maps.lock().unwrap();

            if let Entry::Vacant(e) = lock.tx.entry(dev) {
                let bx = self.image_desc.bx()?;
                let (tx, res) = self.tx_raw_async(dev, &bx, RWFlags::RW)?;
                e.insert(MappingTransfer::new(tx, res));
            } else {
                lock.mark_pending(dev);
            }

            let tx = &lock.tx.get(dev).unwrap().tx;

            if self.image_desc.dims() > 1 {
                *row_pitch = tx.row_pitch() as usize;
            }
            if self.image_desc.dims() > 2 || self.image_desc.is_array() {
                *slice_pitch = tx.slice_pitch();
            }

            tx.ptr()
        };

        let ptr = unsafe {
            ptr.add(
                *origin
                    * [
                        self.image_format.pixel_size().unwrap().into(),
                        *row_pitch,
                        *slice_pitch,
                    ],
            )
        };

        Ok(ptr)
    }

    pub fn pipe_image_host_access(&self) -> u16 {
        // those flags are all mutually exclusive
        (if bit_check(self.flags, CL_MEM_HOST_READ_ONLY) {
            PIPE_IMAGE_ACCESS_READ
        } else if bit_check(self.flags, CL_MEM_HOST_WRITE_ONLY) {
            PIPE_IMAGE_ACCESS_WRITE
        } else if bit_check(self.flags, CL_MEM_HOST_NO_ACCESS) {
            0
        } else {
            PIPE_IMAGE_ACCESS_READ_WRITE
        }) as u16
    }

    pub fn read(
        &self,
        dst: MutMemoryPtr,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,
    ) -> CLResult<()> {
        let dst = dst.as_ptr();
        let pixel_size = self.image_format.pixel_size().unwrap();

        let tx;
        let src_row_pitch;
        let src_slice_pitch;
        if let Some(Mem::Buffer(buffer)) = &self.parent {
            src_row_pitch = self.image_desc.image_row_pitch;
            src_slice_pitch = self.image_desc.image_slice_pitch;

            let (offset, size) = CLVec::calc_offset_size(
                src_origin,
                region,
                [pixel_size.into(), src_row_pitch, src_slice_pitch],
            );

            tx = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
        } else {
            let bx = create_pipe_box(*src_origin, *region, self.mem_type)?;
            tx = self.tx_image(q, ctx, &bx, RWFlags::RD)?;
            src_row_pitch = tx.row_pitch() as usize;
            src_slice_pitch = tx.slice_pitch();
        };

        sw_copy(
            tx.ptr(),
            dst,
            region,
            &CLVec::default(),
            src_row_pitch,
            src_slice_pitch,
            &CLVec::default(),
            dst_row_pitch,
            dst_slice_pitch,
            pixel_size,
        );

        Ok(())
    }

    // TODO: only sync on map when the memory is not mapped with discard
    pub fn sync_shadow(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let mut lock = self.maps.lock().unwrap();
        if !lock.increase_ref(q.device, ptr) {
            return Ok(());
        }

        if self.has_user_shadow_buffer(q.device)? {
            self.read(
                // SAFETY: it's required that applications do not cause data races
                unsafe { MutMemoryPtr::from_ptr(self.host_ptr()) },
                q,
                ctx,
                &self.image_desc.size(),
                &CLVec::default(),
                self.image_desc.image_row_pitch,
                self.image_desc.image_slice_pitch,
            )
        } else {
            if let Some(shadow) = lock.tx.get(q.device).and_then(|tx| tx.shadow.as_ref()) {
                let res = self.get_res_of_dev(q.device)?;
                let bx = self.image_desc.bx()?;
                ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
            }
            Ok(())
        }
    }

    fn tx_image<'a>(
        &self,
        q: &Queue,
        ctx: &'a PipeContext,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> CLResult<GuardedPipeTransfer<'a>> {
        let r = self.get_res_of_dev(q.device)?;
        Ok(ctx
            .texture_map(r, bx, rw, ResourceMapType::Normal)
            .ok_or(CL_OUT_OF_RESOURCES)?
            .with_ctx(ctx))
    }

    fn tx_raw_async(
        &self,
        dev: &Device,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
        let r = self.get_res_of_dev(dev)?;
        let ctx = dev.helper_ctx();

        let tx = if can_map_directly(dev, r) {
            ctx.texture_map_directly(r, bx, rw)
        } else {
            None
        };

        if let Some(tx) = tx {
            Ok((tx, None))
        } else {
            let shadow = dev
                .screen()
                .resource_create_texture(
                    r.width(),
                    r.height(),
                    r.depth(),
                    r.array_size(),
                    cl_mem_type_to_texture_target(self.image_desc.image_type),
                    self.pipe_format,
                    ResourceType::Staging,
                    false,
                )
                .ok_or(CL_OUT_OF_RESOURCES)?;
            let tx = ctx
                .texture_map_coherent(&shadow, bx, rw)
                .ok_or(CL_OUT_OF_RESOURCES)?;
            Ok((tx, Some(shadow)))
        }
    }

    // TODO: only sync on unmap when the memory is not mapped for writing
    pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let mut lock = self.maps.lock().unwrap();
        if !lock.contains_ptr(ptr) {
            return Ok(());
        }

        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
        if needs_sync {
            if let Some(shadow) = shadow {
                let res = self.get_res_of_dev(q.device)?;
                let bx = self.image_desc.bx()?;
                ctx.resource_copy_region(shadow, res, &[0, 0, 0], &bx);
            } else if self.has_user_shadow_buffer(q.device)? {
                self.write(
                    // SAFETY: it's required that applications do not cause data races
                    unsafe { ConstMemoryPtr::from_ptr(self.host_ptr()) },
                    q,
                    ctx,
                    &self.image_desc.size(),
                    self.image_desc.image_row_pitch,
                    self.image_desc.image_slice_pitch,
                    &CLVec::default(),
                )?;
            }
        }

        lock.clean_up_tx(q.device, ctx);

        Ok(())
    }

    pub fn write(
        &self,
        src: ConstMemoryPtr,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_row_pitch: usize,
        mut src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
    ) -> CLResult<()> {
        let src = src.as_ptr();
        let dst_row_pitch = self.image_desc.image_row_pitch;
        let dst_slice_pitch = self.image_desc.image_slice_pitch;

        if let Some(Mem::Buffer(buffer)) = &self.parent {
            let pixel_size = self.image_format.pixel_size().unwrap();
            let (offset, size) = CLVec::calc_offset_size(
                dst_origin,
                region,
                [pixel_size.into(), dst_row_pitch, dst_slice_pitch],
            );
            let tx = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;

            sw_copy(
                src,
                tx.ptr(),
                region,
                &CLVec::default(),
                src_row_pitch,
                src_slice_pitch,
                &CLVec::default(),
                dst_row_pitch,
                dst_slice_pitch,
                pixel_size,
            );
        } else {
            let res = self.get_res_of_dev(q.device)?;
            let bx = create_pipe_box(*dst_origin, *region, self.mem_type)?;

            if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
                src_slice_pitch = src_row_pitch;
            }

            ctx.texture_subdata(
                res,
                &bx,
                src,
                src_row_pitch
                    .try_into()
                    .map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
                src_slice_pitch,
            );
        }
        Ok(())
    }
}

pub struct Sampler {
    pub base: CLObjectBase<CL_INVALID_SAMPLER>,
    pub context: Arc<Context>,
    pub normalized_coords: bool,
    pub addressing_mode: cl_addressing_mode,
    pub filter_mode: cl_filter_mode,
    pub props: Option<Properties<cl_sampler_properties>>,
}

impl_cl_type_trait!(cl_sampler, Sampler, CL_INVALID_SAMPLER);

impl Sampler {
    pub fn new(
        context: Arc<Context>,
        normalized_coords: bool,
        addressing_mode: cl_addressing_mode,
        filter_mode: cl_filter_mode,
        props: Option<Properties<cl_sampler_properties>>,
    ) -> Arc<Sampler> {
        Arc::new(Self {
            base: CLObjectBase::new(RusticlTypes::Sampler),
            context: context,
            normalized_coords: normalized_coords,
            addressing_mode: addressing_mode,
            filter_mode: filter_mode,
            props: props,
        })
    }

    pub fn nir_to_cl(
        addressing_mode: u32,
        filter_mode: u32,
        normalized_coords: u32,
    ) -> (cl_addressing_mode, cl_filter_mode, bool) {
        let addr_mode = match addressing_mode {
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_NONE => CL_ADDRESS_NONE,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE => {
                CL_ADDRESS_CLAMP_TO_EDGE
            }
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP => CL_ADDRESS_CLAMP,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT => CL_ADDRESS_REPEAT,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED => {
                CL_ADDRESS_MIRRORED_REPEAT
            }
            _ => panic!("unknown addressing_mode"),
        };

        let filter = match filter_mode {
            cl_sampler_filter_mode::SAMPLER_FILTER_MODE_NEAREST => CL_FILTER_NEAREST,
            cl_sampler_filter_mode::SAMPLER_FILTER_MODE_LINEAR => CL_FILTER_LINEAR,
            _ => panic!("unknown filter_mode"),
        };

        (addr_mode, filter, normalized_coords != 0)
    }

    pub fn cl_to_pipe(
        (addressing_mode, filter_mode, normalized_coords): (
            cl_addressing_mode,
            cl_filter_mode,
            bool,
        ),
    ) -> pipe_sampler_state {
        let mut res = pipe_sampler_state::default();

        let wrap = match addressing_mode {
            CL_ADDRESS_CLAMP_TO_EDGE => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,
            CL_ADDRESS_CLAMP => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_BORDER,
            CL_ADDRESS_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_REPEAT,
            CL_ADDRESS_MIRRORED_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_MIRROR_REPEAT,
            // TODO: what's a reasonable default?
            _ => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,
        };

        let img_filter = match filter_mode {
            CL_FILTER_NEAREST => pipe_tex_filter::PIPE_TEX_FILTER_NEAREST,
            CL_FILTER_LINEAR => pipe_tex_filter::PIPE_TEX_FILTER_LINEAR,
            _ => panic!("unknown filter_mode"),
        };

        res.set_min_img_filter(img_filter);
        res.set_mag_img_filter(img_filter);
        res.set_unnormalized_coords((!normalized_coords).into());
        res.set_wrap_r(wrap);
        res.set_wrap_s(wrap);
        res.set_wrap_t(wrap);

        res
    }

    pub fn pipe(&self) -> pipe_sampler_state {
        Self::cl_to_pipe((
            self.addressing_mode,
            self.filter_mode,
            self.normalized_coords,
        ))
    }
}